xref: /php-src/ext/standard/url.c (revision 8109d210)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Jim Winstead <jimw@php.net>                                  |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include <stdlib.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <sys/types.h>
21 
22 #ifdef __SSE2__
23 #include <emmintrin.h>
24 #endif
25 
26 #include "php.h"
27 
28 #include "url.h"
29 #include "file.h"
30 
31 /* {{{ free_url */
php_url_free(php_url * theurl)32 PHPAPI void php_url_free(php_url *theurl)
33 {
34 	if (theurl->scheme)
35 		zend_string_release_ex(theurl->scheme, 0);
36 	if (theurl->user)
37 		zend_string_release_ex(theurl->user, 0);
38 	if (theurl->pass)
39 		zend_string_release_ex(theurl->pass, 0);
40 	if (theurl->host)
41 		zend_string_release_ex(theurl->host, 0);
42 	if (theurl->path)
43 		zend_string_release_ex(theurl->path, 0);
44 	if (theurl->query)
45 		zend_string_release_ex(theurl->query, 0);
46 	if (theurl->fragment)
47 		zend_string_release_ex(theurl->fragment, 0);
48 	efree(theurl);
49 }
50 /* }}} */
51 
/* Overwrite, in place, every byte in str[0 .. len) for which iscntrl() is
 * true with '_'. Bytes are inspected as unsigned char. */
static void php_replace_controlchars(char *str, size_t len)
{
	unsigned char *bytes = (unsigned char *)str;
	size_t idx;

	ZEND_ASSERT(str != NULL);

	for (idx = 0; idx < len; idx++) {
		if (iscntrl(bytes[idx])) {
			bytes[idx] = '_';
		}
	}
}
66 
php_url_parse(char const * str)67 PHPAPI php_url *php_url_parse(char const *str)
68 {
69 	return php_url_parse_ex(str, strlen(str));
70 }
71 
/* Length-bounded strcspn(): returns a pointer to the first byte in [s, e)
 * that equals any character of the NUL-terminated set `chars`, or e when no
 * such byte exists. The range itself may contain NUL bytes. */
static const char *binary_strcspn(const char *s, const char *e, const char *chars) {
	const char *stop = e;
	const char *candidate;

	for (candidate = chars; *candidate != '\0'; candidate++) {
		/* Only search up to the earliest hit so far; a later hit cannot win. */
		const char *hit = memchr(s, *candidate, stop - s);
		if (hit != NULL) {
			stop = hit;
		}
	}

	return stop;
}
82 
83 /* {{{ php_url_parse */
php_url_parse_ex(char const * str,size_t length)84 PHPAPI php_url *php_url_parse_ex(char const *str, size_t length)
85 {
86 	bool has_port;
87 	return php_url_parse_ex2(str, length, &has_port);
88 }
89 
90 /* {{{ php_url_parse_ex2
91  */
php_url_parse_ex2(char const * str,size_t length,bool * has_port)92 PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port)
93 {
94 	char port_buf[6];
95 	php_url *ret = ecalloc(1, sizeof(php_url));
96 	char const *s, *e, *p, *pp, *ue;
97 
98 	*has_port = 0;
99 	s = str;
100 	ue = s + length;
101 
102 	/* parse scheme */
103 	if ((e = memchr(s, ':', length)) && e != s) {
104 		/* validate scheme */
105 		p = s;
106 		while (p < e) {
107 			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
108 			if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
109 				if (e + 1 < ue && e < binary_strcspn(s, ue, "?#")) {
110 					goto parse_port;
111 				} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
112 					s += 2;
113 					e = 0;
114 					goto parse_host;
115 				} else {
116 					goto just_path;
117 				}
118 			}
119 			p++;
120 		}
121 
122 		if (e + 1 == ue) { /* only scheme is available */
123 			ret->scheme = zend_string_init(s, (e - s), 0);
124 			php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
125 			return ret;
126 		}
127 
128 		/*
129 		 * certain schemas like mailto: and zlib: may not have any / after them
130 		 * this check ensures we support those.
131 		 */
132 		if (*(e+1) != '/') {
133 			/* check if the data we get is a port this allows us to
134 			 * correctly parse things like a.com:80
135 			 */
136 			p = e + 1;
137 			while (p < ue && isdigit(*p)) {
138 				p++;
139 			}
140 
141 			if ((p == ue || *p == '/') && (p - e) < 7) {
142 				goto parse_port;
143 			}
144 
145 			ret->scheme = zend_string_init(s, (e-s), 0);
146 			php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
147 
148 			s = e + 1;
149 			goto just_path;
150 		} else {
151 			ret->scheme = zend_string_init(s, (e-s), 0);
152 			php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
153 
154 			if (e + 2 < ue && *(e + 2) == '/') {
155 				s = e + 3;
156 				if (zend_string_equals_literal_ci(ret->scheme, "file")) {
157 					if (e + 3 < ue && *(e + 3) == '/') {
158 						/* support windows drive letters as in:
159 						   file:///c:/somedir/file.txt
160 						*/
161 						if (e + 5 < ue && *(e + 5) == ':') {
162 							s = e + 4;
163 						}
164 						goto just_path;
165 					}
166 				}
167 			} else {
168 				s = e + 1;
169 				goto just_path;
170 			}
171 		}
172 	} else if (e) { /* no scheme; starts with colon: look for port */
173 		parse_port:
174 		p = e + 1;
175 		pp = p;
176 
177 		while (pp < ue && pp - p < 6 && isdigit(*pp)) {
178 			pp++;
179 		}
180 
181 		if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) {
182 			zend_long port;
183 			char *end;
184 			memcpy(port_buf, p, (pp - p));
185 			port_buf[pp - p] = '\0';
186 			port = ZEND_STRTOL(port_buf, &end, 10);
187 			if (port >= 0 && port <= 65535 && end != port_buf) {
188 				*has_port = 1;
189 				ret->port = (unsigned short) port;
190 				if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
191 				    s += 2;
192 				}
193 			} else {
194 				php_url_free(ret);
195 				return NULL;
196 			}
197 		} else if (p == pp && pp == ue) {
198 			php_url_free(ret);
199 			return NULL;
200 		} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
201 			s += 2;
202 		} else {
203 			goto just_path;
204 		}
205 	} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
206 		s += 2;
207 	} else {
208 		goto just_path;
209 	}
210 
211 parse_host:
212 	e = binary_strcspn(s, ue, "/?#");
213 
214 	/* check for login and password */
215 	if ((p = zend_memrchr(s, '@', (e-s)))) {
216 		if ((pp = memchr(s, ':', (p-s)))) {
217 			ret->user = zend_string_init(s, (pp-s), 0);
218 			php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
219 
220 			pp++;
221 			ret->pass = zend_string_init(pp, (p-pp), 0);
222 			php_replace_controlchars(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass));
223 		} else {
224 			ret->user = zend_string_init(s, (p-s), 0);
225 			php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
226 		}
227 
228 		s = p + 1;
229 	}
230 
231 	/* check for port */
232 	if (s < ue && *s == '[' && *(e-1) == ']') {
233 		/* Short circuit portscan,
234 		   we're dealing with an
235 		   IPv6 embedded address */
236 		p = NULL;
237 	} else {
238 		p = zend_memrchr(s, ':', (e-s));
239 	}
240 
241 	if (p) {
242 		if (!ret->port) {
243 			p++;
244 			if (e-p > 5) { /* port cannot be longer then 5 characters */
245 				php_url_free(ret);
246 				return NULL;
247 			} else if (e - p > 0) {
248 				zend_long port;
249 				char *end;
250 				memcpy(port_buf, p, (e - p));
251 				port_buf[e - p] = '\0';
252 				port = ZEND_STRTOL(port_buf, &end, 10);
253 				if (port >= 0 && port <= 65535 && end != port_buf) {
254 					*has_port = 1;
255 					ret->port = (unsigned short)port;
256 				} else {
257 					php_url_free(ret);
258 					return NULL;
259 				}
260 			}
261 			p--;
262 		}
263 	} else {
264 		p = e;
265 	}
266 
267 	/* check if we have a valid host, if we don't reject the string as url */
268 	if ((p-s) < 1) {
269 		php_url_free(ret);
270 		return NULL;
271 	}
272 
273 	ret->host = zend_string_init(s, (p-s), 0);
274 	php_replace_controlchars(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host));
275 
276 	if (e == ue) {
277 		return ret;
278 	}
279 
280 	s = e;
281 
282 	just_path:
283 
284 	e = ue;
285 	p = memchr(s, '#', (e - s));
286 	if (p) {
287 		p++;
288 		if (p < e) {
289 			ret->fragment = zend_string_init(p, (e - p), 0);
290 			php_replace_controlchars(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment));
291 		} else {
292 			ret->fragment = ZSTR_EMPTY_ALLOC();
293 		}
294 		e = p-1;
295 	}
296 
297 	p = memchr(s, '?', (e - s));
298 	if (p) {
299 		p++;
300 		if (p < e) {
301 			ret->query = zend_string_init(p, (e - p), 0);
302 			php_replace_controlchars(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query));
303 		} else {
304 			ret->query = ZSTR_EMPTY_ALLOC();
305 		}
306 		e = p-1;
307 	}
308 
309 	if (s < e || s == ue) {
310 		ret->path = zend_string_init(s, (e - s), 0);
311 		php_replace_controlchars(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path));
312 	}
313 
314 	return ret;
315 }
316 /* }}} */
317 
318 /* {{{ Parse a URL and return its components */
PHP_FUNCTION(parse_url)319 PHP_FUNCTION(parse_url)
320 {
321 	char *str;
322 	size_t str_len;
323 	php_url *resource;
324 	zend_long key = -1;
325 	zval tmp;
326 	bool has_port;
327 
328 	ZEND_PARSE_PARAMETERS_START(1, 2)
329 		Z_PARAM_STRING(str, str_len)
330 		Z_PARAM_OPTIONAL
331 		Z_PARAM_LONG(key)
332 	ZEND_PARSE_PARAMETERS_END();
333 
334 	resource = php_url_parse_ex2(str, str_len, &has_port);
335 	if (resource == NULL) {
336 		/* @todo Find a method to determine why php_url_parse_ex() failed */
337 		RETURN_FALSE;
338 	}
339 
340 	if (key > -1) {
341 		switch (key) {
342 			case PHP_URL_SCHEME:
343 				if (resource->scheme != NULL) RETVAL_STR_COPY(resource->scheme);
344 				break;
345 			case PHP_URL_HOST:
346 				if (resource->host != NULL) RETVAL_STR_COPY(resource->host);
347 				break;
348 			case PHP_URL_PORT:
349 				if (has_port) RETVAL_LONG(resource->port);
350 				break;
351 			case PHP_URL_USER:
352 				if (resource->user != NULL) RETVAL_STR_COPY(resource->user);
353 				break;
354 			case PHP_URL_PASS:
355 				if (resource->pass != NULL) RETVAL_STR_COPY(resource->pass);
356 				break;
357 			case PHP_URL_PATH:
358 				if (resource->path != NULL) RETVAL_STR_COPY(resource->path);
359 				break;
360 			case PHP_URL_QUERY:
361 				if (resource->query != NULL) RETVAL_STR_COPY(resource->query);
362 				break;
363 			case PHP_URL_FRAGMENT:
364 				if (resource->fragment != NULL) RETVAL_STR_COPY(resource->fragment);
365 				break;
366 			default:
367 				zend_argument_value_error(2, "must be a valid URL component identifier, " ZEND_LONG_FMT " given", key);
368 				break;
369 		}
370 		goto done;
371 	}
372 
373 	/* allocate an array for return */
374 	array_init(return_value);
375 
376     /* add the various elements to the array */
377 	if (resource->scheme != NULL) {
378 		ZVAL_STR_COPY(&tmp, resource->scheme);
379 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_SCHEME), &tmp);
380 	}
381 	if (resource->host != NULL) {
382 		ZVAL_STR_COPY(&tmp, resource->host);
383 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_HOST), &tmp);
384 	}
385 	if (has_port) {
386 		ZVAL_LONG(&tmp, resource->port);
387 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PORT), &tmp);
388 	}
389 	if (resource->user != NULL) {
390 		ZVAL_STR_COPY(&tmp, resource->user);
391 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &tmp);
392 	}
393 	if (resource->pass != NULL) {
394 		ZVAL_STR_COPY(&tmp, resource->pass);
395 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PASS), &tmp);
396 	}
397 	if (resource->path != NULL) {
398 		ZVAL_STR_COPY(&tmp, resource->path);
399 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PATH), &tmp);
400 	}
401 	if (resource->query != NULL) {
402 		ZVAL_STR_COPY(&tmp, resource->query);
403 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_QUERY), &tmp);
404 	}
405 	if (resource->fragment != NULL) {
406 		ZVAL_STR_COPY(&tmp, resource->fragment);
407 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &tmp);
408 	}
409 done:
410 	php_url_free(resource);
411 }
412 /* }}} */
413 
414 /* {{{ php_htoi */
/* Convert the two hexadecimal digits at s[0] and s[1] to their integer
 * value (s[0] is the high nibble). Upper-case letters are folded to lower
 * case first. The input is not validated: the callers in this file check
 * both bytes with isxdigit() before calling. */
static int php_htoi(char *s)
{
	const unsigned char *digits = (const unsigned char *)s;
	int result = 0;
	int pos;

	for (pos = 0; pos < 2; pos++) {
		int ch = digits[pos];
		int nibble;

		if (isupper(ch)) {
			ch = tolower(ch);
		}
		if (ch >= '0' && ch <= '9') {
			nibble = ch - '0';
		} else {
			nibble = ch - 'a' + 10;
		}
		result = result * 16 + nibble;
	}

	return result;
}
432 /* }}} */
433 
434 /* rfc1738:
435 
436    ...The characters ";",
437    "/", "?", ":", "@", "=" and "&" are the characters which may be
438    reserved for special meaning within a scheme...
439 
440    ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
441    reserved characters used for their reserved purposes may be used
442    unencoded within a URL...
443 
444    For added safety, we only leave -_. unencoded.
445  */
446 
/* Upper-case hexadecimal digits, indexed by nibble value (0..15); used when
 * emitting %XX escapes. */
static const unsigned char hexchars[] = "0123456789" "ABCDEF";
448 
php_url_encode_impl(const char * s,size_t len,bool raw)449 static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, bool raw) /* {{{ */ {
450 	unsigned char c;
451 	unsigned char *to;
452 	unsigned char const *from, *end;
453 	zend_string *start;
454 
455 	from = (unsigned char *)s;
456 	end = (unsigned char *)s + len;
457 	start = zend_string_safe_alloc(3, len, 0, 0);
458 	to = (unsigned char*)ZSTR_VAL(start);
459 
460 #ifdef __SSE2__
461 	while (from + 16 < end) {
462 		__m128i mask;
463 		uint32_t bits;
464 		const __m128i _A = _mm_set1_epi8('A' - 1);
465 		const __m128i Z_ = _mm_set1_epi8('Z' + 1);
466 		const __m128i _a = _mm_set1_epi8('a' - 1);
467 		const __m128i z_ = _mm_set1_epi8('z' + 1);
468 		const __m128i _zero = _mm_set1_epi8('0' - 1);
469 		const __m128i nine_ = _mm_set1_epi8('9' + 1);
470 		const __m128i dot = _mm_set1_epi8('.');
471 		const __m128i minus = _mm_set1_epi8('-');
472 		const __m128i under = _mm_set1_epi8('_');
473 
474 		__m128i in = _mm_loadu_si128((__m128i *)from);
475 
476 		__m128i gt = _mm_cmpgt_epi8(in, _A);
477 		__m128i lt = _mm_cmplt_epi8(in, Z_);
478 		mask = _mm_and_si128(lt, gt); /* upper */
479 		gt = _mm_cmpgt_epi8(in, _a);
480 		lt = _mm_cmplt_epi8(in, z_);
481 		mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */
482 		gt = _mm_cmpgt_epi8(in, _zero);
483 		lt = _mm_cmplt_epi8(in, nine_);
484 		mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */
485 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot));
486 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus));
487 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under));
488 
489 		if (!raw) {
490 			const __m128i blank = _mm_set1_epi8(' ');
491 			__m128i eq = _mm_cmpeq_epi8(in, blank);
492 			if (_mm_movemask_epi8(eq)) {
493 				in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' ')));
494 				mask = _mm_or_si128(mask, eq);
495 			}
496 		}
497 		if (raw) {
498 			const __m128i wavy = _mm_set1_epi8('~');
499 			mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy));
500 		}
501 		if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) {
502 			_mm_storeu_si128((__m128i*)to, in);
503 			to += 16;
504 		} else {
505 			unsigned char xmm[16];
506 			_mm_storeu_si128((__m128i*)xmm, in);
507 			for (size_t i = 0; i < sizeof(xmm); i++) {
508 				if ((bits & (0x1 << i))) {
509 					*to++ = xmm[i];
510 				} else {
511 					*to++ = '%';
512 					*to++ = hexchars[xmm[i] >> 4];
513 					*to++ = hexchars[xmm[i] & 0xf];
514 				}
515 			}
516 		}
517 		from += 16;
518 	}
519 #endif
520 	while (from < end) {
521 		c = *from++;
522 
523 		if (!raw && c == ' ') {
524 			*to++ = '+';
525 		} else if ((c < '0' && c != '-' && c != '.') ||
526 				(c < 'A' && c > '9') ||
527 				(c > 'Z' && c < 'a' && c != '_') ||
528 				(c > 'z' && (!raw || c != '~'))) {
529 			to[0] = '%';
530 			to[1] = hexchars[c >> 4];
531 			to[2] = hexchars[c & 15];
532 			to += 3;
533 		} else {
534 			*to++ = c;
535 		}
536 	}
537 	*to = '\0';
538 
539 	start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0);
540 
541 	return start;
542 }
543 /* }}} */
544 
545 /* {{{ php_url_encode */
php_url_encode(char const * s,size_t len)546 PHPAPI zend_string *php_url_encode(char const *s, size_t len)
547 {
548 	return php_url_encode_impl(s, len, 0);
549 }
550 /* }}} */
551 
552 /* {{{ URL-encodes string */
PHP_FUNCTION(urlencode)553 PHP_FUNCTION(urlencode)
554 {
555 	zend_string *in_str;
556 
557 	ZEND_PARSE_PARAMETERS_START(1, 1)
558 		Z_PARAM_STR(in_str)
559 	ZEND_PARSE_PARAMETERS_END();
560 
561 	RETURN_STR(php_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
562 }
563 /* }}} */
564 
565 /* {{{ Decodes URL-encoded string */
PHP_FUNCTION(urldecode)566 PHP_FUNCTION(urldecode)
567 {
568 	zend_string *in_str, *out_str;
569 
570 	ZEND_PARSE_PARAMETERS_START(1, 1)
571 		Z_PARAM_STR(in_str)
572 	ZEND_PARSE_PARAMETERS_END();
573 
574 	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
575 	ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
576 
577 	RETURN_NEW_STR(out_str);
578 }
579 /* }}} */
580 
581 /* {{{ php_url_decode */
php_url_decode(char * str,size_t len)582 PHPAPI size_t php_url_decode(char *str, size_t len)
583 {
584 	char *dest = str;
585 	char *data = str;
586 
587 	while (len--) {
588 		if (*data == '+') {
589 			*dest = ' ';
590 		}
591 		else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
592 				 && isxdigit((int) *(data + 2))) {
593 			*dest = (char) php_htoi(data + 1);
594 			data += 2;
595 			len -= 2;
596 		} else {
597 			*dest = *data;
598 		}
599 		data++;
600 		dest++;
601 	}
602 	*dest = '\0';
603 	return dest - str;
604 }
605 /* }}} */
606 
607 /* {{{ php_raw_url_encode */
php_raw_url_encode(char const * s,size_t len)608 PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len)
609 {
610 	return php_url_encode_impl(s, len, 1);
611 }
612 /* }}} */
613 
614 /* {{{ URL-encodes string */
PHP_FUNCTION(rawurlencode)615 PHP_FUNCTION(rawurlencode)
616 {
617 	zend_string *in_str;
618 
619 	ZEND_PARSE_PARAMETERS_START(1, 1)
620 		Z_PARAM_STR(in_str)
621 	ZEND_PARSE_PARAMETERS_END();
622 
623 	RETURN_STR(php_raw_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
624 }
625 /* }}} */
626 
/* {{{ Decodes URL-encoded string */
PHP_FUNCTION(rawurldecode)628 PHP_FUNCTION(rawurldecode)
629 {
630 	zend_string *in_str, *out_str;
631 
632 	ZEND_PARSE_PARAMETERS_START(1, 1)
633 		Z_PARAM_STR(in_str)
634 	ZEND_PARSE_PARAMETERS_END();
635 
636 	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
637 	ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
638 
639 	RETURN_NEW_STR(out_str);
640 }
641 /* }}} */
642 
643 /* {{{ php_raw_url_decode */
php_raw_url_decode(char * str,size_t len)644 PHPAPI size_t php_raw_url_decode(char *str, size_t len)
645 {
646 	char *dest = str;
647 	char *data = str;
648 
649 	while (len--) {
650 		if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
651 			&& isxdigit((int) *(data + 2))) {
652 			*dest = (char) php_htoi(data + 1);
653 			data += 2;
654 			len -= 2;
655 		} else {
656 			*dest = *data;
657 		}
658 		data++;
659 		dest++;
660 	}
661 	*dest = '\0';
662 	return dest - str;
663 }
664 /* }}} */
665 
666 /* {{{ fetches all the headers sent by the server in response to a HTTP request */
PHP_FUNCTION(get_headers)667 PHP_FUNCTION(get_headers)
668 {
669 	char *url;
670 	size_t url_len;
671 	php_stream *stream;
672 	zval *prev_val, *hdr = NULL;
673 	bool format = 0;
674 	zval *zcontext = NULL;
675 	php_stream_context *context;
676 
677 	ZEND_PARSE_PARAMETERS_START(1, 3)
678 		Z_PARAM_PATH(url, url_len)
679 		Z_PARAM_OPTIONAL
680 		Z_PARAM_BOOL(format)
681 		Z_PARAM_RESOURCE_OR_NULL(zcontext)
682 	ZEND_PARSE_PARAMETERS_END();
683 
684 	context = php_stream_context_from_zval(zcontext, 0);
685 
686 	if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) {
687 		RETURN_FALSE;
688 	}
689 
690 	if (Z_TYPE(stream->wrapperdata) != IS_ARRAY) {
691 		php_stream_close(stream);
692 		RETURN_FALSE;
693 	}
694 
695 	array_init(return_value);
696 
697 	ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(&stream->wrapperdata), hdr) {
698 		if (Z_TYPE_P(hdr) != IS_STRING) {
699 			continue;
700 		}
701 		if (!format) {
702 no_name_header:
703 			add_next_index_str(return_value, zend_string_copy(Z_STR_P(hdr)));
704 		} else {
705 			char c;
706 			char *s, *p;
707 
708 			if ((p = strchr(Z_STRVAL_P(hdr), ':'))) {
709 				c = *p;
710 				*p = '\0';
711 				s = p + 1;
712 				while (isspace((int)*(unsigned char *)s)) {
713 					s++;
714 				}
715 
716 				if ((prev_val = zend_hash_str_find(Z_ARRVAL_P(return_value), Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)))) == NULL) {
717 					add_assoc_stringl_ex(return_value, Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)), s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
718 				} else { /* some headers may occur more than once, therefore we need to remake the string into an array */
719 					convert_to_array(prev_val);
720 					add_next_index_stringl(prev_val, s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
721 				}
722 
723 				*p = c;
724 			} else {
725 				goto no_name_header;
726 			}
727 		}
728 	} ZEND_HASH_FOREACH_END();
729 
730 	php_stream_close(stream);
731 }
732 /* }}} */
733