xref: /php-src/ext/standard/url.c (revision cd0cd3d3)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Jim Winstead <jimw@php.net>                                  |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include <stdlib.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <sys/types.h>
21 
22 #ifdef __SSE2__
23 #include <emmintrin.h>
24 #endif
25 
26 #include "php.h"
27 
28 #include "url.h"
29 #include "file.h"
30 
31 /* {{{ free_url */
php_url_free(php_url * theurl)32 PHPAPI void php_url_free(php_url *theurl)
33 {
34 	if (theurl->scheme)
35 		zend_string_release_ex(theurl->scheme, 0);
36 	if (theurl->user)
37 		zend_string_release_ex(theurl->user, 0);
38 	if (theurl->pass)
39 		zend_string_release_ex(theurl->pass, 0);
40 	if (theurl->host)
41 		zend_string_release_ex(theurl->host, 0);
42 	if (theurl->path)
43 		zend_string_release_ex(theurl->path, 0);
44 	if (theurl->query)
45 		zend_string_release_ex(theurl->query, 0);
46 	if (theurl->fragment)
47 		zend_string_release_ex(theurl->fragment, 0);
48 	efree(theurl);
49 }
50 /* }}} */
51 
52 /* {{{ php_replace_controlchars_ex */
/* Overwrites, in place, every byte among the first len bytes of str that
 * iscntrl() classifies as a control character with '_'.
 *
 * Returns str, or NULL when str is NULL. */
PHPAPI char *php_replace_controlchars_ex(char *str, size_t len)
{
	unsigned char *s;
	unsigned char *e;

	/* Bail out before deriving any pointer from str: arithmetic on a null
	 * pointer is undefined behaviour. */
	if (!str) {
		return NULL;
	}

	/* Walk the buffer as unsigned char so iscntrl() always receives a value
	 * in its defined domain. */
	s = (unsigned char *)str;
	e = s + len;

	while (s < e) {
		if (iscntrl(*s)) {
			*s = '_';
		}
		s++;
	}

	return str;
}
72 /* }}} */
73 
php_replace_controlchars(char * str)74 PHPAPI char *php_replace_controlchars(char *str)
75 {
76 	return php_replace_controlchars_ex(str, strlen(str));
77 }
78 
php_url_parse(char const * str)79 PHPAPI php_url *php_url_parse(char const *str)
80 {
81 	return php_url_parse_ex(str, strlen(str));
82 }
83 
/* Length-bounded counterpart of strcspn(): returns a pointer to the first
 * byte in [s, e) that matches any character of the NUL-terminated set
 * chars, or e when none of them occurs. The scanned range may itself
 * contain NUL bytes. */
static const char *binary_strcspn(const char *s, const char *e, const char *chars) {
	const char *limit = e;
	const char *c;

	for (c = chars; *c != '\0'; c++) {
		/* Each hit shrinks the window, so later set members can only
		 * move the result further towards s. */
		const char *hit = memchr(s, *c, limit - s);

		if (hit != NULL) {
			limit = hit;
		}
	}

	return limit;
}
94 
/* {{{ php_url_parse_ex */
php_url_parse_ex(char const * str,size_t length)96 PHPAPI php_url *php_url_parse_ex(char const *str, size_t length)
97 {
98 	bool has_port;
99 	return php_url_parse_ex2(str, length, &has_port);
100 }
101 
102 /* {{{ php_url_parse_ex2
103  */
php_url_parse_ex2(char const * str,size_t length,bool * has_port)104 PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port)
105 {
106 	char port_buf[6];
107 	php_url *ret = ecalloc(1, sizeof(php_url));
108 	char const *s, *e, *p, *pp, *ue;
109 
110 	*has_port = 0;
111 	s = str;
112 	ue = s + length;
113 
114 	/* parse scheme */
115 	if ((e = memchr(s, ':', length)) && e != s) {
116 		/* validate scheme */
117 		p = s;
118 		while (p < e) {
119 			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
120 			if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
121 				if (e + 1 < ue && e < binary_strcspn(s, ue, "?#")) {
122 					goto parse_port;
123 				} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
124 					s += 2;
125 					e = 0;
126 					goto parse_host;
127 				} else {
128 					goto just_path;
129 				}
130 			}
131 			p++;
132 		}
133 
134 		if (e + 1 == ue) { /* only scheme is available */
135 			ret->scheme = zend_string_init(s, (e - s), 0);
136 			php_replace_controlchars_ex(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
137 			return ret;
138 		}
139 
140 		/*
141 		 * certain schemas like mailto: and zlib: may not have any / after them
142 		 * this check ensures we support those.
143 		 */
144 		if (*(e+1) != '/') {
145 			/* check if the data we get is a port this allows us to
146 			 * correctly parse things like a.com:80
147 			 */
148 			p = e + 1;
149 			while (p < ue && isdigit(*p)) {
150 				p++;
151 			}
152 
153 			if ((p == ue || *p == '/') && (p - e) < 7) {
154 				goto parse_port;
155 			}
156 
157 			ret->scheme = zend_string_init(s, (e-s), 0);
158 			php_replace_controlchars_ex(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
159 
160 			s = e + 1;
161 			goto just_path;
162 		} else {
163 			ret->scheme = zend_string_init(s, (e-s), 0);
164 			php_replace_controlchars_ex(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
165 
166 			if (e + 2 < ue && *(e + 2) == '/') {
167 				s = e + 3;
168 				if (zend_string_equals_literal_ci(ret->scheme, "file")) {
169 					if (e + 3 < ue && *(e + 3) == '/') {
170 						/* support windows drive letters as in:
171 						   file:///c:/somedir/file.txt
172 						*/
173 						if (e + 5 < ue && *(e + 5) == ':') {
174 							s = e + 4;
175 						}
176 						goto just_path;
177 					}
178 				}
179 			} else {
180 				s = e + 1;
181 				goto just_path;
182 			}
183 		}
184 	} else if (e) { /* no scheme; starts with colon: look for port */
185 		parse_port:
186 		p = e + 1;
187 		pp = p;
188 
189 		while (pp < ue && pp - p < 6 && isdigit(*pp)) {
190 			pp++;
191 		}
192 
193 		if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) {
194 			zend_long port;
195 			char *end;
196 			memcpy(port_buf, p, (pp - p));
197 			port_buf[pp - p] = '\0';
198 			port = ZEND_STRTOL(port_buf, &end, 10);
199 			if (port >= 0 && port <= 65535 && end != port_buf) {
200 				*has_port = 1;
201 				ret->port = (unsigned short) port;
202 				if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
203 				    s += 2;
204 				}
205 			} else {
206 				php_url_free(ret);
207 				return NULL;
208 			}
209 		} else if (p == pp && pp == ue) {
210 			php_url_free(ret);
211 			return NULL;
212 		} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
213 			s += 2;
214 		} else {
215 			goto just_path;
216 		}
217 	} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
218 		s += 2;
219 	} else {
220 		goto just_path;
221 	}
222 
223 parse_host:
224 	e = binary_strcspn(s, ue, "/?#");
225 
226 	/* check for login and password */
227 	if ((p = zend_memrchr(s, '@', (e-s)))) {
228 		if ((pp = memchr(s, ':', (p-s)))) {
229 			ret->user = zend_string_init(s, (pp-s), 0);
230 			php_replace_controlchars_ex(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
231 
232 			pp++;
233 			ret->pass = zend_string_init(pp, (p-pp), 0);
234 			php_replace_controlchars_ex(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass));
235 		} else {
236 			ret->user = zend_string_init(s, (p-s), 0);
237 			php_replace_controlchars_ex(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
238 		}
239 
240 		s = p + 1;
241 	}
242 
243 	/* check for port */
244 	if (s < ue && *s == '[' && *(e-1) == ']') {
245 		/* Short circuit portscan,
246 		   we're dealing with an
247 		   IPv6 embedded address */
248 		p = NULL;
249 	} else {
250 		p = zend_memrchr(s, ':', (e-s));
251 	}
252 
253 	if (p) {
254 		if (!ret->port) {
255 			p++;
256 			if (e-p > 5) { /* port cannot be longer then 5 characters */
257 				php_url_free(ret);
258 				return NULL;
259 			} else if (e - p > 0) {
260 				zend_long port;
261 				char *end;
262 				memcpy(port_buf, p, (e - p));
263 				port_buf[e - p] = '\0';
264 				port = ZEND_STRTOL(port_buf, &end, 10);
265 				if (port >= 0 && port <= 65535 && end != port_buf) {
266 					*has_port = 1;
267 					ret->port = (unsigned short)port;
268 				} else {
269 					php_url_free(ret);
270 					return NULL;
271 				}
272 			}
273 			p--;
274 		}
275 	} else {
276 		p = e;
277 	}
278 
279 	/* check if we have a valid host, if we don't reject the string as url */
280 	if ((p-s) < 1) {
281 		php_url_free(ret);
282 		return NULL;
283 	}
284 
285 	ret->host = zend_string_init(s, (p-s), 0);
286 	php_replace_controlchars_ex(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host));
287 
288 	if (e == ue) {
289 		return ret;
290 	}
291 
292 	s = e;
293 
294 	just_path:
295 
296 	e = ue;
297 	p = memchr(s, '#', (e - s));
298 	if (p) {
299 		p++;
300 		if (p < e) {
301 			ret->fragment = zend_string_init(p, (e - p), 0);
302 			php_replace_controlchars_ex(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment));
303 		} else {
304 			ret->fragment = ZSTR_EMPTY_ALLOC();
305 		}
306 		e = p-1;
307 	}
308 
309 	p = memchr(s, '?', (e - s));
310 	if (p) {
311 		p++;
312 		if (p < e) {
313 			ret->query = zend_string_init(p, (e - p), 0);
314 			php_replace_controlchars_ex(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query));
315 		} else {
316 			ret->query = ZSTR_EMPTY_ALLOC();
317 		}
318 		e = p-1;
319 	}
320 
321 	if (s < e || s == ue) {
322 		ret->path = zend_string_init(s, (e - s), 0);
323 		php_replace_controlchars_ex(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path));
324 	}
325 
326 	return ret;
327 }
328 /* }}} */
329 
330 /* {{{ Parse a URL and return its components */
PHP_FUNCTION(parse_url)331 PHP_FUNCTION(parse_url)
332 {
333 	char *str;
334 	size_t str_len;
335 	php_url *resource;
336 	zend_long key = -1;
337 	zval tmp;
338 	bool has_port;
339 
340 	ZEND_PARSE_PARAMETERS_START(1, 2)
341 		Z_PARAM_STRING(str, str_len)
342 		Z_PARAM_OPTIONAL
343 		Z_PARAM_LONG(key)
344 	ZEND_PARSE_PARAMETERS_END();
345 
346 	resource = php_url_parse_ex2(str, str_len, &has_port);
347 	if (resource == NULL) {
348 		/* @todo Find a method to determine why php_url_parse_ex() failed */
349 		RETURN_FALSE;
350 	}
351 
352 	if (key > -1) {
353 		switch (key) {
354 			case PHP_URL_SCHEME:
355 				if (resource->scheme != NULL) RETVAL_STR_COPY(resource->scheme);
356 				break;
357 			case PHP_URL_HOST:
358 				if (resource->host != NULL) RETVAL_STR_COPY(resource->host);
359 				break;
360 			case PHP_URL_PORT:
361 				if (has_port) RETVAL_LONG(resource->port);
362 				break;
363 			case PHP_URL_USER:
364 				if (resource->user != NULL) RETVAL_STR_COPY(resource->user);
365 				break;
366 			case PHP_URL_PASS:
367 				if (resource->pass != NULL) RETVAL_STR_COPY(resource->pass);
368 				break;
369 			case PHP_URL_PATH:
370 				if (resource->path != NULL) RETVAL_STR_COPY(resource->path);
371 				break;
372 			case PHP_URL_QUERY:
373 				if (resource->query != NULL) RETVAL_STR_COPY(resource->query);
374 				break;
375 			case PHP_URL_FRAGMENT:
376 				if (resource->fragment != NULL) RETVAL_STR_COPY(resource->fragment);
377 				break;
378 			default:
379 				zend_argument_value_error(2, "must be a valid URL component identifier, " ZEND_LONG_FMT " given", key);
380 				break;
381 		}
382 		goto done;
383 	}
384 
385 	/* allocate an array for return */
386 	array_init(return_value);
387 
388     /* add the various elements to the array */
389 	if (resource->scheme != NULL) {
390 		ZVAL_STR_COPY(&tmp, resource->scheme);
391 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_SCHEME), &tmp);
392 	}
393 	if (resource->host != NULL) {
394 		ZVAL_STR_COPY(&tmp, resource->host);
395 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_HOST), &tmp);
396 	}
397 	if (has_port) {
398 		ZVAL_LONG(&tmp, resource->port);
399 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PORT), &tmp);
400 	}
401 	if (resource->user != NULL) {
402 		ZVAL_STR_COPY(&tmp, resource->user);
403 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &tmp);
404 	}
405 	if (resource->pass != NULL) {
406 		ZVAL_STR_COPY(&tmp, resource->pass);
407 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PASS), &tmp);
408 	}
409 	if (resource->path != NULL) {
410 		ZVAL_STR_COPY(&tmp, resource->path);
411 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PATH), &tmp);
412 	}
413 	if (resource->query != NULL) {
414 		ZVAL_STR_COPY(&tmp, resource->query);
415 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_QUERY), &tmp);
416 	}
417 	if (resource->fragment != NULL) {
418 		ZVAL_STR_COPY(&tmp, resource->fragment);
419 		zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &tmp);
420 	}
421 done:
422 	php_url_free(resource);
423 }
424 /* }}} */
425 
426 /* {{{ php_htoi */
/* Converts the two characters at s[0] and s[1], read as hexadecimal digits
 * (either case), into the byte value they encode. Callers are expected to
 * have validated both characters with isxdigit(); anything else simply goes
 * through the same letter arithmetic. */
static int php_htoi(char *s)
{
	const unsigned char *digits = (const unsigned char *)s;
	int result = 0;
	int i;

	for (i = 0; i < 2; i++) {
		int ch = digits[i];
		int nibble;

		if (isupper(ch)) {
			ch = tolower(ch);
		}

		if (ch >= '0' && ch <= '9') {
			nibble = ch - '0';
		} else {
			nibble = ch - 'a' + 10;
		}

		/* The high nibble is processed first and shifted up by the second pass. */
		result = result * 16 + nibble;
	}

	return result;
}
444 /* }}} */
445 
446 /* rfc1738:
447 
448    ...The characters ";",
449    "/", "?", ":", "@", "=" and "&" are the characters which may be
450    reserved for special meaning within a scheme...
451 
452    ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
453    reserved characters used for their reserved purposes may be used
454    unencoded within a URL...
455 
456    For added safety, we only leave -_. unencoded.
457  */
458 
459 static const unsigned char hexchars[] = "0123456789ABCDEF";
460 
php_url_encode_impl(const char * s,size_t len,bool raw)461 static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, bool raw) /* {{{ */ {
462 	unsigned char c;
463 	unsigned char *to;
464 	unsigned char const *from, *end;
465 	zend_string *start;
466 
467 	from = (unsigned char *)s;
468 	end = (unsigned char *)s + len;
469 	start = zend_string_safe_alloc(3, len, 0, 0);
470 	to = (unsigned char*)ZSTR_VAL(start);
471 
472 #ifdef __SSE2__
473 	while (from + 16 < end) {
474 		__m128i mask;
475 		uint32_t bits;
476 		const __m128i _A = _mm_set1_epi8('A' - 1);
477 		const __m128i Z_ = _mm_set1_epi8('Z' + 1);
478 		const __m128i _a = _mm_set1_epi8('a' - 1);
479 		const __m128i z_ = _mm_set1_epi8('z' + 1);
480 		const __m128i _zero = _mm_set1_epi8('0' - 1);
481 		const __m128i nine_ = _mm_set1_epi8('9' + 1);
482 		const __m128i dot = _mm_set1_epi8('.');
483 		const __m128i minus = _mm_set1_epi8('-');
484 		const __m128i under = _mm_set1_epi8('_');
485 
486 		__m128i in = _mm_loadu_si128((__m128i *)from);
487 
488 		__m128i gt = _mm_cmpgt_epi8(in, _A);
489 		__m128i lt = _mm_cmplt_epi8(in, Z_);
490 		mask = _mm_and_si128(lt, gt); /* upper */
491 		gt = _mm_cmpgt_epi8(in, _a);
492 		lt = _mm_cmplt_epi8(in, z_);
493 		mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */
494 		gt = _mm_cmpgt_epi8(in, _zero);
495 		lt = _mm_cmplt_epi8(in, nine_);
496 		mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */
497 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot));
498 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus));
499 		mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under));
500 
501 		if (!raw) {
502 			const __m128i blank = _mm_set1_epi8(' ');
503 			__m128i eq = _mm_cmpeq_epi8(in, blank);
504 			if (_mm_movemask_epi8(eq)) {
505 				in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' ')));
506 				mask = _mm_or_si128(mask, eq);
507 			}
508 		}
509 		if (raw) {
510 			const __m128i wavy = _mm_set1_epi8('~');
511 			mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy));
512 		}
513 		if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) {
514 			_mm_storeu_si128((__m128i*)to, in);
515 			to += 16;
516 		} else {
517 			int i;
518 			unsigned char xmm[16];
519 			_mm_storeu_si128((__m128i*)xmm, in);
520 			for (i = 0; i < sizeof(xmm); i++) {
521 				if ((bits & (0x1 << i))) {
522 					*to++ = xmm[i];
523 				} else {
524 					*to++ = '%';
525 					*to++ = hexchars[xmm[i] >> 4];
526 					*to++ = hexchars[xmm[i] & 0xf];
527 				}
528 			}
529 		}
530 		from += 16;
531 	}
532 #endif
533 	while (from < end) {
534 		c = *from++;
535 
536 		if (!raw && c == ' ') {
537 			*to++ = '+';
538 		} else if ((c < '0' && c != '-' && c != '.') ||
539 				(c < 'A' && c > '9') ||
540 				(c > 'Z' && c < 'a' && c != '_') ||
541 				(c > 'z' && (!raw || c != '~'))) {
542 			to[0] = '%';
543 			to[1] = hexchars[c >> 4];
544 			to[2] = hexchars[c & 15];
545 			to += 3;
546 		} else {
547 			*to++ = c;
548 		}
549 	}
550 	*to = '\0';
551 
552 	start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0);
553 
554 	return start;
555 }
556 /* }}} */
557 
558 /* {{{ php_url_encode */
php_url_encode(char const * s,size_t len)559 PHPAPI zend_string *php_url_encode(char const *s, size_t len)
560 {
561 	return php_url_encode_impl(s, len, 0);
562 }
563 /* }}} */
564 
565 /* {{{ URL-encodes string */
PHP_FUNCTION(urlencode)566 PHP_FUNCTION(urlencode)
567 {
568 	zend_string *in_str;
569 
570 	ZEND_PARSE_PARAMETERS_START(1, 1)
571 		Z_PARAM_STR(in_str)
572 	ZEND_PARSE_PARAMETERS_END();
573 
574 	RETURN_STR(php_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
575 }
576 /* }}} */
577 
578 /* {{{ Decodes URL-encoded string */
PHP_FUNCTION(urldecode)579 PHP_FUNCTION(urldecode)
580 {
581 	zend_string *in_str, *out_str;
582 
583 	ZEND_PARSE_PARAMETERS_START(1, 1)
584 		Z_PARAM_STR(in_str)
585 	ZEND_PARSE_PARAMETERS_END();
586 
587 	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
588 	ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
589 
590 	RETURN_NEW_STR(out_str);
591 }
592 /* }}} */
593 
594 /* {{{ php_url_decode */
php_url_decode(char * str,size_t len)595 PHPAPI size_t php_url_decode(char *str, size_t len)
596 {
597 	char *dest = str;
598 	char *data = str;
599 
600 	while (len--) {
601 		if (*data == '+') {
602 			*dest = ' ';
603 		}
604 		else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
605 				 && isxdigit((int) *(data + 2))) {
606 			*dest = (char) php_htoi(data + 1);
607 			data += 2;
608 			len -= 2;
609 		} else {
610 			*dest = *data;
611 		}
612 		data++;
613 		dest++;
614 	}
615 	*dest = '\0';
616 	return dest - str;
617 }
618 /* }}} */
619 
620 /* {{{ php_raw_url_encode */
php_raw_url_encode(char const * s,size_t len)621 PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len)
622 {
623 	return php_url_encode_impl(s, len, 1);
624 }
625 /* }}} */
626 
627 /* {{{ URL-encodes string */
PHP_FUNCTION(rawurlencode)628 PHP_FUNCTION(rawurlencode)
629 {
630 	zend_string *in_str;
631 
632 	ZEND_PARSE_PARAMETERS_START(1, 1)
633 		Z_PARAM_STR(in_str)
634 	ZEND_PARSE_PARAMETERS_END();
635 
636 	RETURN_STR(php_raw_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
637 }
638 /* }}} */
639 
/* {{{ Decodes URL-encoded string */
PHP_FUNCTION(rawurldecode)641 PHP_FUNCTION(rawurldecode)
642 {
643 	zend_string *in_str, *out_str;
644 
645 	ZEND_PARSE_PARAMETERS_START(1, 1)
646 		Z_PARAM_STR(in_str)
647 	ZEND_PARSE_PARAMETERS_END();
648 
649 	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
650 	ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
651 
652 	RETURN_NEW_STR(out_str);
653 }
654 /* }}} */
655 
656 /* {{{ php_raw_url_decode */
php_raw_url_decode(char * str,size_t len)657 PHPAPI size_t php_raw_url_decode(char *str, size_t len)
658 {
659 	char *dest = str;
660 	char *data = str;
661 
662 	while (len--) {
663 		if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
664 			&& isxdigit((int) *(data + 2))) {
665 			*dest = (char) php_htoi(data + 1);
666 			data += 2;
667 			len -= 2;
668 		} else {
669 			*dest = *data;
670 		}
671 		data++;
672 		dest++;
673 	}
674 	*dest = '\0';
675 	return dest - str;
676 }
677 /* }}} */
678 
679 /* {{{ fetches all the headers sent by the server in response to a HTTP request */
PHP_FUNCTION(get_headers)680 PHP_FUNCTION(get_headers)
681 {
682 	char *url;
683 	size_t url_len;
684 	php_stream *stream;
685 	zval *prev_val, *hdr = NULL;
686 	bool format = 0;
687 	zval *zcontext = NULL;
688 	php_stream_context *context;
689 
690 	ZEND_PARSE_PARAMETERS_START(1, 3)
691 		Z_PARAM_PATH(url, url_len)
692 		Z_PARAM_OPTIONAL
693 		Z_PARAM_BOOL(format)
694 		Z_PARAM_RESOURCE_OR_NULL(zcontext)
695 	ZEND_PARSE_PARAMETERS_END();
696 
697 	context = php_stream_context_from_zval(zcontext, 0);
698 
699 	if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) {
700 		RETURN_FALSE;
701 	}
702 
703 	if (Z_TYPE(stream->wrapperdata) != IS_ARRAY) {
704 		php_stream_close(stream);
705 		RETURN_FALSE;
706 	}
707 
708 	array_init(return_value);
709 
710 	ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(&stream->wrapperdata), hdr) {
711 		if (Z_TYPE_P(hdr) != IS_STRING) {
712 			continue;
713 		}
714 		if (!format) {
715 no_name_header:
716 			add_next_index_str(return_value, zend_string_copy(Z_STR_P(hdr)));
717 		} else {
718 			char c;
719 			char *s, *p;
720 
721 			if ((p = strchr(Z_STRVAL_P(hdr), ':'))) {
722 				c = *p;
723 				*p = '\0';
724 				s = p + 1;
725 				while (isspace((int)*(unsigned char *)s)) {
726 					s++;
727 				}
728 
729 				if ((prev_val = zend_hash_str_find(Z_ARRVAL_P(return_value), Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)))) == NULL) {
730 					add_assoc_stringl_ex(return_value, Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)), s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
731 				} else { /* some headers may occur more than once, therefore we need to remake the string into an array */
732 					convert_to_array(prev_val);
733 					add_next_index_stringl(prev_val, s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
734 				}
735 
736 				*p = c;
737 			} else {
738 				goto no_name_header;
739 			}
740 		}
741 	} ZEND_HASH_FOREACH_END();
742 
743 	php_stream_close(stream);
744 }
745 /* }}} */
746