xref: /PHP-7.3/ext/standard/url_scanner_ex.re (revision 8d3f8ca1)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2018 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  |         Yasuo Ohgaki <yohgaki@ohgaki.net>                            |
17  +----------------------------------------------------------------------+
18*/
19
20#include "php.h"
21
22#ifdef HAVE_UNISTD_H
23#include <unistd.h>
24#endif
25#ifdef HAVE_LIMITS_H
26#include <limits.h>
27#endif
28
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32
33#include "SAPI.h"
34#include "php_ini.h"
35#include "php_globals.h"
36#include "php_string.h"
37#define STATE_TAG SOME_OTHER_STATE_TAG
38#include "basic_functions.h"
39#include "url.h"
40#include "html.h"
41#undef STATE_TAG
42
43#define url_scanner url_scanner_ex
44
45#include "zend_smart_str.h"
46
47static void tag_dtor(zval *zv)
48{
49	free(Z_PTR_P(zv));
50}
51
52static int php_ini_on_update_tags(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
53{
54	url_adapt_state_ex_t *ctx;
55	char *key;
56	char *tmp;
57	char *lasts = NULL;
58
59	if (type) {
60		ctx = &BG(url_adapt_session_ex);
61	} else {
62		ctx = &BG(url_adapt_output_ex);
63	}
64
65	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
66
67	if (ctx->tags)
68		zend_hash_destroy(ctx->tags);
69	else {
70		ctx->tags = malloc(sizeof(HashTable));
71		if (!ctx->tags) {
72			efree(tmp);
73			return FAILURE;
74		}
75	}
76
77	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
78
79	for (key = php_strtok_r(tmp, ",", &lasts);
80		 key;
81		 key = php_strtok_r(NULL, ",", &lasts)) {
82		char *val;
83
84		val = strchr(key, '=');
85		if (val) {
86			char *q;
87			size_t keylen;
88			zend_string *str;
89
90			*val++ = '\0';
91			for (q = key; *q; q++) {
92				*q = tolower(*q);
93			}
94			keylen = q - key;
95			str = zend_string_init(key, keylen, 1);
96			GC_MAKE_PERSISTENT_LOCAL(str);
97			zend_hash_add_mem(ctx->tags, str, val, strlen(val)+1);
98			zend_string_release_ex(str, 1);
99		}
100	}
101
102	efree(tmp);
103
104	return SUCCESS;
105}
106
107static PHP_INI_MH(OnUpdateSessionTags)
108{
109	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
110}
111
112static PHP_INI_MH(OnUpdateOutputTags)
113{
114	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
115}
116
117static int php_ini_on_update_hosts(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
118{
119	HashTable *hosts;
120	char *key;
121	char *tmp;
122	char *lasts = NULL;
123
124	if (type) {
125		hosts = &BG(url_adapt_session_hosts_ht);
126	} else {
127		hosts = &BG(url_adapt_output_hosts_ht);
128	}
129	zend_hash_clean(hosts);
130
131	/* Use user supplied host whitelist */
132	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
133	for (key = php_strtok_r(tmp, ",", &lasts);
134		 key;
135		 key = php_strtok_r(NULL, ",", &lasts)) {
136		size_t keylen;
137		zend_string *tmp_key;
138		char *q;
139
140		for (q = key; *q; q++) {
141			*q = tolower(*q);
142		}
143		keylen = q - key;
144		if (keylen > 0) {
145			tmp_key = zend_string_init(key, keylen, 0);
146			zend_hash_add_empty_element(hosts, tmp_key);
147			zend_string_release_ex(tmp_key, 0);
148		}
149	}
150	efree(tmp);
151
152	return SUCCESS;
153}
154
155static PHP_INI_MH(OnUpdateSessionHosts)
156{
157	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
158}
159
160static PHP_INI_MH(OnUpdateOutputHosts)
161{
162	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
163}
164
165/* FIXME: OnUpdate*Hosts cannot set default to $_SERVER['HTTP_HOST'] at startup */
166PHP_INI_BEGIN()
167	STD_PHP_INI_ENTRY("session.trans_sid_tags", "a=href,area=href,frame=src,form=", PHP_INI_ALL, OnUpdateSessionTags, url_adapt_session_ex, php_basic_globals, basic_globals)
168	STD_PHP_INI_ENTRY("session.trans_sid_hosts", "", PHP_INI_ALL, OnUpdateSessionHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
169	STD_PHP_INI_ENTRY("url_rewriter.tags", "form=", PHP_INI_ALL, OnUpdateOutputTags, url_adapt_session_ex, php_basic_globals, basic_globals)
170	STD_PHP_INI_ENTRY("url_rewriter.hosts", "", PHP_INI_ALL, OnUpdateOutputHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
171PHP_INI_END()
172
/* Named character classes used by the re2c rules in this file. */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/

/*
 * First set of re2c interface macros. They are #undef'd again further down,
 * before the tag scanner defines its own set.
 */
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYMARKER r
#define YYLIMIT q
#define YYFILL(n) goto done
186
187static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
188{
189	php_url *url_parts;
190
191	smart_str_0(url); /* FIXME: Bug #70480 php_url_parse_ex() crashes by processing chars exceed len */
192	url_parts = php_url_parse_ex(ZSTR_VAL(url->s), ZSTR_LEN(url->s));
193
194	/* Ignore malformed URLs */
195	if (!url_parts) {
196		smart_str_append_smart_str(dest, url);
197		return;
198	}
199
200	/* Don't modify URLs of the format "#mark" */
201	if (url_parts->fragment && '#' == ZSTR_VAL(url->s)[0]) {
202		smart_str_append_smart_str(dest, url);
203		php_url_free(url_parts);
204		return;
205	}
206
207	/* Check protocol. Only http/https is allowed. */
208	if (url_parts->scheme
209		&& !zend_string_equals_literal_ci(url_parts->scheme, "http")
210		&& !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
211		smart_str_append_smart_str(dest, url);
212		php_url_free(url_parts);
213		return;
214	}
215
216	/* Check host whitelist. If it's not listed, do nothing. */
217	if (url_parts->host) {
218		zend_string *tmp = zend_string_tolower(url_parts->host);
219		if (!zend_hash_exists(&BG(url_adapt_session_hosts_ht), tmp)) {
220			zend_string_release_ex(tmp, 0);
221			smart_str_append_smart_str(dest, url);
222			php_url_free(url_parts);
223			return;
224		}
225		zend_string_release_ex(tmp, 0);
226	}
227
228	/*
229	 * When URL does not have path and query string add "/?".
230	 * i.e. If URL is only "?foo=bar", should not add "/?".
231	 */
232	if (!url_parts->path && !url_parts->query && !url_parts->fragment) {
233		/* URL is http://php.net or like */
234		smart_str_append_smart_str(dest, url);
235		smart_str_appendc(dest, '/');
236		smart_str_appendc(dest, '?');
237		smart_str_append_smart_str(dest, url_app);
238		php_url_free(url_parts);
239		return;
240	}
241
242	if (url_parts->scheme) {
243		smart_str_appends(dest, ZSTR_VAL(url_parts->scheme));
244		smart_str_appends(dest, "://");
245	} else if (*(ZSTR_VAL(url->s)) == '/' && *(ZSTR_VAL(url->s)+1) == '/') {
246		smart_str_appends(dest, "//");
247	}
248	if (url_parts->user) {
249		smart_str_appends(dest, ZSTR_VAL(url_parts->user));
250		if (url_parts->pass) {
251			smart_str_appends(dest, ZSTR_VAL(url_parts->pass));
252			smart_str_appendc(dest, ':');
253		}
254		smart_str_appendc(dest, '@');
255	}
256	if (url_parts->host) {
257		smart_str_appends(dest, ZSTR_VAL(url_parts->host));
258	}
259	if (url_parts->port) {
260		smart_str_appendc(dest, ':');
261		smart_str_append_unsigned(dest, (long)url_parts->port);
262	}
263	if (url_parts->path) {
264		smart_str_appends(dest, ZSTR_VAL(url_parts->path));
265	}
266	smart_str_appendc(dest, '?');
267	if (url_parts->query) {
268		smart_str_appends(dest, ZSTR_VAL(url_parts->query));
269		smart_str_appends(dest, separator);
270		smart_str_append_smart_str(dest, url_app);
271	} else {
272		smart_str_append_smart_str(dest, url_app);
273	}
274	if (url_parts->fragment) {
275		smart_str_appendc(dest, '#');
276		smart_str_appends(dest, ZSTR_VAL(url_parts->fragment));
277	}
278	php_url_free(url_parts);
279}
280
/* Values of ctx->tag_type: TAG_FORM marks a <form> tag, anything else is TAG_NORMAL. */
enum {
	TAG_NORMAL = 0,
	TAG_FORM   = 1
};

/* Values of ctx->attr_type: ATTR_ACTION marks the action attribute of a form. */
enum {
	ATTR_NORMAL = 0,
	ATTR_ACTION = 1
};

/* Drop the first set of re2c interface macros; a new set is defined below. */
#undef YYCTYPE
#undef YYCURSOR
#undef YYMARKER
#undef YYLIMIT
#undef YYFILL
296
297static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
298{
299	char f = 0;
300
301	/* arg.s is string WITHOUT NUL.
302	   To avoid partial match, NUL is added here */
303	ZSTR_VAL(ctx->arg.s)[ZSTR_LEN(ctx->arg.s)] = '\0';
304	if (!strcasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data)) {
305		f = 1;
306	}
307
308	if (quotes) {
309		smart_str_appendc(&ctx->result, type);
310	}
311	if (f) {
312		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
313	} else {
314		smart_str_append_smart_str(&ctx->result, &ctx->val);
315	}
316	if (quotes) {
317		smart_str_appendc(&ctx->result, type);
318	}
319}
320
/* Scanner states stored in ctx->state; xx_mainloop() resumes at the matching label. */
enum {
	STATE_PLAIN      = 0,
	STATE_TAG        = 1,
	STATE_NEXT_ARG   = 2,
	STATE_ARG        = 3,
	STATE_BEFORE_VAL = 4,
	STATE_VAL        = 5
};

/* re2c interface macros for the tag scanner in xx_mainloop() */
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYMARKER q
#define YYLIMIT end
#define YYFILL(n) goto stop
#define STATE ctx->state

/* Parameter list / argument list shared by the scanner action helpers */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR
#define STD_ARGS ctx, start, xp

/* scdebug((fmt, ...)) prints a trace when SCANNER_DEBUG is set and expands to nothing otherwise */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
345
346static inline void passthru(STD_PARA)
347{
348	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
349	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
350}
351
352
353static int check_http_host(char *target)
354{
355	zval *host, *tmp;
356	zend_string *host_tmp;
357	char *colon;
358
359	if ((tmp  = zend_hash_str_find(&EG(symbol_table), ZEND_STRL("_SERVER"))) &&
360		Z_TYPE_P(tmp) == IS_ARRAY &&
361		(host = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("HTTP_HOST"))) &&
362		Z_TYPE_P(host) == IS_STRING) {
363		host_tmp = zend_string_init(Z_STRVAL_P(host), Z_STRLEN_P(host), 0);
364		/* HTTP_HOST could be 'localhost:8888' etc. */
365		colon = strchr(ZSTR_VAL(host_tmp), ':');
366		if (colon) {
367			ZSTR_LEN(host_tmp) = colon - ZSTR_VAL(host_tmp);
368			ZSTR_VAL(host_tmp)[ZSTR_LEN(host_tmp)] = '\0';
369		}
370		if (!strcasecmp(ZSTR_VAL(host_tmp), target)) {
371			zend_string_release_ex(host_tmp, 0);
372			return SUCCESS;
373		}
374		zend_string_release_ex(host_tmp, 0);
375	}
376	return FAILURE;
377}
378
379static int check_host_whitelist(url_adapt_state_ex_t *ctx)
380{
381	php_url *url_parts = NULL;
382	HashTable *allowed_hosts = ctx->type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
383
384	ZEND_ASSERT(ctx->tag_type == TAG_FORM);
385
386	if (ctx->attr_val.s && ZSTR_LEN(ctx->attr_val.s)) {
387		url_parts = php_url_parse_ex(ZSTR_VAL(ctx->attr_val.s), ZSTR_LEN(ctx->attr_val.s));
388	} else {
389		return SUCCESS; /* empty URL is valid */
390	}
391
392	if (!url_parts) {
393		return FAILURE;
394	}
395	if (url_parts->scheme) {
396		/* Only http/https should be handled.
397		   A bit hacky check this here, but saves a URL parse. */
398		if (!zend_string_equals_literal_ci(url_parts->scheme, "http") &&
399			!zend_string_equals_literal_ci(url_parts->scheme, "https")) {
400		php_url_free(url_parts);
401		return FAILURE;
402		}
403	}
404	if (!url_parts->host) {
405		php_url_free(url_parts);
406		return SUCCESS;
407	}
408	if (!zend_hash_num_elements(allowed_hosts) &&
409		check_http_host(ZSTR_VAL(url_parts->host)) == SUCCESS) {
410		php_url_free(url_parts);
411		return SUCCESS;
412	}
413	if (!zend_hash_find(allowed_hosts, url_parts->host)) {
414		php_url_free(url_parts);
415		return FAILURE;
416	}
417	php_url_free(url_parts);
418	return SUCCESS;
419}
420
421/*
422 * This function appends a hidden input field after a <form>.
423 */
424static void handle_form(STD_PARA)
425{
426	int doit = 0;
427
428	if (ZSTR_LEN(ctx->form_app.s) > 0) {
429		switch (ZSTR_LEN(ctx->tag.s)) {
430			case sizeof("form") - 1:
431				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))
432					&& check_host_whitelist(ctx) == SUCCESS) {
433					doit = 1;
434				}
435				break;
436		}
437	}
438
439	if (doit) {
440		smart_str_append_smart_str(&ctx->result, &ctx->form_app);
441	}
442}
443
444/*
445 *  HANDLE_TAG copies the HTML Tag and checks whether we
446 *  have that tag in our table. If we might modify it,
447 *  we continue to scan the tag, otherwise we simply copy the complete
448 *  HTML stuff to the result buffer.
449 */
450
451static inline void handle_tag(STD_PARA)
452{
453	int ok = 0;
454	unsigned int i;
455
456	if (ctx->tag.s) {
457		ZSTR_LEN(ctx->tag.s) = 0;
458	}
459	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
460	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
461		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
462    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
463	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) {
464		ok = 1;
465		if (ZSTR_LEN(ctx->tag.s) == sizeof("form")-1
466			&& !strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))) {
467			ctx->tag_type = TAG_FORM;
468		} else {
469			ctx->tag_type = TAG_NORMAL;
470		}
471	}
472	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
473}
474
475static inline void handle_arg(STD_PARA)
476{
477	if (ctx->arg.s) {
478		ZSTR_LEN(ctx->arg.s) = 0;
479	}
480	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
481	if (ctx->tag_type == TAG_FORM &&
482		strncasecmp(ZSTR_VAL(ctx->arg.s), "action", ZSTR_LEN(ctx->arg.s)) == 0) {
483		ctx->attr_type = ATTR_ACTION;
484	} else {
485		ctx->attr_type = ATTR_NORMAL;
486	}
487}
488
489static inline void handle_val(STD_PARA, char quotes, char type)
490{
491	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
492	if (ctx->tag_type == TAG_FORM && ctx->attr_type == ATTR_ACTION) {
493		smart_str_setl(&ctx->attr_val, start + quotes, YYCURSOR - start - quotes * 2);
494	}
495	tag_arg(ctx, quotes, type);
496}
497
498static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
499{
500	char *end, *q;
501	char *xp;
502	char *start;
503	size_t rest;
504
505	smart_str_appendl(&ctx->buf, newdata, newlen);
506
507	YYCURSOR = ZSTR_VAL(ctx->buf.s);
508	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
509
510	switch (STATE) {
511		case STATE_PLAIN: goto state_plain;
512		case STATE_TAG: goto state_tag;
513		case STATE_NEXT_ARG: goto state_next_arg;
514		case STATE_ARG: goto state_arg;
515		case STATE_BEFORE_VAL: goto state_before_val;
516		case STATE_VAL: goto state_val;
517	}
518
519
520state_plain_begin:
521	STATE = STATE_PLAIN;
522
523state_plain:
524	start = YYCURSOR;
525/*!re2c
526  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
527  N+ 				{ passthru(STD_ARGS); goto state_plain; }
528*/
529
530state_tag:
531	start = YYCURSOR;
532/*!re2c
533  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
534  any		{ passthru(STD_ARGS); goto state_plain_begin; }
535*/
536
537state_next_arg_begin:
538	STATE = STATE_NEXT_ARG;
539
540state_next_arg:
541	start = YYCURSOR;
542/*!re2c
543  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
544  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
545  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
546  any		{ passthru(STD_ARGS); goto state_plain_begin; }
547*/
548
549state_arg:
550	start = YYCURSOR;
551/*!re2c
552  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
553  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
554*/
555
556state_before_val:
557	start = YYCURSOR;
558/*!re2c
559  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
560  any				{ --YYCURSOR; goto state_next_arg_begin; }
561*/
562
563
564state_val:
565	start = YYCURSOR;
566/*!re2c
567  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
568  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
569  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
570  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
571*/
572
573stop:
574	if (YYLIMIT < start) {
575		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
576		rest = 0;
577	} else {
578		rest = YYLIMIT - start;
579		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
580	}
581
582	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
583	ZSTR_LEN(ctx->buf.s) = rest;
584}
585
586
587PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int encode)
588{
589	char *result;
590	smart_str surl = {0};
591	smart_str buf = {0};
592	smart_str url_app = {0};
593	zend_string *encoded;
594
595	smart_str_appendl(&surl, url, urllen);
596
597	if (encode) {
598		encoded = php_raw_url_encode(name, strlen(name));
599		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
600		zend_string_free(encoded);
601	} else {
602		smart_str_appends(&url_app, name);
603	}
604	smart_str_appendc(&url_app, '=');
605	if (encode) {
606		encoded = php_raw_url_encode(value, strlen(value));
607		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
608		zend_string_free(encoded);
609	} else {
610		smart_str_appends(&url_app, value);
611	}
612
613	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
614
615	smart_str_0(&buf);
616	if (newlen) *newlen = ZSTR_LEN(buf.s);
617	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
618
619	smart_str_free(&url_app);
620	smart_str_free(&buf);
621
622	return result;
623}
624
625
626static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush, url_adapt_state_ex_t *ctx)
627{
628	char *retval;
629
630	xx_mainloop(ctx, src, srclen);
631
632	if (!ctx->result.s) {
633		smart_str_appendl(&ctx->result, "", 0);
634		*newlen = 0;
635	} else {
636		*newlen = ZSTR_LEN(ctx->result.s);
637	}
638	smart_str_0(&ctx->result);
639	if (do_flush) {
640		smart_str_append(&ctx->result, ctx->buf.s);
641		*newlen += ZSTR_LEN(ctx->buf.s);
642		smart_str_free(&ctx->buf);
643		smart_str_free(&ctx->val);
644		smart_str_free(&ctx->attr_val);
645	}
646	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
647	smart_str_free(&ctx->result);
648	return retval;
649}
650
651static int php_url_scanner_ex_activate(int type)
652{
653	url_adapt_state_ex_t *ctx;
654
655	if (type) {
656		ctx = &BG(url_adapt_session_ex);
657	} else {
658		ctx = &BG(url_adapt_output_ex);
659	}
660
661	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
662
663	return SUCCESS;
664}
665
666static int php_url_scanner_ex_deactivate(int type)
667{
668	url_adapt_state_ex_t *ctx;
669
670	if (type) {
671		ctx = &BG(url_adapt_session_ex);
672	} else {
673		ctx = &BG(url_adapt_output_ex);
674	}
675
676	smart_str_free(&ctx->result);
677	smart_str_free(&ctx->buf);
678	smart_str_free(&ctx->tag);
679	smart_str_free(&ctx->arg);
680	smart_str_free(&ctx->attr_val);
681
682	return SUCCESS;
683}
684
685static inline void php_url_scanner_session_handler_impl(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode, int type)
686{
687	size_t len;
688	url_adapt_state_ex_t *url_state;
689
690	if (type) {
691		url_state = &BG(url_adapt_session_ex);
692	} else {
693		url_state = &BG(url_adapt_output_ex);
694	}
695
696	if (ZSTR_LEN(url_state->url_app.s) != 0) {
697		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0), url_state);
698		if (sizeof(uint) < sizeof(size_t)) {
699			if (len > UINT_MAX)
700				len = UINT_MAX;
701		}
702		*handled_output_len = len;
703	} else if (ZSTR_LEN(url_state->url_app.s) == 0) {
704		url_adapt_state_ex_t *ctx = url_state;
705		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
706			smart_str_append(&ctx->result, ctx->buf.s);
707			smart_str_appendl(&ctx->result, output, output_len);
708
709			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
710			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
711
712			smart_str_free(&ctx->buf);
713			smart_str_free(&ctx->result);
714		} else {
715			*handled_output = estrndup(output, *handled_output_len = output_len);
716		}
717	} else {
718		*handled_output = NULL;
719	}
720}
721
722static void php_url_scanner_session_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
723{
724	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, 1);
725}
726
727static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
728{
729	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, 0);
730}
731
732static inline int php_url_scanner_add_var_impl(char *name, size_t name_len, char *value, size_t value_len, int encode, int type)
733{
734	smart_str sname = {0};
735	smart_str svalue = {0};
736	smart_str hname = {0};
737	smart_str hvalue = {0};
738	zend_string *encoded;
739	url_adapt_state_ex_t *url_state;
740	php_output_handler_func_t handler;
741
742	if (type) {
743		url_state = &BG(url_adapt_session_ex);
744		handler = php_url_scanner_session_handler;
745	} else {
746		url_state = &BG(url_adapt_output_ex);
747		handler = php_url_scanner_output_handler;
748	}
749
750	if (!url_state->active) {
751		php_url_scanner_ex_activate(type);
752		php_output_start_internal(ZEND_STRL("URL-Rewriter"), handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
753		url_state->active = 1;
754	}
755
756	if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) {
757		smart_str_appends(&url_state->url_app, PG(arg_separator).output);
758	}
759
760	if (encode) {
761		encoded = php_raw_url_encode(name, name_len);
762		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
763		encoded = php_raw_url_encode(value, value_len);
764		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
765		encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
766		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
767		encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
768		smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
769	} else {
770		smart_str_appendl(&sname, name, name_len);
771		smart_str_appendl(&svalue, value, value_len);
772		smart_str_appendl(&hname, name, name_len);
773		smart_str_appendl(&hvalue, value, value_len);
774	}
775
776	smart_str_append_smart_str(&url_state->url_app, &sname);
777	smart_str_appendc(&url_state->url_app, '=');
778	smart_str_append_smart_str(&url_state->url_app, &svalue);
779
780	smart_str_appends(&url_state->form_app, "<input type=\"hidden\" name=\"");
781	smart_str_append_smart_str(&url_state->form_app, &hname);
782	smart_str_appends(&url_state->form_app, "\" value=\"");
783	smart_str_append_smart_str(&url_state->form_app, &hvalue);
784	smart_str_appends(&url_state->form_app, "\" />");
785
786	smart_str_free(&sname);
787	smart_str_free(&svalue);
788	smart_str_free(&hname);
789	smart_str_free(&hvalue);
790
791	return SUCCESS;
792}
793
794
795PHPAPI int php_url_scanner_add_session_var(char *name, size_t name_len, char *value, size_t value_len, int encode)
796{
797	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 1);
798}
799
800
801PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int encode)
802{
803	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 0);
804}
805
806
807static inline void php_url_scanner_reset_vars_impl(int type) {
808	url_adapt_state_ex_t *url_state;
809
810	if (type) {
811		url_state = &BG(url_adapt_session_ex);
812	} else {
813		url_state = &BG(url_adapt_output_ex);
814	}
815
816	if (url_state->form_app.s) {
817		ZSTR_LEN(url_state->form_app.s) = 0;
818	}
819	if (url_state->url_app.s) {
820		ZSTR_LEN(url_state->url_app.s) = 0;
821	}
822}
823
824
825PHPAPI int php_url_scanner_reset_session_vars(void)
826{
827	php_url_scanner_reset_vars_impl(1);
828	return SUCCESS;
829}
830
831
832PHPAPI int php_url_scanner_reset_vars(void)
833{
834	php_url_scanner_reset_vars_impl(0);
835	return SUCCESS;
836}
837
838
839static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode, int type)
840{
841	char *start, *end, *limit;
842	size_t separator_len;
843	smart_str sname = {0};
844	smart_str hname = {0};
845	smart_str url_app = {0};
846	smart_str form_app = {0};
847	zend_string *encoded;
848	int ret = SUCCESS;
849	zend_bool sep_removed = 0;
850	url_adapt_state_ex_t *url_state;
851
852	if (type) {
853		url_state = &BG(url_adapt_session_ex);
854	} else {
855		url_state = &BG(url_adapt_output_ex);
856	}
857
858	/* Short circuit check. Only check url_app. */
859	if (!url_state->url_app.s || !ZSTR_LEN(url_state->url_app.s)) {
860		return SUCCESS;
861	}
862
863	if (encode) {
864		encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
865		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
866		zend_string_free(encoded);
867		encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
868		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
869		zend_string_free(encoded);
870	} else {
871		smart_str_appendl(&sname, ZSTR_VAL(name), ZSTR_LEN(name));
872		smart_str_appendl(&hname, ZSTR_VAL(name), ZSTR_LEN(name));
873	}
874	smart_str_0(&sname);
875	smart_str_0(&hname);
876
877	smart_str_append_smart_str(&url_app, &sname);
878	smart_str_appendc(&url_app, '=');
879	smart_str_0(&url_app);
880
881	smart_str_appends(&form_app, "<input type=\"hidden\" name=\"");
882	smart_str_append_smart_str(&form_app, &hname);
883	smart_str_appends(&form_app, "\" value=\"");
884	smart_str_0(&form_app);
885
886	/* Short circuit check. Only check url_app. */
887	start = (char *) php_memnstr(ZSTR_VAL(url_state->url_app.s),
888								 ZSTR_VAL(url_app.s), ZSTR_LEN(url_app.s),
889								 ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s));
890	if (!start) {
891		ret = FAILURE;
892		goto finish;
893	}
894
895	/* Get end of url var */
896	limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s);
897	end = start + ZSTR_LEN(url_app.s);
898	separator_len = strlen(PG(arg_separator).output);
899	while (end < limit) {
900		if (!memcmp(end, PG(arg_separator).output, separator_len)) {
901			end += separator_len;
902			sep_removed = 1;
903			break;
904		}
905		end++;
906	}
907	/* Remove all when this is the only rewrite var */
908	if (ZSTR_LEN(url_state->url_app.s) == end - start) {
909		php_url_scanner_reset_vars_impl(type);
910		goto finish;
911	}
912	/* Check preceding separator */
913	if (!sep_removed
914		&& (size_t)(start - PG(arg_separator).output) >= separator_len
915		&& !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) {
916		start -= separator_len;
917	}
918	/* Remove partially */
919	memmove(start, end,
920			ZSTR_LEN(url_state->url_app.s) - (end - ZSTR_VAL(url_state->url_app.s)));
921	ZSTR_LEN(url_state->url_app.s) -= end - start;
922	ZSTR_VAL(url_state->url_app.s)[ZSTR_LEN(url_state->url_app.s)] = '\0';
923
924	/* Remove form var */
925	start = (char *) php_memnstr(ZSTR_VAL(url_state->form_app.s),
926						ZSTR_VAL(form_app.s), ZSTR_LEN(form_app.s),
927						ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s));
928	if (!start) {
929		/* Should not happen */
930		ret = FAILURE;
931		php_url_scanner_reset_vars_impl(type);
932		goto finish;
933	}
934	/* Get end of form var */
935	limit = ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s);
936	end = start + ZSTR_LEN(form_app.s);
937	while (end < limit) {
938		if (*end == '>') {
939			end += 1;
940			break;
941		}
942		end++;
943	}
944	/* Remove partially */
945	memmove(start, end,
946			ZSTR_LEN(url_state->form_app.s) - (end - ZSTR_VAL(url_state->form_app.s)));
947	ZSTR_LEN(url_state->form_app.s) -= end - start;
948	ZSTR_VAL(url_state->form_app.s)[ZSTR_LEN(url_state->form_app.s)] = '\0';
949
950finish:
951	smart_str_free(&url_app);
952	smart_str_free(&form_app);
953	smart_str_free(&sname);
954	smart_str_free(&hname);
955	return ret;
956}
957
958
959PHPAPI int php_url_scanner_reset_session_var(zend_string *name, int encode)
960{
961	return php_url_scanner_reset_var_impl(name, encode, 1);
962}
963
964
965PHPAPI int php_url_scanner_reset_var(zend_string *name, int encode)
966{
967	return php_url_scanner_reset_var_impl(name, encode, 0);
968}
969
970
971PHP_MINIT_FUNCTION(url_scanner)
972{
973	REGISTER_INI_ENTRIES();
974	return SUCCESS;
975}
976
977PHP_MSHUTDOWN_FUNCTION(url_scanner)
978{
979	UNREGISTER_INI_ENTRIES();
980
981	return SUCCESS;
982}
983
984PHP_RINIT_FUNCTION(url_scanner)
985{
986	BG(url_adapt_session_ex).active    = 0;
987	BG(url_adapt_session_ex).tag_type  = 0;
988	BG(url_adapt_session_ex).attr_type = 0;
989	BG(url_adapt_output_ex).active    = 0;
990	BG(url_adapt_output_ex).tag_type  = 0;
991	BG(url_adapt_output_ex).attr_type = 0;
992	return SUCCESS;
993}
994
995PHP_RSHUTDOWN_FUNCTION(url_scanner)
996{
997	if (BG(url_adapt_session_ex).active) {
998		php_url_scanner_ex_deactivate(1);
999		BG(url_adapt_session_ex).active    = 0;
1000		BG(url_adapt_session_ex).tag_type  = 0;
1001		BG(url_adapt_session_ex).attr_type = 0;
1002	}
1003	smart_str_free(&BG(url_adapt_session_ex).form_app);
1004	smart_str_free(&BG(url_adapt_session_ex).url_app);
1005
1006	if (BG(url_adapt_output_ex).active) {
1007		php_url_scanner_ex_deactivate(0);
1008		BG(url_adapt_output_ex).active    = 0;
1009		BG(url_adapt_output_ex).tag_type  = 0;
1010		BG(url_adapt_output_ex).attr_type = 0;
1011	}
1012	smart_str_free(&BG(url_adapt_output_ex).form_app);
1013	smart_str_free(&BG(url_adapt_output_ex).url_app);
1014
1015	return SUCCESS;
1016}
1017