xref: /PHP-8.1/ext/standard/url_scanner_ex.re (revision 01b3fc03)
1/*
2  +----------------------------------------------------------------------+
3  | Copyright (c) The PHP Group                                          |
4  +----------------------------------------------------------------------+
5  | This source file is subject to version 3.01 of the PHP license,      |
6  | that is bundled with this package in the file LICENSE, and is        |
7  | available through the world-wide-web at the following url:           |
8  | https://www.php.net/license/3_01.txt                                 |
9  | If you did not receive a copy of the PHP license and are unable to   |
10  | obtain it through the world-wide-web, please send a note to          |
11  | license@php.net so we can mail you a copy immediately.               |
12  +----------------------------------------------------------------------+
13  | Author: Sascha Schumann <sascha@schumann.cx>                         |
14  |         Yasuo Ohgaki <yohgaki@ohgaki.net>                            |
15  +----------------------------------------------------------------------+
16*/
17
18#include "php.h"
19
20#ifdef HAVE_UNISTD_H
21#include <unistd.h>
22#endif
23
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28
29#include "SAPI.h"
30#include "php_ini.h"
31#include "php_globals.h"
32#include "php_string.h"
33#define STATE_TAG SOME_OTHER_STATE_TAG
34#include "basic_functions.h"
35#include "url.h"
36#include "html.h"
37#undef STATE_TAG
38
39#define url_scanner url_scanner_ex
40
41#include "zend_smart_str.h"
42
43static void tag_dtor(zval *zv)
44{
45	free(Z_PTR_P(zv));
46}
47
48static int php_ini_on_update_tags(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
49{
50	url_adapt_state_ex_t *ctx;
51	char *key;
52	char *tmp;
53	char *lasts = NULL;
54
55	if (type) {
56		ctx = &BG(url_adapt_session_ex);
57	} else {
58		ctx = &BG(url_adapt_output_ex);
59	}
60
61	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
62
63	if (ctx->tags)
64		zend_hash_destroy(ctx->tags);
65	else {
66		ctx->tags = malloc(sizeof(HashTable));
67		if (!ctx->tags) {
68			efree(tmp);
69			return FAILURE;
70		}
71	}
72
73	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
74
75	for (key = php_strtok_r(tmp, ",", &lasts);
76		 key;
77		 key = php_strtok_r(NULL, ",", &lasts)) {
78		char *val;
79
80		val = strchr(key, '=');
81		if (val) {
82			char *q;
83			size_t keylen;
84			zend_string *str;
85
86			*val++ = '\0';
87			for (q = key; *q; q++) {
88				*q = tolower(*q);
89			}
90			keylen = q - key;
91			str = zend_string_init(key, keylen, 1);
92			GC_MAKE_PERSISTENT_LOCAL(str);
93			zend_hash_add_mem(ctx->tags, str, val, strlen(val)+1);
94			zend_string_release_ex(str, 1);
95		}
96	}
97
98	efree(tmp);
99
100	return SUCCESS;
101}
102
103static PHP_INI_MH(OnUpdateSessionTags)
104{
105	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
106}
107
108static PHP_INI_MH(OnUpdateOutputTags)
109{
110	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
111}
112
113static int php_ini_on_update_hosts(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
114{
115	HashTable *hosts;
116	char *key;
117	char *tmp;
118	char *lasts = NULL;
119
120	if (type) {
121		hosts = &BG(url_adapt_session_hosts_ht);
122	} else {
123		hosts = &BG(url_adapt_output_hosts_ht);
124	}
125	zend_hash_clean(hosts);
126
127	/* Use user supplied host whitelist */
128	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
129	for (key = php_strtok_r(tmp, ",", &lasts);
130		 key;
131		 key = php_strtok_r(NULL, ",", &lasts)) {
132		size_t keylen;
133		zend_string *tmp_key;
134		char *q;
135
136		for (q = key; *q; q++) {
137			*q = tolower(*q);
138		}
139		keylen = q - key;
140		if (keylen > 0) {
141			tmp_key = zend_string_init(key, keylen, 0);
142			zend_hash_add_empty_element(hosts, tmp_key);
143			zend_string_release_ex(tmp_key, 0);
144		}
145	}
146	efree(tmp);
147
148	return SUCCESS;
149}
150
151static PHP_INI_MH(OnUpdateSessionHosts)
152{
153	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
154}
155
156static PHP_INI_MH(OnUpdateOutputHosts)
157{
158	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
159}
160
161/* FIXME: OnUpdate*Hosts cannot set default to $_SERVER['HTTP_HOST'] at startup */
162PHP_INI_BEGIN()
163	STD_PHP_INI_ENTRY("session.trans_sid_tags", "a=href,area=href,frame=src,form=", PHP_INI_ALL, OnUpdateSessionTags, url_adapt_session_ex, php_basic_globals, basic_globals)
164	STD_PHP_INI_ENTRY("session.trans_sid_hosts", "", PHP_INI_ALL, OnUpdateSessionHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
165	STD_PHP_INI_ENTRY("url_rewriter.tags", "form=", PHP_INI_ALL, OnUpdateOutputTags, url_adapt_session_ex, php_basic_globals, basic_globals)
166	STD_PHP_INI_ENTRY("url_rewriter.hosts", "", PHP_INI_ALL, OnUpdateOutputHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
167PHP_INI_END()
168
/*
 * Named re2c definitions used by the scanner rules in this file:
 *   any            - every byte value (octal 000-377)
 *   N              - any byte except '<'
 *   alpha          - ASCII letters
 *   alphanamespace - ASCII letters and ':'
 *   alphadash      - ASCII letters and '-'
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/
176
/* re2c interface macros (input type, cursor/marker/limit variables and the
 * refill action). They are #undef'ed again before the scanner macros for
 * xx_mainloop() are defined. */
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYMARKER r
#define YYLIMIT q
#define YYFILL(n) goto done
182
183static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
184{
185	php_url *url_parts;
186
187	smart_str_0(url); /* FIXME: Bug #70480 php_url_parse_ex() crashes by processing chars exceed len */
188	url_parts = php_url_parse_ex(ZSTR_VAL(url->s), ZSTR_LEN(url->s));
189
190	/* Ignore malformed URLs */
191	if (!url_parts) {
192		smart_str_append_smart_str(dest, url);
193		return;
194	}
195
196	/* Don't modify URLs of the format "#mark" */
197	if (url_parts->fragment && '#' == ZSTR_VAL(url->s)[0]) {
198		smart_str_append_smart_str(dest, url);
199		php_url_free(url_parts);
200		return;
201	}
202
203	/* Check protocol. Only http/https is allowed. */
204	if (url_parts->scheme
205		&& !zend_string_equals_literal_ci(url_parts->scheme, "http")
206		&& !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
207		smart_str_append_smart_str(dest, url);
208		php_url_free(url_parts);
209		return;
210	}
211
212	/* Check host whitelist. If it's not listed, do nothing. */
213	if (url_parts->host) {
214		zend_string *tmp = zend_string_tolower(url_parts->host);
215		if (!zend_hash_exists(&BG(url_adapt_session_hosts_ht), tmp)) {
216			zend_string_release_ex(tmp, 0);
217			smart_str_append_smart_str(dest, url);
218			php_url_free(url_parts);
219			return;
220		}
221		zend_string_release_ex(tmp, 0);
222	}
223
224	/*
225	 * When URL does not have path and query string add "/?".
226	 * i.e. If URL is only "?foo=bar", should not add "/?".
227	 */
228	if (!url_parts->path && !url_parts->query && !url_parts->fragment) {
229		/* URL is http://php.net or like */
230		smart_str_append_smart_str(dest, url);
231		smart_str_appendc(dest, '/');
232		smart_str_appendc(dest, '?');
233		smart_str_append_smart_str(dest, url_app);
234		php_url_free(url_parts);
235		return;
236	}
237
238	if (url_parts->scheme) {
239		smart_str_appends(dest, ZSTR_VAL(url_parts->scheme));
240		smart_str_appends(dest, "://");
241	} else if (*(ZSTR_VAL(url->s)) == '/' && *(ZSTR_VAL(url->s)+1) == '/') {
242		smart_str_appends(dest, "//");
243	}
244	if (url_parts->user) {
245		smart_str_appends(dest, ZSTR_VAL(url_parts->user));
246		if (url_parts->pass) {
247			smart_str_appends(dest, ZSTR_VAL(url_parts->pass));
248			smart_str_appendc(dest, ':');
249		}
250		smart_str_appendc(dest, '@');
251	}
252	if (url_parts->host) {
253		smart_str_appends(dest, ZSTR_VAL(url_parts->host));
254	}
255	if (url_parts->port) {
256		smart_str_appendc(dest, ':');
257		smart_str_append_unsigned(dest, (long)url_parts->port);
258	}
259	if (url_parts->path) {
260		smart_str_appends(dest, ZSTR_VAL(url_parts->path));
261	}
262	smart_str_appendc(dest, '?');
263	if (url_parts->query) {
264		smart_str_appends(dest, ZSTR_VAL(url_parts->query));
265		smart_str_appends(dest, separator);
266		smart_str_append_smart_str(dest, url_app);
267	} else {
268		smart_str_append_smart_str(dest, url_app);
269	}
270	if (url_parts->fragment) {
271		smart_str_appendc(dest, '#');
272		smart_str_appends(dest, ZSTR_VAL(url_parts->fragment));
273	}
274	php_url_free(url_parts);
275}
276
/* Values of ctx->tag_type, assigned by handle_tag() */
enum {
	TAG_NORMAL = 0,	/* a configured tag other than "form" */
	TAG_FORM = 1	/* the "form" tag */
};
281
/* Values of ctx->attr_type, assigned by handle_arg() */
enum {
	ATTR_NORMAL = 0,	/* any other attribute */
	ATTR_ACTION = 1	/* the "action" attribute of a form tag */
};
286
/* Drop the earlier re2c interface macros so they can be redefined below */
#undef YYCTYPE
#undef YYCURSOR
#undef YYMARKER
#undef YYLIMIT
#undef YYFILL
292
293static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
294{
295	char f = 0;
296
297	/* arg.s is string WITHOUT NUL.
298	   To avoid partial match, NUL is added here */
299	ZSTR_VAL(ctx->arg.s)[ZSTR_LEN(ctx->arg.s)] = '\0';
300	if (!strcasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data)) {
301		f = 1;
302	}
303
304	if (quotes) {
305		smart_str_appendc(&ctx->result, type);
306	}
307	if (f) {
308		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
309	} else {
310		smart_str_append_smart_str(&ctx->result, &ctx->val);
311	}
312	if (quotes) {
313		smart_str_appendc(&ctx->result, type);
314	}
315}
316
/* Scanner states stored in ctx->state; xx_mainloop() resumes at the label
 * matching the stored value. */
enum {
	STATE_PLAIN = 0,	/* outside of any tag */
	STATE_TAG = 1,		/* just after '<', expecting a tag name */
	STATE_NEXT_ARG = 2,	/* inside a tag, before the next attribute */
	STATE_ARG = 3,		/* at an attribute name */
	STATE_BEFORE_VAL = 4,	/* after an attribute name, expecting '=' */
	STATE_VAL = 5		/* at an attribute value */
};
325
/* re2c interface for the scanner in xx_mainloop(): input type, the local
 * variables used as cursor/marker/limit, and the action taken when the
 * scanner runs out of input. */
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYMARKER q
#define YYLIMIT end
#define YYFILL(n) goto stop
#define STATE ctx->state

/* Parameter list and matching argument list shared by the scanner helpers */
#define STD_ARGS ctx, start, xp
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR

/* scdebug((fmt, ...)) forwards to printf only when SCANNER_DEBUG is non-zero */
#if !SCANNER_DEBUG
#define scdebug(x)
#else
#define scdebug(x) printf x
#endif
341
342static inline void passthru(STD_PARA)
343{
344	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
345	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
346}
347
348
349static int check_http_host(char *target)
350{
351	zval *host, *tmp;
352	zend_string *host_tmp;
353	char *colon;
354
355	if ((tmp = zend_hash_find(&EG(symbol_table), ZSTR_KNOWN(ZEND_STR_AUTOGLOBAL_SERVER))) &&
356		Z_TYPE_P(tmp) == IS_ARRAY &&
357		(host = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("HTTP_HOST"))) &&
358		Z_TYPE_P(host) == IS_STRING) {
359		host_tmp = zend_string_init(Z_STRVAL_P(host), Z_STRLEN_P(host), 0);
360		/* HTTP_HOST could be 'localhost:8888' etc. */
361		colon = strchr(ZSTR_VAL(host_tmp), ':');
362		if (colon) {
363			ZSTR_LEN(host_tmp) = colon - ZSTR_VAL(host_tmp);
364			ZSTR_VAL(host_tmp)[ZSTR_LEN(host_tmp)] = '\0';
365		}
366		if (!strcasecmp(ZSTR_VAL(host_tmp), target)) {
367			zend_string_release_ex(host_tmp, 0);
368			return SUCCESS;
369		}
370		zend_string_release_ex(host_tmp, 0);
371	}
372	return FAILURE;
373}
374
375static int check_host_whitelist(url_adapt_state_ex_t *ctx)
376{
377	php_url *url_parts = NULL;
378	HashTable *allowed_hosts = ctx->type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
379
380	ZEND_ASSERT(ctx->tag_type == TAG_FORM);
381
382	if (ctx->attr_val.s && ZSTR_LEN(ctx->attr_val.s)) {
383		url_parts = php_url_parse_ex(ZSTR_VAL(ctx->attr_val.s), ZSTR_LEN(ctx->attr_val.s));
384	} else {
385		return SUCCESS; /* empty URL is valid */
386	}
387
388	if (!url_parts) {
389		return FAILURE;
390	}
391	if (url_parts->scheme) {
392		/* Only http/https should be handled.
393		   A bit hacky check this here, but saves a URL parse. */
394		if (!zend_string_equals_literal_ci(url_parts->scheme, "http") &&
395			!zend_string_equals_literal_ci(url_parts->scheme, "https")) {
396		php_url_free(url_parts);
397		return FAILURE;
398		}
399	}
400	if (!url_parts->host) {
401		php_url_free(url_parts);
402		return SUCCESS;
403	}
404	if (!zend_hash_num_elements(allowed_hosts) &&
405		check_http_host(ZSTR_VAL(url_parts->host)) == SUCCESS) {
406		php_url_free(url_parts);
407		return SUCCESS;
408	}
409	if (!zend_hash_find(allowed_hosts, url_parts->host)) {
410		php_url_free(url_parts);
411		return FAILURE;
412	}
413	php_url_free(url_parts);
414	return SUCCESS;
415}
416
417/*
418 * This function appends a hidden input field after a <form>.
419 */
420static void handle_form(STD_PARA)
421{
422	int doit = 0;
423
424	if (ZSTR_LEN(ctx->form_app.s) > 0) {
425		switch (ZSTR_LEN(ctx->tag.s)) {
426			case sizeof("form") - 1:
427				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))
428					&& check_host_whitelist(ctx) == SUCCESS) {
429					doit = 1;
430				}
431				break;
432		}
433	}
434
435	if (doit) {
436		smart_str_append_smart_str(&ctx->result, &ctx->form_app);
437	}
438}
439
440/*
441 *  HANDLE_TAG copies the HTML Tag and checks whether we
442 *  have that tag in our table. If we might modify it,
443 *  we continue to scan the tag, otherwise we simply copy the complete
444 *  HTML stuff to the result buffer.
445 */
446
447static inline void handle_tag(STD_PARA)
448{
449	int ok = 0;
450	unsigned int i;
451
452	if (ctx->tag.s) {
453		ZSTR_LEN(ctx->tag.s) = 0;
454	}
455	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
456	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
457		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
458    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
459	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) {
460		ok = 1;
461		if (ZSTR_LEN(ctx->tag.s) == sizeof("form")-1
462			&& !strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))) {
463			ctx->tag_type = TAG_FORM;
464		} else {
465			ctx->tag_type = TAG_NORMAL;
466		}
467	}
468	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
469}
470
471static inline void handle_arg(STD_PARA)
472{
473	if (ctx->arg.s) {
474		ZSTR_LEN(ctx->arg.s) = 0;
475	}
476	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
477	if (ctx->tag_type == TAG_FORM &&
478		strncasecmp(ZSTR_VAL(ctx->arg.s), "action", ZSTR_LEN(ctx->arg.s)) == 0) {
479		ctx->attr_type = ATTR_ACTION;
480	} else {
481		ctx->attr_type = ATTR_NORMAL;
482	}
483}
484
485static inline void handle_val(STD_PARA, char quotes, char type)
486{
487	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
488	if (ctx->tag_type == TAG_FORM && ctx->attr_type == ATTR_ACTION) {
489		smart_str_setl(&ctx->attr_val, start + quotes, YYCURSOR - start - quotes * 2);
490	}
491	tag_arg(ctx, quotes, type);
492}
493
494static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
495{
496	char *end, *q;
497	char *xp;
498	char *start;
499	size_t rest;
500
501	smart_str_appendl(&ctx->buf, newdata, newlen);
502
503	YYCURSOR = ZSTR_VAL(ctx->buf.s);
504	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
505
506	switch (STATE) {
507		case STATE_PLAIN: goto state_plain;
508		case STATE_TAG: goto state_tag;
509		case STATE_NEXT_ARG: goto state_next_arg;
510		case STATE_ARG: goto state_arg;
511		case STATE_BEFORE_VAL: goto state_before_val;
512		case STATE_VAL: goto state_val;
513	}
514
515
516state_plain_begin:
517	STATE = STATE_PLAIN;
518
519state_plain:
520	start = YYCURSOR;
521/*!re2c
522  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
523  N+ 				{ passthru(STD_ARGS); goto state_plain; }
524*/
525
526state_tag:
527	start = YYCURSOR;
528/*!re2c
529  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
530  any		{ passthru(STD_ARGS); goto state_plain_begin; }
531*/
532
533state_next_arg_begin:
534	STATE = STATE_NEXT_ARG;
535
536state_next_arg:
537	start = YYCURSOR;
538/*!re2c
539  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
540  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
541  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
542  any		{ passthru(STD_ARGS); goto state_plain_begin; }
543*/
544
545state_arg:
546	start = YYCURSOR;
547/*!re2c
548  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
549  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
550*/
551
552state_before_val:
553	start = YYCURSOR;
554/*!re2c
555  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
556  any				{ --YYCURSOR; goto state_next_arg_begin; }
557*/
558
559
560state_val:
561	start = YYCURSOR;
562/*!re2c
563  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
564  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
565  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
566  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
567*/
568
569stop:
570	if (YYLIMIT < start) {
571		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
572		rest = 0;
573	} else {
574		rest = YYLIMIT - start;
575		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
576	}
577
578	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
579	ZSTR_LEN(ctx->buf.s) = rest;
580}
581
582
583PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int encode)
584{
585	char *result;
586	smart_str surl = {0};
587	smart_str buf = {0};
588	smart_str url_app = {0};
589	zend_string *encoded;
590
591	smart_str_appendl(&surl, url, urllen);
592
593	if (encode) {
594		encoded = php_raw_url_encode(name, strlen(name));
595		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
596		zend_string_free(encoded);
597	} else {
598		smart_str_appends(&url_app, name);
599	}
600	smart_str_appendc(&url_app, '=');
601	if (encode) {
602		encoded = php_raw_url_encode(value, strlen(value));
603		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
604		zend_string_free(encoded);
605	} else {
606		smart_str_appends(&url_app, value);
607	}
608
609	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
610
611	smart_str_0(&buf);
612	if (newlen) *newlen = ZSTR_LEN(buf.s);
613	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
614
615	smart_str_free(&url_app);
616	smart_str_free(&buf);
617
618	return result;
619}
620
621
622static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, bool do_flush, url_adapt_state_ex_t *ctx)
623{
624	char *retval;
625
626	xx_mainloop(ctx, src, srclen);
627
628	if (!ctx->result.s) {
629		smart_str_appendl(&ctx->result, "", 0);
630		*newlen = 0;
631	} else {
632		*newlen = ZSTR_LEN(ctx->result.s);
633	}
634	smart_str_0(&ctx->result);
635	if (do_flush) {
636		smart_str_append(&ctx->result, ctx->buf.s);
637		*newlen += ZSTR_LEN(ctx->buf.s);
638		smart_str_free(&ctx->buf);
639		smart_str_free(&ctx->val);
640		smart_str_free(&ctx->attr_val);
641	}
642	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
643	smart_str_free(&ctx->result);
644	return retval;
645}
646
647static int php_url_scanner_ex_activate(int type)
648{
649	url_adapt_state_ex_t *ctx;
650
651	if (type) {
652		ctx = &BG(url_adapt_session_ex);
653	} else {
654		ctx = &BG(url_adapt_output_ex);
655	}
656
657	memset(ctx, 0, XtOffsetOf(url_adapt_state_ex_t, tags));
658
659	return SUCCESS;
660}
661
662static int php_url_scanner_ex_deactivate(int type)
663{
664	url_adapt_state_ex_t *ctx;
665
666	if (type) {
667		ctx = &BG(url_adapt_session_ex);
668	} else {
669		ctx = &BG(url_adapt_output_ex);
670	}
671
672	smart_str_free(&ctx->result);
673	smart_str_free(&ctx->buf);
674	smart_str_free(&ctx->tag);
675	smart_str_free(&ctx->arg);
676	smart_str_free(&ctx->attr_val);
677
678	return SUCCESS;
679}
680
681static inline void php_url_scanner_session_handler_impl(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode, int type)
682{
683	size_t len;
684	url_adapt_state_ex_t *url_state;
685
686	if (type) {
687		url_state = &BG(url_adapt_session_ex);
688	} else {
689		url_state = &BG(url_adapt_output_ex);
690	}
691
692	if (ZSTR_LEN(url_state->url_app.s) != 0) {
693		*handled_output = url_adapt_ext(output, output_len, &len, (bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0), url_state);
694		if (sizeof(unsigned int) < sizeof(size_t)) {
695			if (len > UINT_MAX)
696				len = UINT_MAX;
697		}
698		*handled_output_len = len;
699	} else if (ZSTR_LEN(url_state->url_app.s) == 0) {
700		url_adapt_state_ex_t *ctx = url_state;
701		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
702			smart_str_append(&ctx->result, ctx->buf.s);
703			smart_str_appendl(&ctx->result, output, output_len);
704
705			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
706			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
707
708			smart_str_free(&ctx->buf);
709			smart_str_free(&ctx->result);
710		} else {
711			*handled_output = estrndup(output, *handled_output_len = output_len);
712		}
713	} else {
714		*handled_output = NULL;
715	}
716}
717
/* Output handler of the session rewriter (registered by
 * php_url_scanner_add_var_impl() for the session state). */
static void php_url_scanner_session_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const int session_state = 1;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, session_state);
}
722
/* Output handler of the output rewriter (registered by
 * php_url_scanner_add_var_impl() for the output state). */
static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const int output_state = 0;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, output_state);
}
727
728static inline int php_url_scanner_add_var_impl(const char *name, size_t name_len, const char *value, size_t value_len, int encode, int type)
729{
730	smart_str sname = {0};
731	smart_str svalue = {0};
732	smart_str hname = {0};
733	smart_str hvalue = {0};
734	zend_string *encoded;
735	url_adapt_state_ex_t *url_state;
736	php_output_handler_func_t handler;
737
738	if (type) {
739		url_state = &BG(url_adapt_session_ex);
740		handler = php_url_scanner_session_handler;
741	} else {
742		url_state = &BG(url_adapt_output_ex);
743		handler = php_url_scanner_output_handler;
744	}
745
746	if (!url_state->active) {
747		php_url_scanner_ex_activate(type);
748		php_output_start_internal(ZEND_STRL("URL-Rewriter"), handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
749		url_state->active = 1;
750	}
751
752	if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) {
753		smart_str_appends(&url_state->url_app, PG(arg_separator).output);
754	}
755
756	if (encode) {
757		encoded = php_raw_url_encode(name, name_len);
758		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
759		encoded = php_raw_url_encode(value, value_len);
760		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
761		encoded = php_escape_html_entities_ex((const unsigned char *) name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
762		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
763		encoded = php_escape_html_entities_ex((const unsigned char *) value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
764		smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
765	} else {
766		smart_str_appendl(&sname, name, name_len);
767		smart_str_appendl(&svalue, value, value_len);
768		smart_str_appendl(&hname, name, name_len);
769		smart_str_appendl(&hvalue, value, value_len);
770	}
771
772	smart_str_append_smart_str(&url_state->url_app, &sname);
773	smart_str_appendc(&url_state->url_app, '=');
774	smart_str_append_smart_str(&url_state->url_app, &svalue);
775
776	smart_str_appends(&url_state->form_app, "<input type=\"hidden\" name=\"");
777	smart_str_append_smart_str(&url_state->form_app, &hname);
778	smart_str_appends(&url_state->form_app, "\" value=\"");
779	smart_str_append_smart_str(&url_state->form_app, &hvalue);
780	smart_str_appends(&url_state->form_app, "\" />");
781
782	smart_str_free(&sname);
783	smart_str_free(&svalue);
784	smart_str_free(&hname);
785	smart_str_free(&hvalue);
786
787	return SUCCESS;
788}
789
790
791PHPAPI int php_url_scanner_add_session_var(const char *name, size_t name_len, const char *value, size_t value_len, int encode)
792{
793	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 1);
794}
795
796
797PHPAPI int php_url_scanner_add_var(const char *name, size_t name_len, const char *value, size_t value_len, int encode)
798{
799	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 0);
800}
801
802
803static inline void php_url_scanner_reset_vars_impl(int type) {
804	url_adapt_state_ex_t *url_state;
805
806	if (type) {
807		url_state = &BG(url_adapt_session_ex);
808	} else {
809		url_state = &BG(url_adapt_output_ex);
810	}
811
812	if (url_state->form_app.s) {
813		ZSTR_LEN(url_state->form_app.s) = 0;
814	}
815	if (url_state->url_app.s) {
816		ZSTR_LEN(url_state->url_app.s) = 0;
817	}
818}
819
820
821PHPAPI int php_url_scanner_reset_session_vars(void)
822{
823	php_url_scanner_reset_vars_impl(1);
824	return SUCCESS;
825}
826
827
828PHPAPI int php_url_scanner_reset_vars(void)
829{
830	php_url_scanner_reset_vars_impl(0);
831	return SUCCESS;
832}
833
834
835static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode, int type)
836{
837	char *start, *end, *limit;
838	size_t separator_len;
839	smart_str sname = {0};
840	smart_str hname = {0};
841	smart_str url_app = {0};
842	smart_str form_app = {0};
843	zend_string *encoded;
844	int ret = SUCCESS;
845	bool sep_removed = 0;
846	url_adapt_state_ex_t *url_state;
847
848	if (type) {
849		url_state = &BG(url_adapt_session_ex);
850	} else {
851		url_state = &BG(url_adapt_output_ex);
852	}
853
854	/* Short circuit check. Only check url_app. */
855	if (!url_state->url_app.s || !ZSTR_LEN(url_state->url_app.s)) {
856		return SUCCESS;
857	}
858
859	if (encode) {
860		encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
861		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
862		zend_string_free(encoded);
863		encoded = php_escape_html_entities_ex((const unsigned char *) ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1);
864		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
865		zend_string_free(encoded);
866	} else {
867		smart_str_appendl(&sname, ZSTR_VAL(name), ZSTR_LEN(name));
868		smart_str_appendl(&hname, ZSTR_VAL(name), ZSTR_LEN(name));
869	}
870	smart_str_0(&sname);
871	smart_str_0(&hname);
872
873	smart_str_append_smart_str(&url_app, &sname);
874	smart_str_appendc(&url_app, '=');
875	smart_str_0(&url_app);
876
877	smart_str_appends(&form_app, "<input type=\"hidden\" name=\"");
878	smart_str_append_smart_str(&form_app, &hname);
879	smart_str_appends(&form_app, "\" value=\"");
880	smart_str_0(&form_app);
881
882	/* Short circuit check. Only check url_app. */
883	start = (char *) php_memnstr(ZSTR_VAL(url_state->url_app.s),
884								 ZSTR_VAL(url_app.s), ZSTR_LEN(url_app.s),
885								 ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s));
886	if (!start) {
887		ret = FAILURE;
888		goto finish;
889	}
890
891	/* Get end of url var */
892	limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s);
893	end = start + ZSTR_LEN(url_app.s);
894	separator_len = strlen(PG(arg_separator).output);
895	while (end < limit) {
896		if (!memcmp(end, PG(arg_separator).output, separator_len)) {
897			end += separator_len;
898			sep_removed = 1;
899			break;
900		}
901		end++;
902	}
903	/* Remove all when this is the only rewrite var */
904	if (ZSTR_LEN(url_state->url_app.s) == end - start) {
905		php_url_scanner_reset_vars_impl(type);
906		goto finish;
907	}
908	/* Check preceding separator */
909	if (!sep_removed
910		&& (size_t)(start - PG(arg_separator).output) >= separator_len
911		&& !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) {
912		start -= separator_len;
913	}
914	/* Remove partially */
915	memmove(start, end,
916			ZSTR_LEN(url_state->url_app.s) - (end - ZSTR_VAL(url_state->url_app.s)));
917	ZSTR_LEN(url_state->url_app.s) -= end - start;
918	ZSTR_VAL(url_state->url_app.s)[ZSTR_LEN(url_state->url_app.s)] = '\0';
919
920	/* Remove form var */
921	start = (char *) php_memnstr(ZSTR_VAL(url_state->form_app.s),
922						ZSTR_VAL(form_app.s), ZSTR_LEN(form_app.s),
923						ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s));
924	if (!start) {
925		/* Should not happen */
926		ret = FAILURE;
927		php_url_scanner_reset_vars_impl(type);
928		goto finish;
929	}
930	/* Get end of form var */
931	limit = ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s);
932	end = start + ZSTR_LEN(form_app.s);
933	while (end < limit) {
934		if (*end == '>') {
935			end += 1;
936			break;
937		}
938		end++;
939	}
940	/* Remove partially */
941	memmove(start, end,
942			ZSTR_LEN(url_state->form_app.s) - (end - ZSTR_VAL(url_state->form_app.s)));
943	ZSTR_LEN(url_state->form_app.s) -= end - start;
944	ZSTR_VAL(url_state->form_app.s)[ZSTR_LEN(url_state->form_app.s)] = '\0';
945
946finish:
947	smart_str_free(&url_app);
948	smart_str_free(&form_app);
949	smart_str_free(&sname);
950	smart_str_free(&hname);
951	return ret;
952}
953
954
955PHPAPI int php_url_scanner_reset_session_var(zend_string *name, int encode)
956{
957	return php_url_scanner_reset_var_impl(name, encode, 1);
958}
959
960
961PHPAPI int php_url_scanner_reset_var(zend_string *name, int encode)
962{
963	return php_url_scanner_reset_var_impl(name, encode, 0);
964}
965
966
/* Module startup: registers the INI entries declared in this file.
 * Always returns SUCCESS. */
PHP_MINIT_FUNCTION(url_scanner)
{
	REGISTER_INI_ENTRIES();
	return SUCCESS;
}
972
/* Module shutdown: unregisters the INI entries declared in this file.
 * Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
979
980PHP_RINIT_FUNCTION(url_scanner)
981{
982	BG(url_adapt_session_ex).active    = 0;
983	BG(url_adapt_session_ex).tag_type  = 0;
984	BG(url_adapt_session_ex).attr_type = 0;
985	BG(url_adapt_output_ex).active    = 0;
986	BG(url_adapt_output_ex).tag_type  = 0;
987	BG(url_adapt_output_ex).attr_type = 0;
988	return SUCCESS;
989}
990
991PHP_RSHUTDOWN_FUNCTION(url_scanner)
992{
993	if (BG(url_adapt_session_ex).active) {
994		php_url_scanner_ex_deactivate(1);
995		BG(url_adapt_session_ex).active    = 0;
996		BG(url_adapt_session_ex).tag_type  = 0;
997		BG(url_adapt_session_ex).attr_type = 0;
998	}
999	smart_str_free(&BG(url_adapt_session_ex).form_app);
1000	smart_str_free(&BG(url_adapt_session_ex).url_app);
1001
1002	if (BG(url_adapt_output_ex).active) {
1003		php_url_scanner_ex_deactivate(0);
1004		BG(url_adapt_output_ex).active    = 0;
1005		BG(url_adapt_output_ex).tag_type  = 0;
1006		BG(url_adapt_output_ex).attr_type = 0;
1007	}
1008	smart_str_free(&BG(url_adapt_output_ex).form_app);
1009	smart_str_free(&BG(url_adapt_output_ex).url_app);
1010
1011	return SUCCESS;
1012}
1013