xref: /PHP-7.4/ext/standard/url_scanner_ex.re (revision ab4f725d)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) The PHP Group                                          |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  |         Yasuo Ohgaki <yohgaki@ohgaki.net>                            |
17  +----------------------------------------------------------------------+
18*/
19
20#include "php.h"
21
22#ifdef HAVE_UNISTD_H
23#include <unistd.h>
24#endif
25
26#include <limits.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "SAPI.h"
32#include "php_ini.h"
33#include "php_globals.h"
34#include "php_string.h"
35#define STATE_TAG SOME_OTHER_STATE_TAG
36#include "basic_functions.h"
37#include "url.h"
38#include "html.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "zend_smart_str.h"
44
45static void tag_dtor(zval *zv)
46{
47	free(Z_PTR_P(zv));
48}
49
50static int php_ini_on_update_tags(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
51{
52	url_adapt_state_ex_t *ctx;
53	char *key;
54	char *tmp;
55	char *lasts = NULL;
56
57	if (type) {
58		ctx = &BG(url_adapt_session_ex);
59	} else {
60		ctx = &BG(url_adapt_output_ex);
61	}
62
63	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
64
65	if (ctx->tags)
66		zend_hash_destroy(ctx->tags);
67	else {
68		ctx->tags = malloc(sizeof(HashTable));
69		if (!ctx->tags) {
70			efree(tmp);
71			return FAILURE;
72		}
73	}
74
75	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
76
77	for (key = php_strtok_r(tmp, ",", &lasts);
78		 key;
79		 key = php_strtok_r(NULL, ",", &lasts)) {
80		char *val;
81
82		val = strchr(key, '=');
83		if (val) {
84			char *q;
85			size_t keylen;
86			zend_string *str;
87
88			*val++ = '\0';
89			for (q = key; *q; q++) {
90				*q = tolower(*q);
91			}
92			keylen = q - key;
93			str = zend_string_init(key, keylen, 1);
94			GC_MAKE_PERSISTENT_LOCAL(str);
95			zend_hash_add_mem(ctx->tags, str, val, strlen(val)+1);
96			zend_string_release_ex(str, 1);
97		}
98	}
99
100	efree(tmp);
101
102	return SUCCESS;
103}
104
105static PHP_INI_MH(OnUpdateSessionTags)
106{
107	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
108}
109
110static PHP_INI_MH(OnUpdateOutputTags)
111{
112	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
113}
114
115static int php_ini_on_update_hosts(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
116{
117	HashTable *hosts;
118	char *key;
119	char *tmp;
120	char *lasts = NULL;
121
122	if (type) {
123		hosts = &BG(url_adapt_session_hosts_ht);
124	} else {
125		hosts = &BG(url_adapt_output_hosts_ht);
126	}
127	zend_hash_clean(hosts);
128
129	/* Use user supplied host whitelist */
130	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
131	for (key = php_strtok_r(tmp, ",", &lasts);
132		 key;
133		 key = php_strtok_r(NULL, ",", &lasts)) {
134		size_t keylen;
135		zend_string *tmp_key;
136		char *q;
137
138		for (q = key; *q; q++) {
139			*q = tolower(*q);
140		}
141		keylen = q - key;
142		if (keylen > 0) {
143			tmp_key = zend_string_init(key, keylen, 0);
144			zend_hash_add_empty_element(hosts, tmp_key);
145			zend_string_release_ex(tmp_key, 0);
146		}
147	}
148	efree(tmp);
149
150	return SUCCESS;
151}
152
153static PHP_INI_MH(OnUpdateSessionHosts)
154{
155	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
156}
157
158static PHP_INI_MH(OnUpdateOutputHosts)
159{
160	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
161}
162
163/* FIXME: OnUpdate*Hosts cannot set default to $_SERVER['HTTP_HOST'] at startup */
164PHP_INI_BEGIN()
165	STD_PHP_INI_ENTRY("session.trans_sid_tags", "a=href,area=href,frame=src,form=", PHP_INI_ALL, OnUpdateSessionTags, url_adapt_session_ex, php_basic_globals, basic_globals)
166	STD_PHP_INI_ENTRY("session.trans_sid_hosts", "", PHP_INI_ALL, OnUpdateSessionHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
167	STD_PHP_INI_ENTRY("url_rewriter.tags", "form=", PHP_INI_ALL, OnUpdateOutputTags, url_adapt_session_ex, php_basic_globals, basic_globals)
168	STD_PHP_INI_ENTRY("url_rewriter.hosts", "", PHP_INI_ALL, OnUpdateOutputHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
169PHP_INI_END()
170
171/*!re2c
172any = [\000-\377];
173N = (any\[<]);
174alpha = [a-zA-Z];
175alphanamespace = [a-zA-Z:];
176alphadash = ([a-zA-Z] | "-");
177*/
178
/* First set of re2c interface macros; they are #undef'ed again further down
   before the HTML scanner installs its own set. */
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYMARKER r
#define YYLIMIT q
#define YYFILL(n) goto done
184
185static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
186{
187	php_url *url_parts;
188
189	smart_str_0(url); /* FIXME: Bug #70480 php_url_parse_ex() crashes by processing chars exceed len */
190	url_parts = php_url_parse_ex(ZSTR_VAL(url->s), ZSTR_LEN(url->s));
191
192	/* Ignore malformed URLs */
193	if (!url_parts) {
194		smart_str_append_smart_str(dest, url);
195		return;
196	}
197
198	/* Don't modify URLs of the format "#mark" */
199	if (url_parts->fragment && '#' == ZSTR_VAL(url->s)[0]) {
200		smart_str_append_smart_str(dest, url);
201		php_url_free(url_parts);
202		return;
203	}
204
205	/* Check protocol. Only http/https is allowed. */
206	if (url_parts->scheme
207		&& !zend_string_equals_literal_ci(url_parts->scheme, "http")
208		&& !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
209		smart_str_append_smart_str(dest, url);
210		php_url_free(url_parts);
211		return;
212	}
213
214	/* Check host whitelist. If it's not listed, do nothing. */
215	if (url_parts->host) {
216		zend_string *tmp = zend_string_tolower(url_parts->host);
217		if (!zend_hash_exists(&BG(url_adapt_session_hosts_ht), tmp)) {
218			zend_string_release_ex(tmp, 0);
219			smart_str_append_smart_str(dest, url);
220			php_url_free(url_parts);
221			return;
222		}
223		zend_string_release_ex(tmp, 0);
224	}
225
226	/*
227	 * When URL does not have path and query string add "/?".
228	 * i.e. If URL is only "?foo=bar", should not add "/?".
229	 */
230	if (!url_parts->path && !url_parts->query && !url_parts->fragment) {
231		/* URL is http://php.net or like */
232		smart_str_append_smart_str(dest, url);
233		smart_str_appendc(dest, '/');
234		smart_str_appendc(dest, '?');
235		smart_str_append_smart_str(dest, url_app);
236		php_url_free(url_parts);
237		return;
238	}
239
240	if (url_parts->scheme) {
241		smart_str_appends(dest, ZSTR_VAL(url_parts->scheme));
242		smart_str_appends(dest, "://");
243	} else if (*(ZSTR_VAL(url->s)) == '/' && *(ZSTR_VAL(url->s)+1) == '/') {
244		smart_str_appends(dest, "//");
245	}
246	if (url_parts->user) {
247		smart_str_appends(dest, ZSTR_VAL(url_parts->user));
248		if (url_parts->pass) {
249			smart_str_appends(dest, ZSTR_VAL(url_parts->pass));
250			smart_str_appendc(dest, ':');
251		}
252		smart_str_appendc(dest, '@');
253	}
254	if (url_parts->host) {
255		smart_str_appends(dest, ZSTR_VAL(url_parts->host));
256	}
257	if (url_parts->port) {
258		smart_str_appendc(dest, ':');
259		smart_str_append_unsigned(dest, (long)url_parts->port);
260	}
261	if (url_parts->path) {
262		smart_str_appends(dest, ZSTR_VAL(url_parts->path));
263	}
264	smart_str_appendc(dest, '?');
265	if (url_parts->query) {
266		smart_str_appends(dest, ZSTR_VAL(url_parts->query));
267		smart_str_appends(dest, separator);
268		smart_str_append_smart_str(dest, url_app);
269	} else {
270		smart_str_append_smart_str(dest, url_app);
271	}
272	if (url_parts->fragment) {
273		smart_str_appendc(dest, '#');
274		smart_str_appends(dest, ZSTR_VAL(url_parts->fragment));
275	}
276	php_url_free(url_parts);
277}
278
/* Values of url_adapt_state_ex_t.tag_type. */
enum {
	TAG_NORMAL = 0,	/* any tag other than "form" */
	TAG_FORM   = 1	/* the "form" tag */
};
283
/* Values of url_adapt_state_ex_t.attr_type. */
enum {
	ATTR_NORMAL = 0,	/* any other attribute */
	ATTR_ACTION = 1		/* the action attribute of a form tag */
};
288
/* Drop the first set of re2c interface macros so that the HTML scanner can
   define its own. */
#undef YYCTYPE
#undef YYCURSOR
#undef YYMARKER
#undef YYLIMIT
#undef YYFILL
294
295static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
296{
297	char f = 0;
298
299	/* arg.s is string WITHOUT NUL.
300	   To avoid partial match, NUL is added here */
301	ZSTR_VAL(ctx->arg.s)[ZSTR_LEN(ctx->arg.s)] = '\0';
302	if (!strcasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data)) {
303		f = 1;
304	}
305
306	if (quotes) {
307		smart_str_appendc(&ctx->result, type);
308	}
309	if (f) {
310		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
311	} else {
312		smart_str_append_smart_str(&ctx->result, &ctx->val);
313	}
314	if (quotes) {
315		smart_str_appendc(&ctx->result, type);
316	}
317}
318
/* Scanner states kept in url_adapt_state_ex_t.state; xx_mainloop() resumes
   at the label that belongs to the stored value. */
enum {
	STATE_PLAIN      = 0,
	STATE_TAG        = 1,
	STATE_NEXT_ARG   = 2,
	STATE_ARG        = 3,
	STATE_BEFORE_VAL = 4,
	STATE_VAL        = 5
};
327
/* re2c interface macros of the HTML scanner: running out of input jumps to
   the "stop" label of xx_mainloop(). */
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYMARKER q
#define YYLIMIT end
#define YYFILL(n) goto stop
#define STATE ctx->state

/* Parameter list / argument list shared by the scanner action helpers. */
#define STD_ARGS ctx, start, xp
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR

/* scdebug((fmt, ...)) is a printf() call when SCANNER_DEBUG is set and
   expands to nothing otherwise. */
#if !SCANNER_DEBUG
#define scdebug(x)
#else
#define scdebug(x) printf x
#endif
343
344static inline void passthru(STD_PARA)
345{
346	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
347	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
348}
349
350
351static int check_http_host(char *target)
352{
353	zval *host, *tmp;
354	zend_string *host_tmp;
355	char *colon;
356
357	if ((tmp  = zend_hash_str_find(&EG(symbol_table), ZEND_STRL("_SERVER"))) &&
358		Z_TYPE_P(tmp) == IS_ARRAY &&
359		(host = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("HTTP_HOST"))) &&
360		Z_TYPE_P(host) == IS_STRING) {
361		host_tmp = zend_string_init(Z_STRVAL_P(host), Z_STRLEN_P(host), 0);
362		/* HTTP_HOST could be 'localhost:8888' etc. */
363		colon = strchr(ZSTR_VAL(host_tmp), ':');
364		if (colon) {
365			ZSTR_LEN(host_tmp) = colon - ZSTR_VAL(host_tmp);
366			ZSTR_VAL(host_tmp)[ZSTR_LEN(host_tmp)] = '\0';
367		}
368		if (!strcasecmp(ZSTR_VAL(host_tmp), target)) {
369			zend_string_release_ex(host_tmp, 0);
370			return SUCCESS;
371		}
372		zend_string_release_ex(host_tmp, 0);
373	}
374	return FAILURE;
375}
376
377static int check_host_whitelist(url_adapt_state_ex_t *ctx)
378{
379	php_url *url_parts = NULL;
380	HashTable *allowed_hosts = ctx->type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
381
382	ZEND_ASSERT(ctx->tag_type == TAG_FORM);
383
384	if (ctx->attr_val.s && ZSTR_LEN(ctx->attr_val.s)) {
385		url_parts = php_url_parse_ex(ZSTR_VAL(ctx->attr_val.s), ZSTR_LEN(ctx->attr_val.s));
386	} else {
387		return SUCCESS; /* empty URL is valid */
388	}
389
390	if (!url_parts) {
391		return FAILURE;
392	}
393	if (url_parts->scheme) {
394		/* Only http/https should be handled.
395		   A bit hacky check this here, but saves a URL parse. */
396		if (!zend_string_equals_literal_ci(url_parts->scheme, "http") &&
397			!zend_string_equals_literal_ci(url_parts->scheme, "https")) {
398		php_url_free(url_parts);
399		return FAILURE;
400		}
401	}
402	if (!url_parts->host) {
403		php_url_free(url_parts);
404		return SUCCESS;
405	}
406	if (!zend_hash_num_elements(allowed_hosts) &&
407		check_http_host(ZSTR_VAL(url_parts->host)) == SUCCESS) {
408		php_url_free(url_parts);
409		return SUCCESS;
410	}
411	if (!zend_hash_find(allowed_hosts, url_parts->host)) {
412		php_url_free(url_parts);
413		return FAILURE;
414	}
415	php_url_free(url_parts);
416	return SUCCESS;
417}
418
419/*
420 * This function appends a hidden input field after a <form>.
421 */
422static void handle_form(STD_PARA)
423{
424	int doit = 0;
425
426	if (ZSTR_LEN(ctx->form_app.s) > 0) {
427		switch (ZSTR_LEN(ctx->tag.s)) {
428			case sizeof("form") - 1:
429				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))
430					&& check_host_whitelist(ctx) == SUCCESS) {
431					doit = 1;
432				}
433				break;
434		}
435	}
436
437	if (doit) {
438		smart_str_append_smart_str(&ctx->result, &ctx->form_app);
439	}
440}
441
442/*
443 *  HANDLE_TAG copies the HTML Tag and checks whether we
444 *  have that tag in our table. If we might modify it,
445 *  we continue to scan the tag, otherwise we simply copy the complete
446 *  HTML stuff to the result buffer.
447 */
448
449static inline void handle_tag(STD_PARA)
450{
451	int ok = 0;
452	unsigned int i;
453
454	if (ctx->tag.s) {
455		ZSTR_LEN(ctx->tag.s) = 0;
456	}
457	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
458	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
459		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
460    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
461	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) {
462		ok = 1;
463		if (ZSTR_LEN(ctx->tag.s) == sizeof("form")-1
464			&& !strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))) {
465			ctx->tag_type = TAG_FORM;
466		} else {
467			ctx->tag_type = TAG_NORMAL;
468		}
469	}
470	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
471}
472
473static inline void handle_arg(STD_PARA)
474{
475	if (ctx->arg.s) {
476		ZSTR_LEN(ctx->arg.s) = 0;
477	}
478	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
479	if (ctx->tag_type == TAG_FORM &&
480		strncasecmp(ZSTR_VAL(ctx->arg.s), "action", ZSTR_LEN(ctx->arg.s)) == 0) {
481		ctx->attr_type = ATTR_ACTION;
482	} else {
483		ctx->attr_type = ATTR_NORMAL;
484	}
485}
486
487static inline void handle_val(STD_PARA, char quotes, char type)
488{
489	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
490	if (ctx->tag_type == TAG_FORM && ctx->attr_type == ATTR_ACTION) {
491		smart_str_setl(&ctx->attr_val, start + quotes, YYCURSOR - start - quotes * 2);
492	}
493	tag_arg(ctx, quotes, type);
494}
495
496static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
497{
498	char *end, *q;
499	char *xp;
500	char *start;
501	size_t rest;
502
503	smart_str_appendl(&ctx->buf, newdata, newlen);
504
505	YYCURSOR = ZSTR_VAL(ctx->buf.s);
506	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
507
508	switch (STATE) {
509		case STATE_PLAIN: goto state_plain;
510		case STATE_TAG: goto state_tag;
511		case STATE_NEXT_ARG: goto state_next_arg;
512		case STATE_ARG: goto state_arg;
513		case STATE_BEFORE_VAL: goto state_before_val;
514		case STATE_VAL: goto state_val;
515	}
516
517
518state_plain_begin:
519	STATE = STATE_PLAIN;
520
521state_plain:
522	start = YYCURSOR;
523/*!re2c
524  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
525  N+ 				{ passthru(STD_ARGS); goto state_plain; }
526*/
527
528state_tag:
529	start = YYCURSOR;
530/*!re2c
531  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
532  any		{ passthru(STD_ARGS); goto state_plain_begin; }
533*/
534
535state_next_arg_begin:
536	STATE = STATE_NEXT_ARG;
537
538state_next_arg:
539	start = YYCURSOR;
540/*!re2c
541  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
542  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
543  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
544  any		{ passthru(STD_ARGS); goto state_plain_begin; }
545*/
546
547state_arg:
548	start = YYCURSOR;
549/*!re2c
550  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
551  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
552*/
553
554state_before_val:
555	start = YYCURSOR;
556/*!re2c
557  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
558  any				{ --YYCURSOR; goto state_next_arg_begin; }
559*/
560
561
562state_val:
563	start = YYCURSOR;
564/*!re2c
565  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
566  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
567  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
568  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
569*/
570
571stop:
572	if (YYLIMIT < start) {
573		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
574		rest = 0;
575	} else {
576		rest = YYLIMIT - start;
577		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
578	}
579
580	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
581	ZSTR_LEN(ctx->buf.s) = rest;
582}
583
584
585PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int encode)
586{
587	char *result;
588	smart_str surl = {0};
589	smart_str buf = {0};
590	smart_str url_app = {0};
591	zend_string *encoded;
592
593	smart_str_appendl(&surl, url, urllen);
594
595	if (encode) {
596		encoded = php_raw_url_encode(name, strlen(name));
597		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
598		zend_string_free(encoded);
599	} else {
600		smart_str_appends(&url_app, name);
601	}
602	smart_str_appendc(&url_app, '=');
603	if (encode) {
604		encoded = php_raw_url_encode(value, strlen(value));
605		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
606		zend_string_free(encoded);
607	} else {
608		smart_str_appends(&url_app, value);
609	}
610
611	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
612
613	smart_str_0(&buf);
614	if (newlen) *newlen = ZSTR_LEN(buf.s);
615	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
616
617	smart_str_free(&url_app);
618	smart_str_free(&buf);
619
620	return result;
621}
622
623
624static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush, url_adapt_state_ex_t *ctx)
625{
626	char *retval;
627
628	xx_mainloop(ctx, src, srclen);
629
630	if (!ctx->result.s) {
631		smart_str_appendl(&ctx->result, "", 0);
632		*newlen = 0;
633	} else {
634		*newlen = ZSTR_LEN(ctx->result.s);
635	}
636	smart_str_0(&ctx->result);
637	if (do_flush) {
638		smart_str_append(&ctx->result, ctx->buf.s);
639		*newlen += ZSTR_LEN(ctx->buf.s);
640		smart_str_free(&ctx->buf);
641		smart_str_free(&ctx->val);
642		smart_str_free(&ctx->attr_val);
643	}
644	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
645	smart_str_free(&ctx->result);
646	return retval;
647}
648
649static int php_url_scanner_ex_activate(int type)
650{
651	url_adapt_state_ex_t *ctx;
652
653	if (type) {
654		ctx = &BG(url_adapt_session_ex);
655	} else {
656		ctx = &BG(url_adapt_output_ex);
657	}
658
659	memset(ctx, 0, XtOffsetOf(url_adapt_state_ex_t, tags));
660
661	return SUCCESS;
662}
663
664static int php_url_scanner_ex_deactivate(int type)
665{
666	url_adapt_state_ex_t *ctx;
667
668	if (type) {
669		ctx = &BG(url_adapt_session_ex);
670	} else {
671		ctx = &BG(url_adapt_output_ex);
672	}
673
674	smart_str_free(&ctx->result);
675	smart_str_free(&ctx->buf);
676	smart_str_free(&ctx->tag);
677	smart_str_free(&ctx->arg);
678	smart_str_free(&ctx->attr_val);
679
680	return SUCCESS;
681}
682
683static inline void php_url_scanner_session_handler_impl(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode, int type)
684{
685	size_t len;
686	url_adapt_state_ex_t *url_state;
687
688	if (type) {
689		url_state = &BG(url_adapt_session_ex);
690	} else {
691		url_state = &BG(url_adapt_output_ex);
692	}
693
694	if (ZSTR_LEN(url_state->url_app.s) != 0) {
695		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0), url_state);
696		if (sizeof(unsigned int) < sizeof(size_t)) {
697			if (len > UINT_MAX)
698				len = UINT_MAX;
699		}
700		*handled_output_len = len;
701	} else if (ZSTR_LEN(url_state->url_app.s) == 0) {
702		url_adapt_state_ex_t *ctx = url_state;
703		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
704			smart_str_append(&ctx->result, ctx->buf.s);
705			smart_str_appendl(&ctx->result, output, output_len);
706
707			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
708			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
709
710			smart_str_free(&ctx->buf);
711			smart_str_free(&ctx->result);
712		} else {
713			*handled_output = estrndup(output, *handled_output_len = output_len);
714		}
715	} else {
716		*handled_output = NULL;
717	}
718}
719
/* Output handler of the session rewriter. */
static void php_url_scanner_session_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const int session_state = 1;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, session_state);
}
724
/* Output handler of the output rewriter. */
static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const int output_state = 0;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, output_state);
}
729
730static inline int php_url_scanner_add_var_impl(char *name, size_t name_len, char *value, size_t value_len, int encode, int type)
731{
732	smart_str sname = {0};
733	smart_str svalue = {0};
734	smart_str hname = {0};
735	smart_str hvalue = {0};
736	zend_string *encoded;
737	url_adapt_state_ex_t *url_state;
738	php_output_handler_func_t handler;
739
740	if (type) {
741		url_state = &BG(url_adapt_session_ex);
742		handler = php_url_scanner_session_handler;
743	} else {
744		url_state = &BG(url_adapt_output_ex);
745		handler = php_url_scanner_output_handler;
746	}
747
748	if (!url_state->active) {
749		php_url_scanner_ex_activate(type);
750		php_output_start_internal(ZEND_STRL("URL-Rewriter"), handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
751		url_state->active = 1;
752	}
753
754	if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) {
755		smart_str_appends(&url_state->url_app, PG(arg_separator).output);
756	}
757
758	if (encode) {
759		encoded = php_raw_url_encode(name, name_len);
760		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
761		encoded = php_raw_url_encode(value, value_len);
762		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
763		encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
764		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
765		encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
766		smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
767	} else {
768		smart_str_appendl(&sname, name, name_len);
769		smart_str_appendl(&svalue, value, value_len);
770		smart_str_appendl(&hname, name, name_len);
771		smart_str_appendl(&hvalue, value, value_len);
772	}
773
774	smart_str_append_smart_str(&url_state->url_app, &sname);
775	smart_str_appendc(&url_state->url_app, '=');
776	smart_str_append_smart_str(&url_state->url_app, &svalue);
777
778	smart_str_appends(&url_state->form_app, "<input type=\"hidden\" name=\"");
779	smart_str_append_smart_str(&url_state->form_app, &hname);
780	smart_str_appends(&url_state->form_app, "\" value=\"");
781	smart_str_append_smart_str(&url_state->form_app, &hvalue);
782	smart_str_appends(&url_state->form_app, "\" />");
783
784	smart_str_free(&sname);
785	smart_str_free(&svalue);
786	smart_str_free(&hname);
787	smart_str_free(&hvalue);
788
789	return SUCCESS;
790}
791
792
793PHPAPI int php_url_scanner_add_session_var(char *name, size_t name_len, char *value, size_t value_len, int encode)
794{
795	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 1);
796}
797
798
799PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int encode)
800{
801	return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 0);
802}
803
804
805static inline void php_url_scanner_reset_vars_impl(int type) {
806	url_adapt_state_ex_t *url_state;
807
808	if (type) {
809		url_state = &BG(url_adapt_session_ex);
810	} else {
811		url_state = &BG(url_adapt_output_ex);
812	}
813
814	if (url_state->form_app.s) {
815		ZSTR_LEN(url_state->form_app.s) = 0;
816	}
817	if (url_state->url_app.s) {
818		ZSTR_LEN(url_state->url_app.s) = 0;
819	}
820}
821
822
823PHPAPI int php_url_scanner_reset_session_vars(void)
824{
825	php_url_scanner_reset_vars_impl(1);
826	return SUCCESS;
827}
828
829
830PHPAPI int php_url_scanner_reset_vars(void)
831{
832	php_url_scanner_reset_vars_impl(0);
833	return SUCCESS;
834}
835
836
837static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode, int type)
838{
839	char *start, *end, *limit;
840	size_t separator_len;
841	smart_str sname = {0};
842	smart_str hname = {0};
843	smart_str url_app = {0};
844	smart_str form_app = {0};
845	zend_string *encoded;
846	int ret = SUCCESS;
847	zend_bool sep_removed = 0;
848	url_adapt_state_ex_t *url_state;
849
850	if (type) {
851		url_state = &BG(url_adapt_session_ex);
852	} else {
853		url_state = &BG(url_adapt_output_ex);
854	}
855
856	/* Short circuit check. Only check url_app. */
857	if (!url_state->url_app.s || !ZSTR_LEN(url_state->url_app.s)) {
858		return SUCCESS;
859	}
860
861	if (encode) {
862		encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
863		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
864		zend_string_free(encoded);
865		encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
866		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
867		zend_string_free(encoded);
868	} else {
869		smart_str_appendl(&sname, ZSTR_VAL(name), ZSTR_LEN(name));
870		smart_str_appendl(&hname, ZSTR_VAL(name), ZSTR_LEN(name));
871	}
872	smart_str_0(&sname);
873	smart_str_0(&hname);
874
875	smart_str_append_smart_str(&url_app, &sname);
876	smart_str_appendc(&url_app, '=');
877	smart_str_0(&url_app);
878
879	smart_str_appends(&form_app, "<input type=\"hidden\" name=\"");
880	smart_str_append_smart_str(&form_app, &hname);
881	smart_str_appends(&form_app, "\" value=\"");
882	smart_str_0(&form_app);
883
884	/* Short circuit check. Only check url_app. */
885	start = (char *) php_memnstr(ZSTR_VAL(url_state->url_app.s),
886								 ZSTR_VAL(url_app.s), ZSTR_LEN(url_app.s),
887								 ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s));
888	if (!start) {
889		ret = FAILURE;
890		goto finish;
891	}
892
893	/* Get end of url var */
894	limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s);
895	end = start + ZSTR_LEN(url_app.s);
896	separator_len = strlen(PG(arg_separator).output);
897	while (end < limit) {
898		if (!memcmp(end, PG(arg_separator).output, separator_len)) {
899			end += separator_len;
900			sep_removed = 1;
901			break;
902		}
903		end++;
904	}
905	/* Remove all when this is the only rewrite var */
906	if (ZSTR_LEN(url_state->url_app.s) == end - start) {
907		php_url_scanner_reset_vars_impl(type);
908		goto finish;
909	}
910	/* Check preceding separator */
911	if (!sep_removed
912		&& (size_t)(start - PG(arg_separator).output) >= separator_len
913		&& !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) {
914		start -= separator_len;
915	}
916	/* Remove partially */
917	memmove(start, end,
918			ZSTR_LEN(url_state->url_app.s) - (end - ZSTR_VAL(url_state->url_app.s)));
919	ZSTR_LEN(url_state->url_app.s) -= end - start;
920	ZSTR_VAL(url_state->url_app.s)[ZSTR_LEN(url_state->url_app.s)] = '\0';
921
922	/* Remove form var */
923	start = (char *) php_memnstr(ZSTR_VAL(url_state->form_app.s),
924						ZSTR_VAL(form_app.s), ZSTR_LEN(form_app.s),
925						ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s));
926	if (!start) {
927		/* Should not happen */
928		ret = FAILURE;
929		php_url_scanner_reset_vars_impl(type);
930		goto finish;
931	}
932	/* Get end of form var */
933	limit = ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s);
934	end = start + ZSTR_LEN(form_app.s);
935	while (end < limit) {
936		if (*end == '>') {
937			end += 1;
938			break;
939		}
940		end++;
941	}
942	/* Remove partially */
943	memmove(start, end,
944			ZSTR_LEN(url_state->form_app.s) - (end - ZSTR_VAL(url_state->form_app.s)));
945	ZSTR_LEN(url_state->form_app.s) -= end - start;
946	ZSTR_VAL(url_state->form_app.s)[ZSTR_LEN(url_state->form_app.s)] = '\0';
947
948finish:
949	smart_str_free(&url_app);
950	smart_str_free(&form_app);
951	smart_str_free(&sname);
952	smart_str_free(&hname);
953	return ret;
954}
955
956
957PHPAPI int php_url_scanner_reset_session_var(zend_string *name, int encode)
958{
959	return php_url_scanner_reset_var_impl(name, encode, 1);
960}
961
962
963PHPAPI int php_url_scanner_reset_var(zend_string *name, int encode)
964{
965	return php_url_scanner_reset_var_impl(name, encode, 0);
966}
967
968
969PHP_MINIT_FUNCTION(url_scanner)
970{
971	REGISTER_INI_ENTRIES();
972	return SUCCESS;
973}
974
975PHP_MSHUTDOWN_FUNCTION(url_scanner)
976{
977	UNREGISTER_INI_ENTRIES();
978
979	return SUCCESS;
980}
981
982PHP_RINIT_FUNCTION(url_scanner)
983{
984	BG(url_adapt_session_ex).active    = 0;
985	BG(url_adapt_session_ex).tag_type  = 0;
986	BG(url_adapt_session_ex).attr_type = 0;
987	BG(url_adapt_output_ex).active    = 0;
988	BG(url_adapt_output_ex).tag_type  = 0;
989	BG(url_adapt_output_ex).attr_type = 0;
990	return SUCCESS;
991}
992
993PHP_RSHUTDOWN_FUNCTION(url_scanner)
994{
995	if (BG(url_adapt_session_ex).active) {
996		php_url_scanner_ex_deactivate(1);
997		BG(url_adapt_session_ex).active    = 0;
998		BG(url_adapt_session_ex).tag_type  = 0;
999		BG(url_adapt_session_ex).attr_type = 0;
1000	}
1001	smart_str_free(&BG(url_adapt_session_ex).form_app);
1002	smart_str_free(&BG(url_adapt_session_ex).url_app);
1003
1004	if (BG(url_adapt_output_ex).active) {
1005		php_url_scanner_ex_deactivate(0);
1006		BG(url_adapt_output_ex).active    = 0;
1007		BG(url_adapt_output_ex).tag_type  = 0;
1008		BG(url_adapt_output_ex).attr_type = 0;
1009	}
1010	smart_str_free(&BG(url_adapt_output_ex).form_app);
1011	smart_str_free(&BG(url_adapt_output_ex).url_app);
1012
1013	return SUCCESS;
1014}
1015