xref: /PHP-8.4/ext/standard/url_scanner_ex.re (revision c5bce0d8)
1/*
2  +----------------------------------------------------------------------+
3  | Copyright (c) The PHP Group                                          |
4  +----------------------------------------------------------------------+
5  | This source file is subject to version 3.01 of the PHP license,      |
6  | that is bundled with this package in the file LICENSE, and is        |
7  | available through the world-wide-web at the following url:           |
8  | https://www.php.net/license/3_01.txt                                 |
9  | If you did not receive a copy of the PHP license and are unable to   |
10  | obtain it through the world-wide-web, please send a note to          |
11  | license@php.net so we can mail you a copy immediately.               |
12  +----------------------------------------------------------------------+
13  | Author: Sascha Schumann <sascha@schumann.cx>                         |
14  |         Yasuo Ohgaki <yohgaki@ohgaki.net>                            |
15  +----------------------------------------------------------------------+
16*/
17
18#include "php.h"
19
20#ifdef HAVE_UNISTD_H
21#include <unistd.h>
22#endif
23
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28
29#include "SAPI.h"
30#include "php_ini.h"
31#include "php_globals.h"
32#include "php_string.h"
33#define STATE_TAG SOME_OTHER_STATE_TAG
34#include "basic_functions.h"
35#include "url.h"
36#include "html.h"
37#undef STATE_TAG
38
39#define url_scanner url_scanner_ex
40
41#include "zend_smart_str.h"
42
43static void tag_dtor(zval *zv)
44{
45	free(Z_PTR_P(zv));
46}
47
48static zend_result php_ini_on_update_tags(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, bool is_session)
49{
50	url_adapt_state_ex_t *ctx;
51	char *key;
52	char *tmp;
53	char *lasts = NULL;
54
55	if (is_session) {
56		ctx = &BG(url_adapt_session_ex);
57	} else {
58		ctx = &BG(url_adapt_output_ex);
59	}
60
61	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
62
63	if (ctx->tags)
64		zend_hash_destroy(ctx->tags);
65	else {
66		ctx->tags = malloc(sizeof(HashTable));
67		if (!ctx->tags) {
68			efree(tmp);
69			return FAILURE;
70		}
71	}
72
73	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
74
75	for (key = php_strtok_r(tmp, ",", &lasts);
76		 key;
77		 key = php_strtok_r(NULL, ",", &lasts)) {
78		char *val;
79
80		val = strchr(key, '=');
81		if (val) {
82			char *q;
83			size_t keylen;
84			zend_string *str;
85
86			*val++ = '\0';
87			for (q = key; *q; q++) {
88				*q = tolower(*q);
89			}
90			keylen = q - key;
91			str = zend_string_init(key, keylen, 1);
92			GC_MAKE_PERSISTENT_LOCAL(str);
93			zend_hash_add_mem(ctx->tags, str, val, strlen(val)+1);
94			zend_string_release_ex(str, 1);
95		}
96	}
97
98	efree(tmp);
99
100	return SUCCESS;
101}
102
103static PHP_INI_MH(OnUpdateSessionTags)
104{
105	if (!zend_string_starts_with_literal(new_value, "a=href,area=href,frame=src,form=")) {
106		php_error_docref("session.configuration", E_DEPRECATED, "Usage of session.trans_sid_tags INI setting is deprecated");
107	}
108	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, /* is_session */ true);
109}
110
111static PHP_INI_MH(OnUpdateOutputTags)
112{
113	return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, /* is_session */ false);
114}
115
116static zend_result php_ini_on_update_hosts(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, bool is_session)
117{
118	HashTable *hosts;
119	char *key;
120	char *tmp;
121	char *lasts = NULL;
122
123	if (is_session) {
124		hosts = &BG(url_adapt_session_hosts_ht);
125	} else {
126		hosts = &BG(url_adapt_output_hosts_ht);
127	}
128	zend_hash_clean(hosts);
129
130	/* Use user supplied host whitelist */
131	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
132	for (key = php_strtok_r(tmp, ",", &lasts);
133		 key;
134		 key = php_strtok_r(NULL, ",", &lasts)) {
135		size_t keylen;
136		zend_string *tmp_key;
137		char *q;
138
139		for (q = key; *q; q++) {
140			*q = tolower(*q);
141		}
142		keylen = q - key;
143		if (keylen > 0) {
144			/* Note: the hash table is persistently allocated, so the strings must be too! */
145			tmp_key = zend_string_init(key, keylen, true);
146			GC_MAKE_PERSISTENT_LOCAL(tmp_key);
147			zend_hash_add_empty_element(hosts, tmp_key);
148			zend_string_release_ex(tmp_key, true);
149		}
150	}
151	efree(tmp);
152
153	return SUCCESS;
154}
155
156static PHP_INI_MH(OnUpdateSessionHosts)
157{
158	if (ZSTR_LEN(new_value) != 0) {
159		php_error_docref("session.configuration", E_DEPRECATED, "Usage of session.trans_sid_hosts INI setting is deprecated");
160	}
161	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, /* is_session */ true);
162}
163
164static PHP_INI_MH(OnUpdateOutputHosts)
165{
166	return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, /* is_session */ false);
167}
168
169/* FIXME: OnUpdate*Hosts cannot set default to $_SERVER['HTTP_HOST'] at startup */
170PHP_INI_BEGIN()
171	STD_PHP_INI_ENTRY("session.trans_sid_tags", "a=href,area=href,frame=src,form=", PHP_INI_ALL, OnUpdateSessionTags, url_adapt_session_ex, php_basic_globals, basic_globals)
172	STD_PHP_INI_ENTRY("session.trans_sid_hosts", "", PHP_INI_ALL, OnUpdateSessionHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
173	STD_PHP_INI_ENTRY("url_rewriter.tags", "form=", PHP_INI_ALL, OnUpdateOutputTags, url_adapt_session_ex, php_basic_globals, basic_globals)
174	STD_PHP_INI_ENTRY("url_rewriter.hosts", "", PHP_INI_ALL, OnUpdateOutputHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
175PHP_INI_END()
176
/*
 * Named character classes for the re2c rules in this file:
 *   any            - every byte value
 *   N              - every byte except '<'
 *   alpha          - ASCII letters
 *   alphanamespace - ASCII letters and ':'
 *   alphadash      - ASCII letters and '-'
 */
177/*!re2c
178any = [\000-\377];
179N = (any\[<]);
180alpha = [a-zA-Z];
181alphanamespace = [a-zA-Z:];
182alphadash = ([a-zA-Z] | "-");
183*/
184
/*
 * First set of re2c interface macros (fill hook, character type and the names
 * of the cursor, limit and marker variables). They are removed again with
 * #undef further down, before the main scanner defines its own set.
 */
185#define YYFILL(n) goto done
186#define YYCTYPE unsigned char
187#define YYCURSOR p
188#define YYLIMIT q
189#define YYMARKER r
190
191static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator, int type)
192{
193	php_url *url_parts;
194
195	smart_str_0(url); /* FIXME: Bug #70480 php_url_parse_ex() crashes by processing chars exceed len */
196	url_parts = php_url_parse_ex(ZSTR_VAL(url->s), ZSTR_LEN(url->s));
197
198	/* Ignore malformed URLs */
199	if (!url_parts) {
200		smart_str_append_smart_str(dest, url);
201		return;
202	}
203
204	/* Don't modify URLs of the format "#mark" */
205	if (url_parts->fragment && '#' == ZSTR_VAL(url->s)[0]) {
206		smart_str_append_smart_str(dest, url);
207		php_url_free(url_parts);
208		return;
209	}
210
211	/* Check protocol. Only http/https is allowed. */
212	if (url_parts->scheme
213		&& !zend_string_equals_literal_ci(url_parts->scheme, "http")
214		&& !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
215		smart_str_append_smart_str(dest, url);
216		php_url_free(url_parts);
217		return;
218	}
219
220	/* Check host whitelist. If it's not listed, do nothing. */
221	if (url_parts->host) {
222		zend_string *tmp = zend_string_tolower(url_parts->host);
223		HashTable *allowed_hosts = type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
224		if (!zend_hash_exists(allowed_hosts, tmp)) {
225			zend_string_release_ex(tmp, 0);
226			smart_str_append_smart_str(dest, url);
227			php_url_free(url_parts);
228			return;
229		}
230		zend_string_release_ex(tmp, 0);
231	}
232
233	/*
234	 * When URL does not have path and query string add "/?".
235	 * i.e. If URL is only "?foo=bar", should not add "/?".
236	 */
237	if (!url_parts->path && !url_parts->query && !url_parts->fragment) {
238		/* URL is http://php.net or like */
239		smart_str_append_smart_str(dest, url);
240		smart_str_appendc(dest, '/');
241		smart_str_appendc(dest, '?');
242		smart_str_append_smart_str(dest, url_app);
243		php_url_free(url_parts);
244		return;
245	}
246
247	if (url_parts->scheme) {
248		smart_str_appends(dest, ZSTR_VAL(url_parts->scheme));
249		smart_str_appends(dest, "://");
250	} else if (*(ZSTR_VAL(url->s)) == '/' && *(ZSTR_VAL(url->s)+1) == '/') {
251		smart_str_appends(dest, "//");
252	}
253	if (url_parts->user) {
254		smart_str_appends(dest, ZSTR_VAL(url_parts->user));
255		if (url_parts->pass) {
256			smart_str_appends(dest, ZSTR_VAL(url_parts->pass));
257			smart_str_appendc(dest, ':');
258		}
259		smart_str_appendc(dest, '@');
260	}
261	if (url_parts->host) {
262		smart_str_appends(dest, ZSTR_VAL(url_parts->host));
263	}
264	if (url_parts->port) {
265		smart_str_appendc(dest, ':');
266		smart_str_append_unsigned(dest, (long)url_parts->port);
267	}
268	if (url_parts->path) {
269		smart_str_appends(dest, ZSTR_VAL(url_parts->path));
270	}
271	smart_str_appendc(dest, '?');
272	if (url_parts->query) {
273		smart_str_appends(dest, ZSTR_VAL(url_parts->query));
274		smart_str_appends(dest, separator);
275		smart_str_append_smart_str(dest, url_app);
276	} else {
277		smart_str_append_smart_str(dest, url_app);
278	}
279	if (url_parts->fragment) {
280		smart_str_appendc(dest, '#');
281		smart_str_appends(dest, ZSTR_VAL(url_parts->fragment));
282	}
283	php_url_free(url_parts);
284}
285
/* Values stored in the scanner context's tag_type field. */
enum {
	TAG_NORMAL = 0,	/* any tag other than "form" */
	TAG_FORM   = 1	/* the "form" tag */
};
290
/* Values stored in the scanner context's attr_type field. */
enum {
	ATTR_NORMAL = 0,	/* any other attribute */
	ATTR_ACTION = 1		/* the action attribute of a form tag */
};
295
/* Drop the earlier re2c interface macros so they can be defined again below. */
296#undef YYFILL
297#undef YYCTYPE
298#undef YYCURSOR
299#undef YYLIMIT
300#undef YYMARKER
301
302static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
303{
304	char f = 0;
305
306	/* arg.s is string WITHOUT NUL.
307	   To avoid partial match, NUL is added here */
308	ZSTR_VAL(ctx->arg.s)[ZSTR_LEN(ctx->arg.s)] = '\0';
309	if (!strcasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data)) {
310		f = 1;
311	}
312
313	if (quotes) {
314		smart_str_appendc(&ctx->result, type);
315	}
316	if (f) {
317		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output, ctx->type);
318	} else {
319		smart_str_append_smart_str(&ctx->result, &ctx->val);
320	}
321	if (quotes) {
322		smart_str_appendc(&ctx->result, type);
323	}
324}
325
/* Scanner states kept in the context between calls of the main loop. */
enum {
	STATE_PLAIN      = 0,
	STATE_TAG        = 1,
	STATE_NEXT_ARG   = 2,
	STATE_ARG        = 3,
	STATE_BEFORE_VAL = 4,
	STATE_VAL        = 5
};
334
/*
 * re2c interface for the main scanner: when more input is needed, YYFILL
 * jumps to the `stop` label; the cursor, limit and marker are the local
 * variables xp, end and q. STATE is shorthand for the state stored in the
 * context.
 */
335#define YYFILL(n) goto stop
336#define YYCTYPE unsigned char
337#define YYCURSOR xp
338#define YYLIMIT end
339#define YYMARKER q
340#define STATE ctx->state
341
/* Common parameter list / argument list of the scanner helper functions. */
342#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR
343#define STD_ARGS ctx, start, xp
344
/* scdebug((fmt, ...)) prints through printf when SCANNER_DEBUG is defined and expands to nothing otherwise. */
345#ifdef SCANNER_DEBUG
346#define scdebug(x) printf x
347#else
348#define scdebug(x)
349#endif
350
351static inline void passthru(STD_PARA)
352{
353	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
354	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
355}
356
357
358static zend_result check_http_host(char *target)
359{
360	zval *host, *tmp;
361	zend_string *host_tmp;
362	char *colon;
363
364	if ((tmp = zend_hash_find(&EG(symbol_table), ZSTR_KNOWN(ZEND_STR_AUTOGLOBAL_SERVER))) &&
365		Z_TYPE_P(tmp) == IS_ARRAY &&
366		(host = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("HTTP_HOST"))) &&
367		Z_TYPE_P(host) == IS_STRING) {
368		host_tmp = zend_string_init(Z_STRVAL_P(host), Z_STRLEN_P(host), 0);
369		/* HTTP_HOST could be 'localhost:8888' etc. */
370		colon = strchr(ZSTR_VAL(host_tmp), ':');
371		if (colon) {
372			ZSTR_LEN(host_tmp) = colon - ZSTR_VAL(host_tmp);
373			ZSTR_VAL(host_tmp)[ZSTR_LEN(host_tmp)] = '\0';
374		}
375		if (!strcasecmp(ZSTR_VAL(host_tmp), target)) {
376			zend_string_release_ex(host_tmp, 0);
377			return SUCCESS;
378		}
379		zend_string_release_ex(host_tmp, 0);
380	}
381	return FAILURE;
382}
383
384static zend_result check_host_whitelist(url_adapt_state_ex_t *ctx)
385{
386	php_url *url_parts = NULL;
387	HashTable *allowed_hosts = ctx->type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
388
389	ZEND_ASSERT(ctx->tag_type == TAG_FORM);
390
391	if (ctx->attr_val.s && ZSTR_LEN(ctx->attr_val.s)) {
392		url_parts = php_url_parse_ex(ZSTR_VAL(ctx->attr_val.s), ZSTR_LEN(ctx->attr_val.s));
393	} else {
394		return SUCCESS; /* empty URL is valid */
395	}
396
397	if (!url_parts) {
398		return FAILURE;
399	}
400	if (url_parts->scheme) {
401		/* Only http/https should be handled.
402		   A bit hacky check this here, but saves a URL parse. */
403		if (!zend_string_equals_literal_ci(url_parts->scheme, "http") &&
404			!zend_string_equals_literal_ci(url_parts->scheme, "https")) {
405		php_url_free(url_parts);
406		return FAILURE;
407		}
408	}
409	if (!url_parts->host) {
410		php_url_free(url_parts);
411		return SUCCESS;
412	}
413	if (!zend_hash_num_elements(allowed_hosts) &&
414		check_http_host(ZSTR_VAL(url_parts->host)) == SUCCESS) {
415		php_url_free(url_parts);
416		return SUCCESS;
417	}
418	if (!zend_hash_find(allowed_hosts, url_parts->host)) {
419		php_url_free(url_parts);
420		return FAILURE;
421	}
422	php_url_free(url_parts);
423	return SUCCESS;
424}
425
426/*
427 * This function appends a hidden input field after a <form>.
428 */
429static void handle_form(STD_PARA)
430{
431	int doit = 0;
432
433	if (ZSTR_LEN(ctx->form_app.s) > 0) {
434		switch (ZSTR_LEN(ctx->tag.s)) {
435			case sizeof("form") - 1:
436				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))
437					&& check_host_whitelist(ctx) == SUCCESS) {
438					doit = 1;
439				}
440				break;
441		}
442	}
443
444	if (doit) {
445		smart_str_append_smart_str(&ctx->result, &ctx->form_app);
446	}
447}
448
449/*
450 *  HANDLE_TAG copies the HTML Tag and checks whether we
451 *  have that tag in our table. If we might modify it,
452 *  we continue to scan the tag, otherwise we simply copy the complete
453 *  HTML stuff to the result buffer.
454 */
455
456static inline void handle_tag(STD_PARA)
457{
458	int ok = 0;
459	unsigned int i;
460
461	if (ctx->tag.s) {
462		ZSTR_LEN(ctx->tag.s) = 0;
463	}
464	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
465	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
466		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
467    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
468	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) {
469		ok = 1;
470		if (ZSTR_LEN(ctx->tag.s) == sizeof("form")-1
471			&& !strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))) {
472			ctx->tag_type = TAG_FORM;
473		} else {
474			ctx->tag_type = TAG_NORMAL;
475		}
476	}
477	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
478}
479
480static inline void handle_arg(STD_PARA)
481{
482	if (ctx->arg.s) {
483		ZSTR_LEN(ctx->arg.s) = 0;
484	}
485	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
486	if (ctx->tag_type == TAG_FORM &&
487		strncasecmp(ZSTR_VAL(ctx->arg.s), "action", ZSTR_LEN(ctx->arg.s)) == 0) {
488		ctx->attr_type = ATTR_ACTION;
489	} else {
490		ctx->attr_type = ATTR_NORMAL;
491	}
492}
493
494static inline void handle_val(STD_PARA, char quotes, char type)
495{
496	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
497	if (ctx->tag_type == TAG_FORM && ctx->attr_type == ATTR_ACTION) {
498		smart_str_setl(&ctx->attr_val, start + quotes, YYCURSOR - start - quotes * 2);
499	}
500	tag_arg(ctx, quotes, type);
501}
502
/*
 * Scanner main loop.
 *
 * Appends newdata to ctx->buf and scans the whole buffer, resuming in the
 * state saved in ctx->state. Each state label records the start of the
 * current token in `start`; the re2c rules either copy the token to the
 * result (passthru) or hand it to one of the handle_*() helpers. When the
 * scanner needs more input, YYFILL jumps to `stop`, where the bytes from
 * `start` onwards are kept at the front of ctx->buf for the next call.
 */
503static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
504{
505	char *end, *q;
506	char *xp;
507	char *start;
508	size_t rest;
509
510	smart_str_appendl(&ctx->buf, newdata, newlen);
511
	/* Scan from the beginning of the buffer up to its current end. */
512	YYCURSOR = ZSTR_VAL(ctx->buf.s);
513	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
514
	/* Resume in the state the previous call stopped in. */
515	switch (STATE) {
516		case STATE_PLAIN: goto state_plain;
517		case STATE_TAG: goto state_tag;
518		case STATE_NEXT_ARG: goto state_next_arg;
519		case STATE_ARG: goto state_arg;
520		case STATE_BEFORE_VAL: goto state_before_val;
521		case STATE_VAL: goto state_val;
522	}
523
524
525state_plain_begin:
526	STATE = STATE_PLAIN;
527
528state_plain:
529	start = YYCURSOR;
530/*!re2c
531  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
532  N+ 				{ passthru(STD_ARGS); goto state_plain; }
533*/
534
535state_tag:
536	start = YYCURSOR;
537/*!re2c
538  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
539  any		{ passthru(STD_ARGS); goto state_plain_begin; }
540*/
541
542state_next_arg_begin:
543	STATE = STATE_NEXT_ARG;
544
545state_next_arg:
546	start = YYCURSOR;
547/*!re2c
548  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
549  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
550  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
551  any		{ passthru(STD_ARGS); goto state_plain_begin; }
552*/
553
554state_arg:
555	start = YYCURSOR;
556/*!re2c
557  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
558  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
559*/
560
561state_before_val:
562	start = YYCURSOR;
563/*!re2c
564  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
565  any				{ --YYCURSOR; goto state_next_arg_begin; }
566*/
567
568
569state_val:
570	start = YYCURSOR;
571/*!re2c
572  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
573  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
574  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
575  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
576*/
577
/* Reached through YYFILL: keep the not yet consumed tail for the next call. */
578stop:
579	if (YYLIMIT < start) {
580		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
581		rest = 0;
582	} else {
583		rest = YYLIMIT - start;
584		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
585	}
586
	/* Move the unconsumed bytes to the front and shrink the buffer to them. */
587	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
588	ZSTR_LEN(ctx->buf.s) = rest;
589}
590
591
592PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, bool encode)
593{
594	char *result;
595	smart_str surl = {0};
596	smart_str buf = {0};
597	smart_str url_app = {0};
598	zend_string *encoded;
599
600	smart_str_appendl(&surl, url, urllen);
601
602	if (encode) {
603		encoded = php_raw_url_encode(name, strlen(name));
604		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
605		zend_string_free(encoded);
606	} else {
607		smart_str_appends(&url_app, name);
608	}
609	smart_str_appendc(&url_app, '=');
610	if (encode) {
611		encoded = php_raw_url_encode(value, strlen(value));
612		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
613		zend_string_free(encoded);
614	} else {
615		smart_str_appends(&url_app, value);
616	}
617
618	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output, 1);
619
620	smart_str_0(&buf);
621	if (newlen) *newlen = ZSTR_LEN(buf.s);
622	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
623
624	smart_str_free(&url_app);
625	smart_str_free(&buf);
626
627	return result;
628}
629
630
631static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, bool do_flush, url_adapt_state_ex_t *ctx)
632{
633	char *retval;
634
635	xx_mainloop(ctx, src, srclen);
636
637	if (!ctx->result.s) {
638		smart_str_appendl(&ctx->result, "", 0);
639		*newlen = 0;
640	} else {
641		*newlen = ZSTR_LEN(ctx->result.s);
642	}
643	smart_str_0(&ctx->result);
644	if (do_flush) {
645		smart_str_append(&ctx->result, ctx->buf.s);
646		*newlen += ZSTR_LEN(ctx->buf.s);
647		smart_str_free(&ctx->buf);
648		smart_str_free(&ctx->val);
649		smart_str_free(&ctx->attr_val);
650	}
651	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
652	smart_str_free(&ctx->result);
653	return retval;
654}
655
656static void php_url_scanner_ex_activate(bool is_session)
657{
658	url_adapt_state_ex_t *ctx;
659
660	if (is_session) {
661		ctx = &BG(url_adapt_session_ex);
662	} else {
663		ctx = &BG(url_adapt_output_ex);
664	}
665
666	memset(ctx, 0, XtOffsetOf(url_adapt_state_ex_t, tags));
667}
668
669static void php_url_scanner_ex_deactivate(bool is_session)
670{
671	url_adapt_state_ex_t *ctx;
672
673	if (is_session) {
674		ctx = &BG(url_adapt_session_ex);
675	} else {
676		ctx = &BG(url_adapt_output_ex);
677	}
678
679	smart_str_free(&ctx->result);
680	smart_str_free(&ctx->buf);
681	smart_str_free(&ctx->tag);
682	smart_str_free(&ctx->arg);
683	smart_str_free(&ctx->attr_val);
684}
685
686static inline void php_url_scanner_session_handler_impl(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode, bool is_session)
687{
688	size_t len;
689	url_adapt_state_ex_t *url_state;
690
691	if (is_session) {
692		url_state = &BG(url_adapt_session_ex);
693	} else {
694		url_state = &BG(url_adapt_output_ex);
695	}
696
697	if (ZSTR_LEN(url_state->url_app.s) != 0) {
698		*handled_output = url_adapt_ext(output, output_len, &len, (bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0), url_state);
699		if (sizeof(unsigned int) < sizeof(size_t)) {
700			if (len > UINT_MAX)
701				len = UINT_MAX;
702		}
703		*handled_output_len = len;
704	} else if (ZSTR_LEN(url_state->url_app.s) == 0) {
705		url_adapt_state_ex_t *ctx = url_state;
706		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
707			smart_str_append(&ctx->result, ctx->buf.s);
708			smart_str_appendl(&ctx->result, output, output_len);
709
710			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
711			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
712
713			smart_str_free(&ctx->buf);
714			smart_str_free(&ctx->result);
715		} else {
716			*handled_output = estrndup(output, *handled_output_len = output_len);
717		}
718	} else {
719		*handled_output = NULL;
720	}
721}
722
/* Output handler entry point for the session rewriter context. */
static void php_url_scanner_session_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const bool is_session = true;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, is_session);
}
727
/* Output handler entry point for the output rewriter context. */
static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
{
	const bool is_session = false;

	php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, is_session);
}
732
733static inline void php_url_scanner_add_var_impl(const char *name, size_t name_len, const char *value, size_t value_len, bool encode, bool is_session)
734{
735	smart_str sname = {0};
736	smart_str svalue = {0};
737	smart_str hname = {0};
738	smart_str hvalue = {0};
739	zend_string *encoded;
740	url_adapt_state_ex_t *url_state;
741	php_output_handler_func_t handler;
742	bool should_start = false;
743
744	if (is_session) {
745		url_state = &BG(url_adapt_session_ex);
746		handler = php_url_scanner_session_handler;
747	} else {
748		url_state = &BG(url_adapt_output_ex);
749		handler = php_url_scanner_output_handler;
750	}
751
752	if (!url_state->active) {
753		php_url_scanner_ex_activate(is_session);
754		should_start = true;
755		url_state->active = 1;
756		url_state->type = is_session;
757	}
758
759	if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) {
760		smart_str_appends(&url_state->url_app, PG(arg_separator).output);
761	}
762
763	if (encode) {
764		encoded = php_raw_url_encode(name, name_len);
765		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
766		encoded = php_raw_url_encode(value, value_len);
767		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
768		encoded = php_escape_html_entities_ex((const unsigned char *) name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
769		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
770		encoded = php_escape_html_entities_ex((const unsigned char *) value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
771		smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
772	} else {
773		smart_str_appendl(&sname, name, name_len);
774		smart_str_appendl(&svalue, value, value_len);
775		smart_str_appendl(&hname, name, name_len);
776		smart_str_appendl(&hvalue, value, value_len);
777	}
778
779	smart_str_append_smart_str(&url_state->url_app, &sname);
780	smart_str_appendc(&url_state->url_app, '=');
781	smart_str_append_smart_str(&url_state->url_app, &svalue);
782
783	smart_str_appends(&url_state->form_app, "<input type=\"hidden\" name=\"");
784	smart_str_append_smart_str(&url_state->form_app, &hname);
785	smart_str_appends(&url_state->form_app, "\" value=\"");
786	smart_str_append_smart_str(&url_state->form_app, &hvalue);
787	smart_str_appends(&url_state->form_app, "\" />");
788
789	smart_str_free(&sname);
790	smart_str_free(&svalue);
791	smart_str_free(&hname);
792	smart_str_free(&hvalue);
793
794	if (should_start) {
795		php_output_start_internal(ZEND_STRL("URL-Rewriter"), handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
796	}
797}
798
799
800PHPAPI zend_result php_url_scanner_add_session_var(const char *name, size_t name_len, const char *value, size_t value_len, bool encode)
801{
802	php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, /* is_session */ true);
803	return SUCCESS;
804}
805
806
807PHPAPI zend_result php_url_scanner_add_var(const char *name, size_t name_len, const char *value, size_t value_len, bool encode)
808{
809	php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, /* is_session */ false);
810	return SUCCESS;
811}
812
813
814static inline void php_url_scanner_reset_vars_impl(bool is_session) {
815	url_adapt_state_ex_t *url_state;
816
817	if (is_session) {
818		url_state = &BG(url_adapt_session_ex);
819	} else {
820		url_state = &BG(url_adapt_output_ex);
821	}
822
823	if (url_state->form_app.s) {
824		ZSTR_LEN(url_state->form_app.s) = 0;
825	}
826	if (url_state->url_app.s) {
827		ZSTR_LEN(url_state->url_app.s) = 0;
828	}
829}
830
831
832PHPAPI zend_result php_url_scanner_reset_session_vars(void)
833{
834	php_url_scanner_reset_vars_impl(true);
835	return SUCCESS;
836}
837
838
839PHPAPI zend_result php_url_scanner_reset_vars(void)
840{
841	php_url_scanner_reset_vars_impl(false);
842	return SUCCESS;
843}
844
845
846static inline zend_result php_url_scanner_reset_var_impl(zend_string *name, int encode, bool is_session)
847{
848	char *start, *end, *limit;
849	size_t separator_len;
850	smart_str sname = {0};
851	smart_str hname = {0};
852	smart_str url_app = {0};
853	smart_str form_app = {0};
854	zend_string *encoded;
855	int ret = SUCCESS;
856	bool sep_removed = 0;
857	url_adapt_state_ex_t *url_state;
858
859	if (is_session) {
860		url_state = &BG(url_adapt_session_ex);
861	} else {
862		url_state = &BG(url_adapt_output_ex);
863	}
864
865	/* Short circuit check. Only check url_app. */
866	if (!url_state->url_app.s || !ZSTR_LEN(url_state->url_app.s)) {
867		return SUCCESS;
868	}
869
870	if (encode) {
871		encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
872		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
873		zend_string_free(encoded);
874		encoded = php_escape_html_entities_ex((const unsigned char *) ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1);
875		smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
876		zend_string_free(encoded);
877	} else {
878		smart_str_appendl(&sname, ZSTR_VAL(name), ZSTR_LEN(name));
879		smart_str_appendl(&hname, ZSTR_VAL(name), ZSTR_LEN(name));
880	}
881	smart_str_0(&sname);
882	smart_str_0(&hname);
883
884	smart_str_append_smart_str(&url_app, &sname);
885	smart_str_appendc(&url_app, '=');
886	smart_str_0(&url_app);
887
888	smart_str_appends(&form_app, "<input type=\"hidden\" name=\"");
889	smart_str_append_smart_str(&form_app, &hname);
890	smart_str_appends(&form_app, "\" value=\"");
891	smart_str_0(&form_app);
892
893	/* Short circuit check. Only check url_app. */
894	start = (char *) php_memnstr(ZSTR_VAL(url_state->url_app.s),
895								 ZSTR_VAL(url_app.s), ZSTR_LEN(url_app.s),
896								 ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s));
897	if (!start) {
898		ret = FAILURE;
899		goto finish;
900	}
901
902	/* Get end of url var */
903	limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s);
904	end = start + ZSTR_LEN(url_app.s);
905	separator_len = strlen(PG(arg_separator).output);
906	while (end < limit) {
907		if (!memcmp(end, PG(arg_separator).output, separator_len)) {
908			end += separator_len;
909			sep_removed = 1;
910			break;
911		}
912		end++;
913	}
914	/* Remove all when this is the only rewrite var */
915	if (ZSTR_LEN(url_state->url_app.s) == end - start) {
916		php_url_scanner_reset_vars_impl(is_session);
917		goto finish;
918	}
919	/* Check preceding separator */
920	if (!sep_removed
921		&& (size_t)(start - PG(arg_separator).output) >= separator_len
922		&& !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) {
923		start -= separator_len;
924	}
925	/* Remove partially */
926	memmove(start, end,
927			ZSTR_LEN(url_state->url_app.s) - (end - ZSTR_VAL(url_state->url_app.s)));
928	ZSTR_LEN(url_state->url_app.s) -= end - start;
929	ZSTR_VAL(url_state->url_app.s)[ZSTR_LEN(url_state->url_app.s)] = '\0';
930
931	/* Remove form var */
932	start = (char *) php_memnstr(ZSTR_VAL(url_state->form_app.s),
933						ZSTR_VAL(form_app.s), ZSTR_LEN(form_app.s),
934						ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s));
935	if (!start) {
936		/* Should not happen */
937		ret = FAILURE;
938		php_url_scanner_reset_vars_impl(is_session);
939		goto finish;
940	}
941	/* Get end of form var */
942	limit = ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s);
943	end = start + ZSTR_LEN(form_app.s);
944	while (end < limit) {
945		if (*end == '>') {
946			end += 1;
947			break;
948		}
949		end++;
950	}
951	/* Remove partially */
952	memmove(start, end,
953			ZSTR_LEN(url_state->form_app.s) - (end - ZSTR_VAL(url_state->form_app.s)));
954	ZSTR_LEN(url_state->form_app.s) -= end - start;
955	ZSTR_VAL(url_state->form_app.s)[ZSTR_LEN(url_state->form_app.s)] = '\0';
956
957finish:
958	smart_str_free(&url_app);
959	smart_str_free(&form_app);
960	smart_str_free(&sname);
961	smart_str_free(&hname);
962	return ret;
963}
964
965
966PHPAPI zend_result php_url_scanner_reset_session_var(zend_string *name, int encode)
967{
968	return php_url_scanner_reset_var_impl(name, encode, /* is_session */ true);
969}
970
971
972PHPAPI zend_result php_url_scanner_reset_var(zend_string *name, int encode)
973{
974	return php_url_scanner_reset_var_impl(name, encode, /* is_session */ false);
975}
976
977
978PHP_MINIT_FUNCTION(url_scanner)
979{
980	REGISTER_INI_ENTRIES();
981	return SUCCESS;
982}
983
984PHP_MSHUTDOWN_FUNCTION(url_scanner)
985{
986	UNREGISTER_INI_ENTRIES();
987
988	return SUCCESS;
989}
990
991PHP_RINIT_FUNCTION(url_scanner)
992{
993	BG(url_adapt_session_ex).active    = 0;
994	BG(url_adapt_session_ex).tag_type  = 0;
995	BG(url_adapt_session_ex).attr_type = 0;
996	BG(url_adapt_output_ex).active    = 0;
997	BG(url_adapt_output_ex).tag_type  = 0;
998	BG(url_adapt_output_ex).attr_type = 0;
999	return SUCCESS;
1000}
1001
1002PHP_RSHUTDOWN_FUNCTION(url_scanner)
1003{
1004	if (BG(url_adapt_session_ex).active) {
1005		php_url_scanner_ex_deactivate(true);
1006		BG(url_adapt_session_ex).active    = 0;
1007		BG(url_adapt_session_ex).tag_type  = 0;
1008		BG(url_adapt_session_ex).attr_type = 0;
1009	}
1010	smart_str_free(&BG(url_adapt_session_ex).form_app);
1011	smart_str_free(&BG(url_adapt_session_ex).url_app);
1012
1013	if (BG(url_adapt_output_ex).active) {
1014		php_url_scanner_ex_deactivate(false);
1015		BG(url_adapt_output_ex).active    = 0;
1016		BG(url_adapt_output_ex).tag_type  = 0;
1017		BG(url_adapt_output_ex).attr_type = 0;
1018	}
1019	smart_str_free(&BG(url_adapt_output_ex).form_app);
1020	smart_str_free(&BG(url_adapt_output_ex).url_app);
1021
1022	return SUCCESS;
1023}
1024