xref: /PHP-7.0/ext/standard/url_scanner_ex.re (revision cf3ef363)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2017 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#include "php_string.h"
37#define STATE_TAG SOME_OTHER_STATE_TAG
38#include "basic_functions.h"
39#include "url.h"
40#undef STATE_TAG
41
42#define url_scanner url_scanner_ex
43
44#include "zend_smart_str.h"
45
46static void tag_dtor(zval *zv)
47{
48	free(Z_PTR_P(zv));
49}
50
51static PHP_INI_MH(OnUpdateTags)
52{
53	url_adapt_state_ex_t *ctx;
54	char *key;
55	char *tmp;
56	char *lasts = NULL;
57
58	ctx = &BG(url_adapt_state_ex);
59
60	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
61
62	if (ctx->tags)
63		zend_hash_destroy(ctx->tags);
64	else {
65		ctx->tags = malloc(sizeof(HashTable));
66		if (!ctx->tags) {
67			return FAILURE;
68		}
69	}
70
71	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
72
73	for (key = php_strtok_r(tmp, ",", &lasts);
74			key;
75			key = php_strtok_r(NULL, ",", &lasts)) {
76		char *val;
77
78		val = strchr(key, '=');
79		if (val) {
80			char *q;
81			size_t keylen;
82
83			*val++ = '\0';
84			for (q = key; *q; q++)
85				*q = tolower(*q);
86			keylen = q - key;
87			/* key is stored withOUT NUL
88			   val is stored WITH    NUL */
89			zend_hash_str_add_mem(ctx->tags, key, keylen, val, strlen(val)+1);
90		}
91	}
92
93	efree(tmp);
94
95	return SUCCESS;
96}
97
98PHP_INI_BEGIN()
99	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
100PHP_INI_END()
101
102/*!re2c
103any = [\000-\377];
104N = (any\[<]);
105alpha = [a-zA-Z];
106alphanamespace = [a-zA-Z:];
107alphadash = ([a-zA-Z] | "-");
108*/
109
110#define YYFILL(n) goto done
111#define YYCTYPE unsigned char
112#define YYCURSOR p
113#define YYLIMIT q
114#define YYMARKER r
115
116static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
117{
118	register const char *p, *q;
119	const char *bash = NULL;
120	const char *sep = "?";
121
122	/*
123	 * Don't modify "//example.com" full path, unless
124	 * HTTP_HOST matches.
125	 */
126	if (ZSTR_LEN(url->s) > 2 && ZSTR_VAL(url->s)[0] == '/' && ZSTR_VAL(url->s)[1] == '/') {
127		const char *end_chars = "/\"'?>\r\n";
128		zval *tmp = NULL, *http_host = NULL;
129		size_t target_len, host_len;
130		if ((!(tmp = zend_hash_str_find(&EG(symbol_table), ZEND_STRL("_SERVER"))))
131			|| Z_TYPE_P(tmp) != IS_ARRAY
132			|| !(http_host = zend_hash_str_find(HASH_OF(tmp), ZEND_STRL("HTTP_HOST")))
133			|| Z_TYPE_P(http_host) != IS_STRING) {
134			smart_str_append_smart_str(dest, url);
135			return;
136		}
137
138		/* HTTP_HOST could be "example.com:8888", etc. */
139		/* Need to find end of URL in buffer */
140		host_len   = strcspn(Z_STRVAL_P(http_host), ":");
141		target_len = php_strcspn(
142			ZSTR_VAL(url->s) + 2, (char *) end_chars,
143			ZSTR_VAL(url->s) + ZSTR_LEN(url->s), (char *) end_chars + strlen(end_chars));
144		if (host_len
145			&& host_len == target_len
146			&& strncasecmp(Z_STRVAL_P(http_host), ZSTR_VAL(url->s)+2, host_len)) {
147			smart_str_append_smart_str(dest, url);
148			return;
149		}
150	}
151
152	q = (p = ZSTR_VAL(url->s)) + ZSTR_LEN(url->s);
153
154scan:
155/*!re2c
156  ":"		{ smart_str_append_smart_str(dest, url); return; }
157  "?"		{ sep = separator; goto scan; }
158  "#"		{ bash = p - 1; goto done; }
159  (any\[:?#])+		{ goto scan; }
160*/
161done:
162
163	/* Don't modify URLs of the format "#mark" */
164	if (bash && bash - ZSTR_VAL(url->s) == 0) {
165		smart_str_append_smart_str(dest, url);
166		return;
167	}
168
169	if (bash)
170		smart_str_appendl(dest, ZSTR_VAL(url->s), bash - ZSTR_VAL(url->s));
171	else
172		smart_str_append_smart_str(dest, url);
173
174	smart_str_appends(dest, sep);
175	smart_str_append_smart_str(dest, url_app);
176
177	if (bash)
178		smart_str_appendl(dest, bash, q - bash);
179}
180
181
182#undef YYFILL
183#undef YYCTYPE
184#undef YYCURSOR
185#undef YYLIMIT
186#undef YYMARKER
187
188static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
189{
190	char f = 0;
191
192	if (strncasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data, ZSTR_LEN(ctx->arg.s)) == 0)
193		f = 1;
194
195	if (quotes)
196		smart_str_appendc(&ctx->result, type);
197	if (f) {
198		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
199	} else {
200		smart_str_append_smart_str(&ctx->result, &ctx->val);
201	}
202	if (quotes)
203		smart_str_appendc(&ctx->result, type);
204}
205
/* Scanner states stored in ctx->state and dispatched on by xx_mainloop(). */
enum {
	STATE_PLAIN      = 0,	/* copying text, looking for '<' */
	STATE_TAG        = 1,	/* right after '<', expecting a tag name */
	STATE_NEXT_ARG   = 2,	/* inside a known tag, before the next attribute */
	STATE_ARG        = 3,	/* at an attribute name */
	STATE_BEFORE_VAL = 4,	/* after an attribute name, expecting '=' */
	STATE_VAL        = 5	/* at an attribute value */
};
214
215#define YYFILL(n) goto stop
216#define YYCTYPE unsigned char
217#define YYCURSOR xp
218#define YYLIMIT end
219#define YYMARKER q
220#define STATE ctx->state
221
222#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR
223#define STD_ARGS ctx, start, xp
224
225#if SCANNER_DEBUG
226#define scdebug(x) printf x
227#else
228#define scdebug(x)
229#endif
230
231static inline void passthru(STD_PARA)
232{
233	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
234	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
235}
236
237/*
238 * This function appends a hidden input field after a <form> or
239 * <fieldset>.  The latter is important for XHTML.
240 */
241
242static void handle_form(STD_PARA)
243{
244	int doit = 0;
245
246	if (ZSTR_LEN(ctx->form_app.s) > 0) {
247		switch (ZSTR_LEN(ctx->tag.s)) {
248			case sizeof("form") - 1:
249				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", sizeof("form") - 1)) {
250					doit = 1;
251				}
252				if (doit && ctx->val.s && ctx->lookup_data && *ctx->lookup_data) {
253					char *e, *p = (char *)zend_memnstr(ZSTR_VAL(ctx->val.s), "://", sizeof("://") - 1, ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s));
254					if (p) {
255						e = memchr(p, '/', (ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s)) - p);
256						if (!e) {
257							e = ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s);
258						}
259						if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
260							doit = 0;
261						}
262					}
263				}
264				break;
265
266			case sizeof("fieldset") - 1:
267				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "fieldset", sizeof("fieldset") - 1)) {
268					doit = 1;
269				}
270				break;
271		}
272
273		if (doit)
274			smart_str_append_smart_str(&ctx->result, &ctx->form_app);
275	}
276}
277
278/*
279 *  HANDLE_TAG copies the HTML Tag and checks whether we
280 *  have that tag in our table. If we might modify it,
281 *  we continue to scan the tag, otherwise we simply copy the complete
282 *  HTML stuff to the result buffer.
283 */
284
285static inline void handle_tag(STD_PARA)
286{
287	int ok = 0;
288	unsigned int i;
289
290	if (ctx->tag.s) {
291		ZSTR_LEN(ctx->tag.s) = 0;
292	}
293	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
294	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
295		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
296    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
297	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL)
298		ok = 1;
299	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
300}
301
302static inline void handle_arg(STD_PARA)
303{
304	if (ctx->arg.s) {
305		ZSTR_LEN(ctx->arg.s) = 0;
306	}
307	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
308}
309
310static inline void handle_val(STD_PARA, char quotes, char type)
311{
312	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
313	tag_arg(ctx, quotes, type);
314}
315
316static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
317{
318	char *end, *q;
319	char *xp;
320	char *start;
321	size_t rest;
322
323	smart_str_appendl(&ctx->buf, newdata, newlen);
324
325	YYCURSOR = ZSTR_VAL(ctx->buf.s);
326	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
327
328	switch (STATE) {
329		case STATE_PLAIN: goto state_plain;
330		case STATE_TAG: goto state_tag;
331		case STATE_NEXT_ARG: goto state_next_arg;
332		case STATE_ARG: goto state_arg;
333		case STATE_BEFORE_VAL: goto state_before_val;
334		case STATE_VAL: goto state_val;
335	}
336
337
338state_plain_begin:
339	STATE = STATE_PLAIN;
340
341state_plain:
342	start = YYCURSOR;
343/*!re2c
344  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
345  N+ 				{ passthru(STD_ARGS); goto state_plain; }
346*/
347
348state_tag:
349	start = YYCURSOR;
350/*!re2c
351  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
352  any		{ passthru(STD_ARGS); goto state_plain_begin; }
353*/
354
355state_next_arg_begin:
356	STATE = STATE_NEXT_ARG;
357
358state_next_arg:
359	start = YYCURSOR;
360/*!re2c
361  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
362  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
363  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
364  any		{ passthru(STD_ARGS); goto state_plain_begin; }
365*/
366
367state_arg:
368	start = YYCURSOR;
369/*!re2c
370  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
371  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
372*/
373
374state_before_val:
375	start = YYCURSOR;
376/*!re2c
377  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
378  any				{ --YYCURSOR; goto state_next_arg_begin; }
379*/
380
381
382state_val:
383	start = YYCURSOR;
384/*!re2c
385  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
386  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
387  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
388  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
389*/
390
391stop:
392	if (YYLIMIT < start) {
393		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
394		rest = 0;
395	} else {
396		rest = YYLIMIT - start;
397		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
398	}
399
400	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
401	ZSTR_LEN(ctx->buf.s) = rest;
402}
403
404
405PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int urlencode)
406{
407	char *result;
408	smart_str surl = {0};
409	smart_str buf = {0};
410	smart_str url_app = {0};
411	zend_string *encoded;
412
413	smart_str_appendl(&surl, url, urllen);
414
415	if (urlencode) {
416		encoded = php_raw_url_encode(name, strlen(name));
417		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
418		zend_string_free(encoded);
419	} else {
420		smart_str_appends(&url_app, name);
421	}
422	smart_str_appendc(&url_app, '=');
423	if (urlencode) {
424		encoded = php_raw_url_encode(value, strlen(value));
425		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
426		zend_string_free(encoded);
427	} else {
428		smart_str_appends(&url_app, value);
429	}
430
431	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
432
433	smart_str_0(&buf);
434	if (newlen) *newlen = ZSTR_LEN(buf.s);
435	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
436
437	smart_str_free(&url_app);
438	smart_str_free(&buf);
439
440	return result;
441}
442
443
444static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush)
445{
446	url_adapt_state_ex_t *ctx;
447	char *retval;
448
449	ctx = &BG(url_adapt_state_ex);
450
451	xx_mainloop(ctx, src, srclen);
452
453	if (!ctx->result.s) {
454		smart_str_appendl(&ctx->result, "", 0);
455		*newlen = 0;
456	} else {
457		*newlen = ZSTR_LEN(ctx->result.s);
458	}
459	smart_str_0(&ctx->result);
460	if (do_flush) {
461		smart_str_append(&ctx->result, ctx->buf.s);
462		*newlen += ZSTR_LEN(ctx->buf.s);
463		smart_str_free(&ctx->buf);
464		smart_str_free(&ctx->val);
465	}
466	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
467	smart_str_free(&ctx->result);
468	return retval;
469}
470
471static int php_url_scanner_ex_activate(void)
472{
473	url_adapt_state_ex_t *ctx;
474
475	ctx = &BG(url_adapt_state_ex);
476
477	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
478
479	return SUCCESS;
480}
481
482static int php_url_scanner_ex_deactivate(void)
483{
484	url_adapt_state_ex_t *ctx;
485
486	ctx = &BG(url_adapt_state_ex);
487
488	smart_str_free(&ctx->result);
489	smart_str_free(&ctx->buf);
490	smart_str_free(&ctx->tag);
491	smart_str_free(&ctx->arg);
492
493	return SUCCESS;
494}
495
496static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
497{
498	size_t len;
499
500	if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) {
501		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0));
502		if (sizeof(uint) < sizeof(size_t)) {
503			if (len > UINT_MAX)
504				len = UINT_MAX;
505		}
506		*handled_output_len = len;
507	} else if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) == 0) {
508		url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
509		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
510			smart_str_append(&ctx->result, ctx->buf.s);
511			smart_str_appendl(&ctx->result, output, output_len);
512
513			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
514			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
515
516			smart_str_free(&ctx->buf);
517			smart_str_free(&ctx->result);
518		} else {
519			*handled_output = estrndup(output, *handled_output_len = output_len);
520		}
521	} else {
522		*handled_output = NULL;
523	}
524}
525
526PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int urlencode)
527{
528	smart_str sname = {0};
529	smart_str svalue = {0};
530	zend_string *encoded;
531
532	if (!BG(url_adapt_state_ex).active) {
533		php_url_scanner_ex_activate();
534		php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
535		BG(url_adapt_state_ex).active = 1;
536	}
537
538	if (BG(url_adapt_state_ex).url_app.s && ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) {
539		smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
540	}
541
542	if (urlencode) {
543		encoded = php_raw_url_encode(name, name_len);
544		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
545		zend_string_free(encoded);
546		encoded = php_raw_url_encode(value, value_len);
547		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
548		zend_string_free(encoded);
549	} else {
550		smart_str_appendl(&sname, name, name_len);
551		smart_str_appendl(&svalue, value, value_len);
552	}
553
554	smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &sname);
555	smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
556	smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &svalue);
557
558	smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
559	smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &sname);
560	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
561	smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &svalue);
562	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
563
564	smart_str_free(&sname);
565	smart_str_free(&svalue);
566
567	return SUCCESS;
568}
569
570PHPAPI int php_url_scanner_reset_vars(void)
571{
572	if (BG(url_adapt_state_ex).form_app.s) {
573		ZSTR_LEN(BG(url_adapt_state_ex).form_app.s) = 0;
574	}
575	if (BG(url_adapt_state_ex).url_app.s) {
576		ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) = 0;
577	}
578
579	return SUCCESS;
580}
581
582PHP_MINIT_FUNCTION(url_scanner)
583{
584	BG(url_adapt_state_ex).tags = NULL;
585
586	BG(url_adapt_state_ex).form_app.s = BG(url_adapt_state_ex).url_app.s = NULL;
587
588	REGISTER_INI_ENTRIES();
589	return SUCCESS;
590}
591
592PHP_MSHUTDOWN_FUNCTION(url_scanner)
593{
594	UNREGISTER_INI_ENTRIES();
595
596	return SUCCESS;
597}
598
599PHP_RINIT_FUNCTION(url_scanner)
600{
601	BG(url_adapt_state_ex).active = 0;
602
603	return SUCCESS;
604}
605
606PHP_RSHUTDOWN_FUNCTION(url_scanner)
607{
608	if (BG(url_adapt_state_ex).active) {
609		php_url_scanner_ex_deactivate();
610		BG(url_adapt_state_ex).active = 0;
611	}
612
613	smart_str_free(&BG(url_adapt_state_ex).form_app);
614	smart_str_free(&BG(url_adapt_state_ex).url_app);
615
616	return SUCCESS;
617}
618