xref: /PHP-5.6/ext/standard/url_scanner_ex.re (revision f19578ad)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 5                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2016 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "php_smart_str.h"
44
45static PHP_INI_MH(OnUpdateTags)
46{
47	url_adapt_state_ex_t *ctx;
48	char *key;
49	char *lasts;
50	char *tmp;
51
52	ctx = &BG(url_adapt_state_ex);
53
54	tmp = estrndup(new_value, new_value_length);
55
56	if (ctx->tags)
57		zend_hash_destroy(ctx->tags);
58	else {
59		ctx->tags = malloc(sizeof(HashTable));
60		if (!ctx->tags) {
61			return FAILURE;
62		}
63	}
64
65	zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
66
67	for (key = php_strtok_r(tmp, ",", &lasts);
68			key;
69			key = php_strtok_r(NULL, ",", &lasts)) {
70		char *val;
71
72		val = strchr(key, '=');
73		if (val) {
74			char *q;
75			int keylen;
76
77			*val++ = '\0';
78			for (q = key; *q; q++)
79				*q = tolower(*q);
80			keylen = q - key;
81			/* key is stored withOUT NUL
82			   val is stored WITH    NUL */
83			zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
84		}
85	}
86
87	efree(tmp);
88
89	return SUCCESS;
90}
91
/*
 * INI settings declared by this file. "url_rewriter.tags" holds the
 * comma-separated tag=attribute list (default below) and names OnUpdateTags
 * as its update handler.
 */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
95
/* Named character classes used by the re2c rule blocks in this file. */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/

/*
 * re2c interface macros for the scanner inside append_modified_url():
 * running out of input jumps to that function's `done` label, and the cursor
 * and limit map onto its locals p and q.
 * NOTE(review): YYMARKER maps to `r`, which append_modified_url() does not
 * declare; presumably the generated scanner never needs a marker there -
 * confirm against the re2c output.
 */
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
109
110static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator TSRMLS_DC)
111{
112	register const char *p, *q;
113	const char *bash = NULL;
114	const char *sep = "?";
115
116	/*
117	 * Don't modify "//example.com" full path, unless
118	 * HTTP_HOST matches.
119	 */
120	if (url->c[0] == '/' && url->c[1] == '/') {
121		zval **tmp, **http_host;
122		size_t target_len, host_len;
123		if (zend_hash_find(&EG(symbol_table), "_SERVER", sizeof("_SERVER"), (void **)&tmp) == FAILURE
124			|| Z_TYPE_PP(tmp) != IS_ARRAY
125			|| zend_hash_find(Z_ARRVAL_PP(tmp), "HTTP_HOST", sizeof("HTTP_HOST"), (void **)&http_host) == FAILURE
126			|| Z_TYPE_PP(http_host) != IS_STRING) {
127			smart_str_append(dest, url);
128			return;
129		}
130		/* HTTP_HOST could be "example.com:8888", etc. */
131		/* Need to find end of URL in buffer */
132		host_len   = strcspn(Z_STRVAL_PP(http_host), ":");
133		target_len = strcspn(url->c+2, "/\"'?>\r\n");
134		if (host_len
135			&& host_len == target_len
136			&& strncasecmp(Z_STRVAL_PP(http_host), url->c+2, host_len)) {
137			smart_str_append(dest, url);
138			return;
139		}
140	}
141
142	q = (p = url->c) + url->len;
143
144scan:
145/*!re2c
146  ":"		{ smart_str_append(dest, url); return; }
147  "?"		{ sep = separator; goto scan; }
148  "#"		{ bash = p - 1; goto done; }
149  (any\[:?#])+		{ goto scan; }
150*/
151done:
152
153	/* Don't modify URLs of the format "#mark" */
154	if (bash && bash - url->c == 0) {
155		smart_str_append(dest, url);
156		return;
157	}
158
159	if (bash)
160		smart_str_appendl(dest, url->c, bash - url->c);
161	else
162		smart_str_append(dest, url);
163
164	smart_str_appends(dest, sep);
165	smart_str_append(dest, url_app);
166
167	if (bash)
168		smart_str_appendl(dest, bash, q - bash);
169}
170
171
/* Drop the scanner macros used by append_modified_url(); they are redefined
   with different targets further down for xx_mainloop(). */
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
177
178static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
179{
180	char f = 0;
181
182	if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
183		f = 1;
184
185	if (quotes)
186		smart_str_appendc(&ctx->result, type);
187	if (f) {
188		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output TSRMLS_CC);
189	} else {
190		smart_str_append(&ctx->result, &ctx->val);
191	}
192	if (quotes)
193		smart_str_appendc(&ctx->result, type);
194}
195
/* Scanner states of xx_mainloop(); the current one is kept in ctx->state. */
enum {
	STATE_PLAIN = 0,	/* outside a tag: copy input until '<' */
	STATE_TAG,		/* right after '<': expect a tag name */
	STATE_NEXT_ARG,		/* inside a configured tag: look for the next attribute or the end of the tag */
	STATE_ARG,		/* at the start of an attribute name */
	STATE_BEFORE_VAL,	/* after an attribute name: expect '=' */
	STATE_VAL		/* after '=': expect the attribute value */
};
204
/*
 * re2c interface macros for xx_mainloop(): running out of input jumps to its
 * `stop` label; cursor, limit and marker map onto its locals xp, end and q.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
/* The scanner state is stored in the context; xx_mainloop() switches on it on entry */
#define STATE ctx->state

/* Shared parameter list (STD_PARA) and matching argument list (STD_ARGS)
   of passthru() and the handle_*() helpers */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) expands to a printf call only when SCANNER_DEBUG is
   non-zero, and to nothing otherwise */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
220
221static inline void passthru(STD_PARA)
222{
223	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
224	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
225}
226
227/*
228 * This function appends a hidden input field after a <form> or
229 * <fieldset>.  The latter is important for XHTML.
230 */
231
232static void handle_form(STD_PARA)
233{
234	int doit = 0;
235
236	if (ctx->form_app.len > 0) {
237		switch (ctx->tag.len) {
238			case sizeof("form") - 1:
239				if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
240					doit = 1;
241				}
242				if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
243					char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
244					if (p) {
245						e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
246						if (!e) {
247							e = ctx->val.c + ctx->val.len;
248						}
249						if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
250							doit = 0;
251						}
252					}
253				}
254				break;
255
256			case sizeof("fieldset") - 1:
257				if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
258					doit = 1;
259				}
260				break;
261		}
262
263		if (doit)
264			smart_str_append(&ctx->result, &ctx->form_app);
265	}
266}
267
268/*
269 *  HANDLE_TAG copies the HTML Tag and checks whether we
270 *  have that tag in our table. If we might modify it,
271 *  we continue to scan the tag, otherwise we simply copy the complete
272 *  HTML stuff to the result buffer.
273 */
274
275static inline void handle_tag(STD_PARA)
276{
277	int ok = 0;
278	unsigned int i;
279
280	ctx->tag.len = 0;
281	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
282	for (i = 0; i < ctx->tag.len; i++)
283		ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
284	if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
285		ok = 1;
286	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
287}
288
289static inline void handle_arg(STD_PARA)
290{
291	ctx->arg.len = 0;
292	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
293}
294
295static inline void handle_val(STD_PARA, char quotes, char type)
296{
297	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
298	tag_arg(ctx, quotes, type TSRMLS_CC);
299}
300
/*
 * Scanner main loop.
 *
 * Appends newdata to ctx->buf and scans the buffer from its beginning,
 * resuming in the state saved in ctx->state. Scanned input is copied to
 * ctx->result by passthru() and the handle_*() helpers, which may rewrite
 * attribute values on the way. When the scanner runs out of input (YYFILL
 * jumps to `stop`), the bytes from the start of the token being scanned up
 * to the end of the buffer are moved to the front of ctx->buf and stay there
 * for the next call.
 */
static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
{
	char *end, *q;
	char *xp;
	char *start;
	int rest;

	smart_str_appendl(&ctx->buf, newdata, newlen);

	YYCURSOR = ctx->buf.c;
	YYLIMIT = ctx->buf.c + ctx->buf.len;

	/* Resume where the previous call stopped */
	switch (STATE) {
		case STATE_PLAIN: goto state_plain;
		case STATE_TAG: goto state_tag;
		case STATE_NEXT_ARG: goto state_next_arg;
		case STATE_ARG: goto state_arg;
		case STATE_BEFORE_VAL: goto state_before_val;
		case STATE_VAL: goto state_val;
	}


state_plain_begin:
	STATE = STATE_PLAIN;

/* Plain text: copy everything through to the next '<' */
state_plain:
	start = YYCURSOR;
/*!re2c
  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  N+ 				{ passthru(STD_ARGS); goto state_plain; }
*/

/* After '<': a tag name is looked up by handle_tag(), anything else is plain text */
state_tag:
	start = YYCURSOR;
/*!re2c
  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  any		{ passthru(STD_ARGS); goto state_plain_begin; }
*/

state_next_arg_begin:
	STATE = STATE_NEXT_ARG;

/* Inside a configured tag: end of tag, whitespace, or the start of an attribute */
state_next_arg:
	start = YYCURSOR;
/*!re2c
  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  any		{ passthru(STD_ARGS); goto state_plain_begin; }
*/

/* Attribute name: copied through and remembered in ctx->arg */
state_arg:
	start = YYCURSOR;
/*!re2c
  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/

/* After the attribute name: '=' leads to the value, anything else is re-scanned as the next attribute */
state_before_val:
	start = YYCURSOR;
/*!re2c
  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  any				{ --YYCURSOR; goto state_next_arg_begin; }
*/


/* Attribute value: double-quoted, single-quoted or unquoted; handle_val() emits it */
state_val:
	start = YYCURSOR;
/*!re2c
  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
*/

/* Out of input: keep the unfinished token (from `start` to the end of the
   buffer) at the front of ctx->buf */
stop:
	rest = YYLIMIT - start;
	scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
	/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
	if (rest < 0) rest = 0;

	if (rest) memmove(ctx->buf.c, start, rest);
	ctx->buf.len = rest;
}
385
386char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
387{
388	smart_str surl = {0};
389	smart_str buf = {0};
390	smart_str url_app = {0};
391
392	smart_str_setl(&surl, url, urllen);
393
394	smart_str_appends(&url_app, name);
395	smart_str_appendc(&url_app, '=');
396	smart_str_appends(&url_app, value);
397
398	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output TSRMLS_CC);
399
400	smart_str_0(&buf);
401	if (newlen) *newlen = buf.len;
402
403	smart_str_free(&url_app);
404
405	return buf.c;
406}
407
408
409static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
410{
411	url_adapt_state_ex_t *ctx;
412	char *retval;
413
414	ctx = &BG(url_adapt_state_ex);
415
416	xx_mainloop(ctx, src, srclen TSRMLS_CC);
417
418	*newlen = ctx->result.len;
419	if (!ctx->result.c) {
420		smart_str_appendl(&ctx->result, "", 0);
421	}
422	smart_str_0(&ctx->result);
423	if (do_flush) {
424		smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
425		*newlen += ctx->buf.len;
426		smart_str_free(&ctx->buf);
427	}
428	retval = ctx->result.c;
429	ctx->result.c = NULL;
430	ctx->result.len = 0;
431	return retval;
432}
433
434static int php_url_scanner_ex_activate(TSRMLS_D)
435{
436	url_adapt_state_ex_t *ctx;
437
438	ctx = &BG(url_adapt_state_ex);
439
440	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
441
442	return SUCCESS;
443}
444
445static int php_url_scanner_ex_deactivate(TSRMLS_D)
446{
447	url_adapt_state_ex_t *ctx;
448
449	ctx = &BG(url_adapt_state_ex);
450
451	smart_str_free(&ctx->result);
452	smart_str_free(&ctx->buf);
453	smart_str_free(&ctx->tag);
454	smart_str_free(&ctx->arg);
455
456	return SUCCESS;
457}
458
459static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
460{
461	size_t len;
462
463	if (BG(url_adapt_state_ex).url_app.len != 0) {
464		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
465		if (sizeof(uint) < sizeof(size_t)) {
466			if (len > UINT_MAX)
467				len = UINT_MAX;
468		}
469		*handled_output_len = len;
470	} else if (BG(url_adapt_state_ex).url_app.len == 0) {
471		url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
472		if (ctx->buf.len) {
473			smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
474			smart_str_appendl(&ctx->result, output, output_len);
475
476			*handled_output = ctx->result.c;
477			*handled_output_len = ctx->buf.len + output_len;
478
479			ctx->result.c = NULL;
480			ctx->result.len = 0;
481			smart_str_free(&ctx->buf);
482		} else {
483			*handled_output = estrndup(output, *handled_output_len = output_len);
484		}
485	} else {
486		*handled_output = NULL;
487	}
488}
489
490PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
491{
492	char *encoded = NULL;
493	int encoded_len;
494	smart_str val;
495
496	if (! BG(url_adapt_state_ex).active) {
497		php_url_scanner_ex_activate(TSRMLS_C);
498		php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
499		BG(url_adapt_state_ex).active = 1;
500	}
501
502
503	if (BG(url_adapt_state_ex).url_app.len != 0) {
504		smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
505	}
506
507	if (urlencode) {
508		encoded = php_url_encode(value, value_len, &encoded_len);
509		smart_str_setl(&val, encoded, encoded_len);
510	} else {
511		smart_str_setl(&val, value, value_len);
512	}
513
514	smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
515	smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
516	smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
517
518	smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
519	smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
520	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
521	smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
522	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
523
524	if (urlencode)
525		efree(encoded);
526
527	return SUCCESS;
528}
529
530PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
531{
532	BG(url_adapt_state_ex).form_app.len = 0;
533	BG(url_adapt_state_ex).url_app.len = 0;
534
535	return SUCCESS;
536}
537
538PHP_MINIT_FUNCTION(url_scanner)
539{
540	BG(url_adapt_state_ex).tags = NULL;
541
542	BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
543	BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
544
545	REGISTER_INI_ENTRIES();
546	return SUCCESS;
547}
548
/* Module shutdown: unregisters the INI entries of this file. The tag table
   is not released here. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
555
556PHP_RINIT_FUNCTION(url_scanner)
557{
558	BG(url_adapt_state_ex).active = 0;
559
560	return SUCCESS;
561}
562
563PHP_RSHUTDOWN_FUNCTION(url_scanner)
564{
565	if (BG(url_adapt_state_ex).active) {
566		php_url_scanner_ex_deactivate(TSRMLS_C);
567		BG(url_adapt_state_ex).active = 0;
568	}
569
570	smart_str_free(&BG(url_adapt_state_ex).form_app);
571	smart_str_free(&BG(url_adapt_state_ex).url_app);
572
573	return SUCCESS;
574}
575