xref: /PHP-5.4/ext/standard/url_scanner_ex.re (revision e667d231)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 5                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "php_smart_str.h"
44
45static PHP_INI_MH(OnUpdateTags)
46{
47	url_adapt_state_ex_t *ctx;
48	char *key;
49	char *lasts;
50	char *tmp;
51
52	ctx = &BG(url_adapt_state_ex);
53
54	tmp = estrndup(new_value, new_value_length);
55
56	if (ctx->tags)
57		zend_hash_destroy(ctx->tags);
58	else {
59		ctx->tags = malloc(sizeof(HashTable));
60		if (!ctx->tags) {
61			return FAILURE;
62		}
63	}
64
65	zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
66
67	for (key = php_strtok_r(tmp, ",", &lasts);
68			key;
69			key = php_strtok_r(NULL, ",", &lasts)) {
70		char *val;
71
72		val = strchr(key, '=');
73		if (val) {
74			char *q;
75			int keylen;
76
77			*val++ = '\0';
78			for (q = key; *q; q++)
79				*q = tolower(*q);
80			keylen = q - key;
81			/* key is stored withOUT NUL
82			   val is stored WITH    NUL */
83			zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
84		}
85	}
86
87	efree(tmp);
88
89	return SUCCESS;
90}
91
92PHP_INI_BEGIN()
93	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
94PHP_INI_END()
95
/*
 * Named character classes used by the re2c rules in this file:
 *   any            - every byte value
 *   N              - every byte except '<'
 *   alpha          - ASCII letters
 *   alphanamespace - ASCII letters or ':'
 *   alphadash      - ASCII letters or '-'
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = (alpha | ":");
alphadash = (alpha | "-");
*/
103
104#define YYFILL(n) goto done
105#define YYCTYPE unsigned char
106#define YYCURSOR p
107#define YYLIMIT q
108#define YYMARKER r
109
110static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
111{
112	register const char *p, *q;
113	const char *bash = NULL;
114	const char *sep = "?";
115
116	q = (p = url->c) + url->len;
117
118scan:
119/*!re2c
120  ":"		{ smart_str_append(dest, url); return; }
121  "?"		{ sep = separator; goto scan; }
122  "#"		{ bash = p - 1; goto done; }
123  (any\[:?#])+		{ goto scan; }
124*/
125done:
126
127	/* Don't modify URLs of the format "#mark" */
128	if (bash && bash - url->c == 0) {
129		smart_str_append(dest, url);
130		return;
131	}
132
133	if (bash)
134		smart_str_appendl(dest, url->c, bash - url->c);
135	else
136		smart_str_append(dest, url);
137
138	smart_str_appends(dest, sep);
139	smart_str_append(dest, url_app);
140
141	if (bash)
142		smart_str_appendl(dest, bash, q - bash);
143}
144
145
146#undef YYFILL
147#undef YYCTYPE
148#undef YYCURSOR
149#undef YYLIMIT
150#undef YYMARKER
151
152static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
153{
154	char f = 0;
155
156	if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
157		f = 1;
158
159	if (quotes)
160		smart_str_appendc(&ctx->result, type);
161	if (f) {
162		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
163	} else {
164		smart_str_append(&ctx->result, &ctx->val);
165	}
166	if (quotes)
167		smart_str_appendc(&ctx->result, type);
168}
169
/* Scanner states; the numeric values are consecutive, starting at 0. */
enum {
	STATE_PLAIN      = 0,	/* outside of any tag */
	STATE_TAG        = 1,	/* directly after '<' */
	STATE_NEXT_ARG   = 2,	/* inside a tag, before an attribute name */
	STATE_ARG        = 3,	/* at an attribute name */
	STATE_BEFORE_VAL = 4,	/* after an attribute name, before '=' */
	STATE_VAL        = 5	/* at an attribute value */
};
178
/*
 * re2c glue for the HTML scanner: it reads unsigned bytes from xp up to
 * end, and jumps to the `stop` label when it would need more input.
 */
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
#define YYFILL(n) goto stop

/* Shorthand for the scanner state kept in the context. */
#define STATE ctx->state

/* Common parameter list / argument list of the scanner helper functions. */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) prints via printf only when SCANNER_DEBUG is non-zero. */
#if !SCANNER_DEBUG
#define scdebug(x)
#else
#define scdebug(x) printf x
#endif
194
195static inline void passthru(STD_PARA)
196{
197	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
198	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
199}
200
201/*
202 * This function appends a hidden input field after a <form> or
203 * <fieldset>.  The latter is important for XHTML.
204 */
205
206static void handle_form(STD_PARA)
207{
208	int doit = 0;
209
210	if (ctx->form_app.len > 0) {
211		switch (ctx->tag.len) {
212			case sizeof("form") - 1:
213				if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
214					doit = 1;
215				}
216				if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
217					char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
218					if (p) {
219						e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
220						if (!e) {
221							e = ctx->val.c + ctx->val.len;
222						}
223						if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
224							doit = 0;
225						}
226					}
227				}
228				break;
229
230			case sizeof("fieldset") - 1:
231				if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
232					doit = 1;
233				}
234				break;
235		}
236
237		if (doit)
238			smart_str_append(&ctx->result, &ctx->form_app);
239	}
240}
241
242/*
243 *  HANDLE_TAG copies the HTML Tag and checks whether we
244 *  have that tag in our table. If we might modify it,
245 *  we continue to scan the tag, otherwise we simply copy the complete
246 *  HTML stuff to the result buffer.
247 */
248
249static inline void handle_tag(STD_PARA)
250{
251	int ok = 0;
252	unsigned int i;
253
254	ctx->tag.len = 0;
255	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
256	for (i = 0; i < ctx->tag.len; i++)
257		ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
258	if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
259		ok = 1;
260	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
261}
262
263static inline void handle_arg(STD_PARA)
264{
265	ctx->arg.len = 0;
266	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
267}
268
269static inline void handle_val(STD_PARA, char quotes, char type)
270{
271	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
272	tag_arg(ctx, quotes, type TSRMLS_CC);
273}
274
275static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
276{
277	char *end, *q;
278	char *xp;
279	char *start;
280	int rest;
281
282	smart_str_appendl(&ctx->buf, newdata, newlen);
283
284	YYCURSOR = ctx->buf.c;
285	YYLIMIT = ctx->buf.c + ctx->buf.len;
286
287	switch (STATE) {
288		case STATE_PLAIN: goto state_plain;
289		case STATE_TAG: goto state_tag;
290		case STATE_NEXT_ARG: goto state_next_arg;
291		case STATE_ARG: goto state_arg;
292		case STATE_BEFORE_VAL: goto state_before_val;
293		case STATE_VAL: goto state_val;
294	}
295
296
297state_plain_begin:
298	STATE = STATE_PLAIN;
299
300state_plain:
301	start = YYCURSOR;
302/*!re2c
303  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
304  N+ 				{ passthru(STD_ARGS); goto state_plain; }
305*/
306
307state_tag:
308	start = YYCURSOR;
309/*!re2c
310  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
311  any		{ passthru(STD_ARGS); goto state_plain_begin; }
312*/
313
314state_next_arg_begin:
315	STATE = STATE_NEXT_ARG;
316
317state_next_arg:
318	start = YYCURSOR;
319/*!re2c
320  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
321  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
322  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
323  any		{ passthru(STD_ARGS); goto state_plain_begin; }
324*/
325
326state_arg:
327	start = YYCURSOR;
328/*!re2c
329  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
330  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
331*/
332
333state_before_val:
334	start = YYCURSOR;
335/*!re2c
336  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
337  any				{ --YYCURSOR; goto state_next_arg_begin; }
338*/
339
340
341state_val:
342	start = YYCURSOR;
343/*!re2c
344  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
345  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
346  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
347  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
348*/
349
350stop:
351	rest = YYLIMIT - start;
352	scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
353	/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
354	if (rest < 0) rest = 0;
355
356	if (rest) memmove(ctx->buf.c, start, rest);
357	ctx->buf.len = rest;
358}
359
360char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
361{
362	smart_str surl = {0};
363	smart_str buf = {0};
364	smart_str url_app = {0};
365
366	smart_str_setl(&surl, url, urllen);
367
368	smart_str_appends(&url_app, name);
369	smart_str_appendc(&url_app, '=');
370	smart_str_appends(&url_app, value);
371
372	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
373
374	smart_str_0(&buf);
375	if (newlen) *newlen = buf.len;
376
377	smart_str_free(&url_app);
378
379	return buf.c;
380}
381
382
383static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
384{
385	url_adapt_state_ex_t *ctx;
386	char *retval;
387
388	ctx = &BG(url_adapt_state_ex);
389
390	xx_mainloop(ctx, src, srclen TSRMLS_CC);
391
392	*newlen = ctx->result.len;
393	if (!ctx->result.c) {
394		smart_str_appendl(&ctx->result, "", 0);
395	}
396	smart_str_0(&ctx->result);
397	if (do_flush) {
398		smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
399		*newlen += ctx->buf.len;
400		smart_str_free(&ctx->buf);
401	}
402	retval = ctx->result.c;
403	ctx->result.c = NULL;
404	ctx->result.len = 0;
405	return retval;
406}
407
408static int php_url_scanner_ex_activate(TSRMLS_D)
409{
410	url_adapt_state_ex_t *ctx;
411
412	ctx = &BG(url_adapt_state_ex);
413
414	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
415
416	return SUCCESS;
417}
418
419static int php_url_scanner_ex_deactivate(TSRMLS_D)
420{
421	url_adapt_state_ex_t *ctx;
422
423	ctx = &BG(url_adapt_state_ex);
424
425	smart_str_free(&ctx->result);
426	smart_str_free(&ctx->buf);
427	smart_str_free(&ctx->tag);
428	smart_str_free(&ctx->arg);
429
430	return SUCCESS;
431}
432
433static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
434{
435	size_t len;
436
437	if (BG(url_adapt_state_ex).url_app.len != 0) {
438		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
439		if (sizeof(uint) < sizeof(size_t)) {
440			if (len > UINT_MAX)
441				len = UINT_MAX;
442		}
443		*handled_output_len = len;
444	} else if (BG(url_adapt_state_ex).url_app.len == 0) {
445		url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
446		if (ctx->buf.len) {
447			smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
448			smart_str_appendl(&ctx->result, output, output_len);
449
450			*handled_output = ctx->result.c;
451			*handled_output_len = ctx->buf.len + output_len;
452
453			ctx->result.c = NULL;
454			ctx->result.len = 0;
455			smart_str_free(&ctx->buf);
456		} else {
457			*handled_output = estrndup(output, *handled_output_len = output_len);
458		}
459	} else {
460		*handled_output = NULL;
461	}
462}
463
464PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
465{
466	char *encoded = NULL;
467	int encoded_len;
468	smart_str val;
469
470	if (! BG(url_adapt_state_ex).active) {
471		php_url_scanner_ex_activate(TSRMLS_C);
472		php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
473		BG(url_adapt_state_ex).active = 1;
474	}
475
476
477	if (BG(url_adapt_state_ex).url_app.len != 0) {
478		smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
479	}
480
481	if (urlencode) {
482		encoded = php_url_encode(value, value_len, &encoded_len);
483		smart_str_setl(&val, encoded, encoded_len);
484	} else {
485		smart_str_setl(&val, value, value_len);
486	}
487
488	smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
489	smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
490	smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
491
492	smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
493	smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
494	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
495	smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
496	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
497
498	if (urlencode)
499		efree(encoded);
500
501	return SUCCESS;
502}
503
504PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
505{
506	BG(url_adapt_state_ex).form_app.len = 0;
507	BG(url_adapt_state_ex).url_app.len = 0;
508
509	return SUCCESS;
510}
511
512PHP_MINIT_FUNCTION(url_scanner)
513{
514	BG(url_adapt_state_ex).tags = NULL;
515
516	BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
517	BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
518
519	REGISTER_INI_ENTRIES();
520	return SUCCESS;
521}
522
523PHP_MSHUTDOWN_FUNCTION(url_scanner)
524{
525	UNREGISTER_INI_ENTRIES();
526
527	return SUCCESS;
528}
529
530PHP_RINIT_FUNCTION(url_scanner)
531{
532	BG(url_adapt_state_ex).active = 0;
533
534	return SUCCESS;
535}
536
537PHP_RSHUTDOWN_FUNCTION(url_scanner)
538{
539	if (BG(url_adapt_state_ex).active) {
540		php_url_scanner_ex_deactivate(TSRMLS_C);
541		BG(url_adapt_state_ex).active = 0;
542	}
543
544	smart_str_free(&BG(url_adapt_state_ex).form_app);
545	smart_str_free(&BG(url_adapt_state_ex).url_app);
546
547	return SUCCESS;
548}
549