1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2014 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Sascha Schumann <sascha@schumann.cx> | 16 +----------------------------------------------------------------------+ 17*/ 18 19/* $Id$ */ 20 21#include "php.h" 22 23#ifdef HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#ifdef HAVE_LIMITS_H 27#include <limits.h> 28#endif 29 30#include <stdio.h> 31#include <stdlib.h> 32#include <string.h> 33 34#include "php_ini.h" 35#include "php_globals.h" 36#define STATE_TAG SOME_OTHER_STATE_TAG 37#include "basic_functions.h" 38#include "url.h" 39#undef STATE_TAG 40 41#define url_scanner url_scanner_ex 42 43#include "php_smart_str.h" 44 45static PHP_INI_MH(OnUpdateTags) 46{ 47 url_adapt_state_ex_t *ctx; 48 char *key; 49 char *lasts; 50 char *tmp; 51 52 ctx = &BG(url_adapt_state_ex); 53 54 tmp = estrndup(new_value, new_value_length); 55 56 if (ctx->tags) 57 zend_hash_destroy(ctx->tags); 58 else { 59 ctx->tags = malloc(sizeof(HashTable)); 60 if (!ctx->tags) { 61 return FAILURE; 62 } 63 } 64 65 zend_hash_init(ctx->tags, 0, NULL, NULL, 1); 66 67 for (key = php_strtok_r(tmp, ",", &lasts); 68 key; 69 key = php_strtok_r(NULL, ",", &lasts)) { 70 char *val; 71 72 val = strchr(key, '='); 73 if (val) { 74 char *q; 75 int keylen; 76 77 *val++ = '\0'; 78 for (q = key; *q; q++) 79 *q = tolower(*q); 80 keylen = q - key; 81 /* key is stored withOUT NUL 82 val is stored WITH NUL */ 83 zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL); 84 } 85 } 86 87 efree(tmp); 88 89 return SUCCESS; 90} 91 92PHP_INI_BEGIN() 93 STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) 94PHP_INI_END() 95 96/*!re2c 97any = [\000-\377]; 98N = (any\[<]); 99alpha = [a-zA-Z]; 100alphanamespace = [a-zA-Z:]; 101alphadash = ([a-zA-Z] | "-"); 102*/ 103 104#define YYFILL(n) goto done 105#define YYCTYPE unsigned char 106#define YYCURSOR p 107#define YYLIMIT q 108#define YYMARKER r 109 110static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator) 111{ 112 register const char *p, *q; 113 const char *bash = NULL; 114 const char *sep = "?"; 115 116 q = (p = url->c) + url->len; 117 118scan: 119/*!re2c 120 ":" { smart_str_append(dest, url); return; } 121 "?" { sep = separator; goto scan; } 122 "#" { bash = p - 1; goto done; } 123 (any\[:?#])+ { goto scan; } 124*/ 125done: 126 127 /* Don't modify URLs of the format "#mark" */ 128 if (bash && bash - url->c == 0) { 129 smart_str_append(dest, url); 130 return; 131 } 132 133 if (bash) 134 smart_str_appendl(dest, url->c, bash - url->c); 135 else 136 smart_str_append(dest, url); 137 138 smart_str_appends(dest, sep); 139 smart_str_append(dest, url_app); 140 141 if (bash) 142 smart_str_appendl(dest, bash, q - bash); 143} 144 145 146#undef YYFILL 147#undef YYCTYPE 148#undef YYCURSOR 149#undef YYLIMIT 150#undef YYMARKER 151 152static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC) 153{ 154 char f = 0; 155 156 if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0) 157 f = 1; 158 159 if (quotes) 160 smart_str_appendc(&ctx->result, type); 161 if (f) { 162 append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output); 163 } else { 164 smart_str_append(&ctx->result, &ctx->val); 165 } 166 if (quotes) 167 smart_str_appendc(&ctx->result, type); 168} 169 170enum { 171 STATE_PLAIN = 0, 172 STATE_TAG, 173 STATE_NEXT_ARG, 174 STATE_ARG, 175 STATE_BEFORE_VAL, 176 STATE_VAL 177}; 178 179#define YYFILL(n) goto stop 180#define YYCTYPE unsigned char 181#define YYCURSOR xp 182#define YYLIMIT end 183#define YYMARKER q 184#define STATE ctx->state 185 186#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC 187#define STD_ARGS ctx, start, xp TSRMLS_CC 188 189#if SCANNER_DEBUG 190#define scdebug(x) printf x 191#else 192#define scdebug(x) 193#endif 194 195static inline void passthru(STD_PARA) 196{ 197 scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start)); 198 smart_str_appendl(&ctx->result, start, YYCURSOR - start); 199} 200 201/* 202 * This function appends a hidden input field after a <form> or 203 * <fieldset>. The latter is important for XHTML. 204 */ 205 206static void handle_form(STD_PARA) 207{ 208 int doit = 0; 209 210 if (ctx->form_app.len > 0) { 211 switch (ctx->tag.len) { 212 case sizeof("form") - 1: 213 if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) { 214 doit = 1; 215 } 216 if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) { 217 char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len); 218 if (p) { 219 e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p); 220 if (!e) { 221 e = ctx->val.c + ctx->val.len; 222 } 223 if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) { 224 doit = 0; 225 } 226 } 227 } 228 break; 229 230 case sizeof("fieldset") - 1: 231 if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) { 232 doit = 1; 233 } 234 break; 235 } 236 237 if (doit) 238 smart_str_append(&ctx->result, &ctx->form_app); 239 } 240} 241 242/* 243 * HANDLE_TAG copies the HTML Tag and checks whether we 244 * have that tag in our table. If we might modify it, 245 * we continue to scan the tag, otherwise we simply copy the complete 246 * HTML stuff to the result buffer. 247 */ 248 249static inline void handle_tag(STD_PARA) 250{ 251 int ok = 0; 252 unsigned int i; 253 254 ctx->tag.len = 0; 255 smart_str_appendl(&ctx->tag, start, YYCURSOR - start); 256 for (i = 0; i < ctx->tag.len; i++) 257 ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]); 258 if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS) 259 ok = 1; 260 STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN; 261} 262 263static inline void handle_arg(STD_PARA) 264{ 265 ctx->arg.len = 0; 266 smart_str_appendl(&ctx->arg, start, YYCURSOR - start); 267} 268 269static inline void handle_val(STD_PARA, char quotes, char type) 270{ 271 smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); 272 tag_arg(ctx, quotes, type TSRMLS_CC); 273} 274 275static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC) 276{ 277 char *end, *q; 278 char *xp; 279 char *start; 280 int rest; 281 282 smart_str_appendl(&ctx->buf, newdata, newlen); 283 284 YYCURSOR = ctx->buf.c; 285 YYLIMIT = ctx->buf.c + ctx->buf.len; 286 287 switch (STATE) { 288 case STATE_PLAIN: goto state_plain; 289 case STATE_TAG: goto state_tag; 290 case STATE_NEXT_ARG: goto state_next_arg; 291 case STATE_ARG: goto state_arg; 292 case STATE_BEFORE_VAL: goto state_before_val; 293 case STATE_VAL: goto state_val; 294 } 295 296 297state_plain_begin: 298 STATE = STATE_PLAIN; 299 300state_plain: 301 start = YYCURSOR; 302/*!re2c 303 "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } 304 N+ { passthru(STD_ARGS); goto state_plain; } 305*/ 306 307state_tag: 308 start = YYCURSOR; 309/*!re2c 310 alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } 311 any { passthru(STD_ARGS); goto state_plain_begin; } 312*/ 313 314state_next_arg_begin: 315 STATE = STATE_NEXT_ARG; 316 317state_next_arg: 318 start = YYCURSOR; 319/*!re2c 320 [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } 321 [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } 322 alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } 323 any { passthru(STD_ARGS); goto state_plain_begin; } 324*/ 325 326state_arg: 327 start = YYCURSOR; 328/*!re2c 329 alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } 330 any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } 331*/ 332 333state_before_val: 334 start = YYCURSOR; 335/*!re2c 336 [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } 337 any { --YYCURSOR; goto state_next_arg_begin; } 338*/ 339 340 341state_val: 342 start = YYCURSOR; 343/*!re2c 344 ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } 345 ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } 346 (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; } 347 any { passthru(STD_ARGS); goto state_next_arg_begin; } 348*/ 349 350stop: 351 rest = YYLIMIT - start; 352 scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); 353 /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ 354 if (rest < 0) rest = 0; 355 356 if (rest) memmove(ctx->buf.c, start, rest); 357 ctx->buf.len = rest; 358} 359 360char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC) 361{ 362 smart_str surl = {0}; 363 smart_str buf = {0}; 364 smart_str url_app = {0}; 365 366 smart_str_setl(&surl, url, urllen); 367 368 smart_str_appends(&url_app, name); 369 smart_str_appendc(&url_app, '='); 370 smart_str_appends(&url_app, value); 371 372 append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output); 373 374 smart_str_0(&buf); 375 if (newlen) *newlen = buf.len; 376 377 smart_str_free(&url_app); 378 379 return buf.c; 380} 381 382 383static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC) 384{ 385 url_adapt_state_ex_t *ctx; 386 char *retval; 387 388 ctx = &BG(url_adapt_state_ex); 389 390 xx_mainloop(ctx, src, srclen TSRMLS_CC); 391 392 *newlen = ctx->result.len; 393 if (!ctx->result.c) { 394 smart_str_appendl(&ctx->result, "", 0); 395 } 396 smart_str_0(&ctx->result); 397 if (do_flush) { 398 smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); 399 *newlen += ctx->buf.len; 400 smart_str_free(&ctx->buf); 401 } 402 retval = ctx->result.c; 403 ctx->result.c = NULL; 404 ctx->result.len = 0; 405 return retval; 406} 407 408static int php_url_scanner_ex_activate(TSRMLS_D) 409{ 410 url_adapt_state_ex_t *ctx; 411 412 ctx = &BG(url_adapt_state_ex); 413 414 memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags)); 415 416 return SUCCESS; 417} 418 419static int php_url_scanner_ex_deactivate(TSRMLS_D) 420{ 421 url_adapt_state_ex_t *ctx; 422 423 ctx = &BG(url_adapt_state_ex); 424 425 smart_str_free(&ctx->result); 426 smart_str_free(&ctx->buf); 427 smart_str_free(&ctx->tag); 428 smart_str_free(&ctx->arg); 429 430 return SUCCESS; 431} 432 433static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC) 434{ 435 size_t len; 436 437 if (BG(url_adapt_state_ex).url_app.len != 0) { 438 *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC); 439 if (sizeof(uint) < sizeof(size_t)) { 440 if (len > UINT_MAX) 441 len = UINT_MAX; 442 } 443 *handled_output_len = len; 444 } else if (BG(url_adapt_state_ex).url_app.len == 0) { 445 url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex); 446 if (ctx->buf.len) { 447 smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); 448 smart_str_appendl(&ctx->result, output, output_len); 449 450 *handled_output = ctx->result.c; 451 *handled_output_len = ctx->buf.len + output_len; 452 453 ctx->result.c = NULL; 454 ctx->result.len = 0; 455 smart_str_free(&ctx->buf); 456 } else { 457 *handled_output = estrndup(output, *handled_output_len = output_len); 458 } 459 } else { 460 *handled_output = NULL; 461 } 462} 463 464PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC) 465{ 466 char *encoded = NULL; 467 int encoded_len; 468 smart_str val; 469 470 if (! BG(url_adapt_state_ex).active) { 471 php_url_scanner_ex_activate(TSRMLS_C); 472 php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC); 473 BG(url_adapt_state_ex).active = 1; 474 } 475 476 477 if (BG(url_adapt_state_ex).url_app.len != 0) { 478 smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output); 479 } 480 481 if (urlencode) { 482 encoded = php_url_encode(value, value_len, &encoded_len); 483 smart_str_setl(&val, encoded, encoded_len); 484 } else { 485 smart_str_setl(&val, value, value_len); 486 } 487 488 smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len); 489 smart_str_appendc(&BG(url_adapt_state_ex).url_app, '='); 490 smart_str_append(&BG(url_adapt_state_ex).url_app, &val); 491 492 smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\""); 493 smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len); 494 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\""); 495 smart_str_append(&BG(url_adapt_state_ex).form_app, &val); 496 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />"); 497 498 if (urlencode) 499 efree(encoded); 500 501 return SUCCESS; 502} 503 504PHPAPI int php_url_scanner_reset_vars(TSRMLS_D) 505{ 506 BG(url_adapt_state_ex).form_app.len = 0; 507 BG(url_adapt_state_ex).url_app.len = 0; 508 509 return SUCCESS; 510} 511 512PHP_MINIT_FUNCTION(url_scanner) 513{ 514 BG(url_adapt_state_ex).tags = NULL; 515 516 BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0; 517 BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0; 518 519 REGISTER_INI_ENTRIES(); 520 return SUCCESS; 521} 522 523PHP_MSHUTDOWN_FUNCTION(url_scanner) 524{ 525 UNREGISTER_INI_ENTRIES(); 526 527 return SUCCESS; 528} 529 530PHP_RINIT_FUNCTION(url_scanner) 531{ 532 BG(url_adapt_state_ex).active = 0; 533 534 return SUCCESS; 535} 536 537PHP_RSHUTDOWN_FUNCTION(url_scanner) 538{ 539 if (BG(url_adapt_state_ex).active) { 540 php_url_scanner_ex_deactivate(TSRMLS_C); 541 BG(url_adapt_state_ex).active = 0; 542 } 543 544 smart_str_free(&BG(url_adapt_state_ex).form_app); 545 smart_str_free(&BG(url_adapt_state_ex).url_app); 546 547 return SUCCESS; 548} 549