1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 7 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2017 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Sascha Schumann <sascha@schumann.cx> | 16 +----------------------------------------------------------------------+ 17*/ 18 19/* $Id$ */ 20 21#include "php.h" 22 23#ifdef HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#ifdef HAVE_LIMITS_H 27#include <limits.h> 28#endif 29 30#include <stdio.h> 31#include <stdlib.h> 32#include <string.h> 33 34#include "php_ini.h" 35#include "php_globals.h" 36#include "php_string.h" 37#define STATE_TAG SOME_OTHER_STATE_TAG 38#include "basic_functions.h" 39#include "url.h" 40#undef STATE_TAG 41 42#define url_scanner url_scanner_ex 43 44#include "zend_smart_str.h" 45 46static void tag_dtor(zval *zv) 47{ 48 free(Z_PTR_P(zv)); 49} 50 51static PHP_INI_MH(OnUpdateTags) 52{ 53 url_adapt_state_ex_t *ctx; 54 char *key; 55 char *tmp; 56 char *lasts = NULL; 57 58 ctx = &BG(url_adapt_state_ex); 59 60 tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); 61 62 if (ctx->tags) 63 zend_hash_destroy(ctx->tags); 64 else { 65 ctx->tags = malloc(sizeof(HashTable)); 66 if (!ctx->tags) { 67 return FAILURE; 68 } 69 } 70 71 zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1); 72 73 for (key = php_strtok_r(tmp, ",", &lasts); 74 key; 75 key = php_strtok_r(NULL, ",", &lasts)) { 76 char *val; 77 78 val = strchr(key, '='); 79 if (val) { 80 char *q; 81 size_t keylen; 82 83 *val++ = '\0'; 84 for (q = key; *q; q++) 85 *q = tolower(*q); 86 keylen = q - key; 87 /* key is stored withOUT NUL 88 val is stored WITH NUL */ 89 zend_hash_str_add_mem(ctx->tags, key, keylen, val, strlen(val)+1); 90 } 91 } 92 93 efree(tmp); 94 95 return SUCCESS; 96} 97 98PHP_INI_BEGIN() 99 STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) 100PHP_INI_END() 101 102/*!re2c 103any = [\000-\377]; 104N = (any\[<]); 105alpha = [a-zA-Z]; 106alphanamespace = [a-zA-Z:]; 107alphadash = ([a-zA-Z] | "-"); 108*/ 109 110#define YYFILL(n) goto done 111#define YYCTYPE unsigned char 112#define YYCURSOR p 113#define YYLIMIT q 114#define YYMARKER r 115 116static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator) 117{ 118 register const char *p, *q; 119 const char *bash = NULL; 120 const char *sep = "?"; 121 122 /* 123 * Don't modify "//example.com" full path, unless 124 * HTTP_HOST matches. 125 */ 126 if (ZSTR_LEN(url->s) > 2 && ZSTR_VAL(url->s)[0] == '/' && ZSTR_VAL(url->s)[1] == '/') { 127 const char *end_chars = "/\"'?>\r\n"; 128 zval *tmp = NULL, *http_host = NULL; 129 size_t target_len, host_len; 130 if ((!(tmp = zend_hash_str_find(&EG(symbol_table), ZEND_STRL("_SERVER")))) 131 || Z_TYPE_P(tmp) != IS_ARRAY 132 || !(http_host = zend_hash_str_find(HASH_OF(tmp), ZEND_STRL("HTTP_HOST"))) 133 || Z_TYPE_P(http_host) != IS_STRING) { 134 smart_str_append_smart_str(dest, url); 135 return; 136 } 137 138 /* HTTP_HOST could be "example.com:8888", etc. */ 139 /* Need to find end of URL in buffer */ 140 host_len = strcspn(Z_STRVAL_P(http_host), ":"); 141 target_len = php_strcspn( 142 ZSTR_VAL(url->s) + 2, (char *) end_chars, 143 ZSTR_VAL(url->s) + ZSTR_LEN(url->s), (char *) end_chars + strlen(end_chars)); 144 if (host_len 145 && host_len == target_len 146 && strncasecmp(Z_STRVAL_P(http_host), ZSTR_VAL(url->s)+2, host_len)) { 147 smart_str_append_smart_str(dest, url); 148 return; 149 } 150 } 151 152 q = (p = ZSTR_VAL(url->s)) + ZSTR_LEN(url->s); 153 154scan: 155/*!re2c 156 ":" { smart_str_append_smart_str(dest, url); return; } 157 "?" { sep = separator; goto scan; } 158 "#" { bash = p - 1; goto done; } 159 (any\[:?#])+ { goto scan; } 160*/ 161done: 162 163 /* Don't modify URLs of the format "#mark" */ 164 if (bash && bash - ZSTR_VAL(url->s) == 0) { 165 smart_str_append_smart_str(dest, url); 166 return; 167 } 168 169 if (bash) 170 smart_str_appendl(dest, ZSTR_VAL(url->s), bash - ZSTR_VAL(url->s)); 171 else 172 smart_str_append_smart_str(dest, url); 173 174 smart_str_appends(dest, sep); 175 smart_str_append_smart_str(dest, url_app); 176 177 if (bash) 178 smart_str_appendl(dest, bash, q - bash); 179} 180 181 182#undef YYFILL 183#undef YYCTYPE 184#undef YYCURSOR 185#undef YYLIMIT 186#undef YYMARKER 187 188static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type) 189{ 190 char f = 0; 191 192 if (strncasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data, ZSTR_LEN(ctx->arg.s)) == 0) 193 f = 1; 194 195 if (quotes) 196 smart_str_appendc(&ctx->result, type); 197 if (f) { 198 append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output); 199 } else { 200 smart_str_append_smart_str(&ctx->result, &ctx->val); 201 } 202 if (quotes) 203 smart_str_appendc(&ctx->result, type); 204} 205 206enum { 207 STATE_PLAIN = 0, 208 STATE_TAG, 209 STATE_NEXT_ARG, 210 STATE_ARG, 211 STATE_BEFORE_VAL, 212 STATE_VAL 213}; 214 215#define YYFILL(n) goto stop 216#define YYCTYPE unsigned char 217#define YYCURSOR xp 218#define YYLIMIT end 219#define YYMARKER q 220#define STATE ctx->state 221 222#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR 223#define STD_ARGS ctx, start, xp 224 225#if SCANNER_DEBUG 226#define scdebug(x) printf x 227#else 228#define scdebug(x) 229#endif 230 231static inline void passthru(STD_PARA) 232{ 233 scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start)); 234 smart_str_appendl(&ctx->result, start, YYCURSOR - start); 235} 236 237/* 238 * This function appends a hidden input field after a <form> or 239 * <fieldset>. The latter is important for XHTML. 240 */ 241 242static void handle_form(STD_PARA) 243{ 244 int doit = 0; 245 246 if (ZSTR_LEN(ctx->form_app.s) > 0) { 247 switch (ZSTR_LEN(ctx->tag.s)) { 248 case sizeof("form") - 1: 249 if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", sizeof("form") - 1)) { 250 doit = 1; 251 } 252 if (doit && ctx->val.s && ctx->lookup_data && *ctx->lookup_data) { 253 char *e, *p = (char *)zend_memnstr(ZSTR_VAL(ctx->val.s), "://", sizeof("://") - 1, ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s)); 254 if (p) { 255 e = memchr(p, '/', (ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s)) - p); 256 if (!e) { 257 e = ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s); 258 } 259 if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) { 260 doit = 0; 261 } 262 } 263 } 264 break; 265 266 case sizeof("fieldset") - 1: 267 if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "fieldset", sizeof("fieldset") - 1)) { 268 doit = 1; 269 } 270 break; 271 } 272 273 if (doit) 274 smart_str_append_smart_str(&ctx->result, &ctx->form_app); 275 } 276} 277 278/* 279 * HANDLE_TAG copies the HTML Tag and checks whether we 280 * have that tag in our table. If we might modify it, 281 * we continue to scan the tag, otherwise we simply copy the complete 282 * HTML stuff to the result buffer. 283 */ 284 285static inline void handle_tag(STD_PARA) 286{ 287 int ok = 0; 288 unsigned int i; 289 290 if (ctx->tag.s) { 291 ZSTR_LEN(ctx->tag.s) = 0; 292 } 293 smart_str_appendl(&ctx->tag, start, YYCURSOR - start); 294 for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++) 295 ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]); 296 /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */ 297 if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) 298 ok = 1; 299 STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN; 300} 301 302static inline void handle_arg(STD_PARA) 303{ 304 if (ctx->arg.s) { 305 ZSTR_LEN(ctx->arg.s) = 0; 306 } 307 smart_str_appendl(&ctx->arg, start, YYCURSOR - start); 308} 309 310static inline void handle_val(STD_PARA, char quotes, char type) 311{ 312 smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); 313 tag_arg(ctx, quotes, type); 314} 315 316static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) 317{ 318 char *end, *q; 319 char *xp; 320 char *start; 321 size_t rest; 322 323 smart_str_appendl(&ctx->buf, newdata, newlen); 324 325 YYCURSOR = ZSTR_VAL(ctx->buf.s); 326 YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s); 327 328 switch (STATE) { 329 case STATE_PLAIN: goto state_plain; 330 case STATE_TAG: goto state_tag; 331 case STATE_NEXT_ARG: goto state_next_arg; 332 case STATE_ARG: goto state_arg; 333 case STATE_BEFORE_VAL: goto state_before_val; 334 case STATE_VAL: goto state_val; 335 } 336 337 338state_plain_begin: 339 STATE = STATE_PLAIN; 340 341state_plain: 342 start = YYCURSOR; 343/*!re2c 344 "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } 345 N+ { passthru(STD_ARGS); goto state_plain; } 346*/ 347 348state_tag: 349 start = YYCURSOR; 350/*!re2c 351 alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } 352 any { passthru(STD_ARGS); goto state_plain_begin; } 353*/ 354 355state_next_arg_begin: 356 STATE = STATE_NEXT_ARG; 357 358state_next_arg: 359 start = YYCURSOR; 360/*!re2c 361 [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } 362 [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } 363 alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } 364 any { passthru(STD_ARGS); goto state_plain_begin; } 365*/ 366 367state_arg: 368 start = YYCURSOR; 369/*!re2c 370 alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } 371 any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } 372*/ 373 374state_before_val: 375 start = YYCURSOR; 376/*!re2c 377 [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } 378 any { --YYCURSOR; goto state_next_arg_begin; } 379*/ 380 381 382state_val: 383 start = YYCURSOR; 384/*!re2c 385 ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } 386 ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } 387 (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; } 388 any { passthru(STD_ARGS); goto state_next_arg_begin; } 389*/ 390 391stop: 392 if (YYLIMIT < start) { 393 /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ 394 rest = 0; 395 } else { 396 rest = YYLIMIT - start; 397 scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); 398 } 399 400 if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest); 401 ZSTR_LEN(ctx->buf.s) = rest; 402} 403 404 405PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int urlencode) 406{ 407 char *result; 408 smart_str surl = {0}; 409 smart_str buf = {0}; 410 smart_str url_app = {0}; 411 zend_string *encoded; 412 413 smart_str_appendl(&surl, url, urllen); 414 415 if (urlencode) { 416 encoded = php_raw_url_encode(name, strlen(name)); 417 smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); 418 zend_string_free(encoded); 419 } else { 420 smart_str_appends(&url_app, name); 421 } 422 smart_str_appendc(&url_app, '='); 423 if (urlencode) { 424 encoded = php_raw_url_encode(value, strlen(value)); 425 smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); 426 zend_string_free(encoded); 427 } else { 428 smart_str_appends(&url_app, value); 429 } 430 431 append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output); 432 433 smart_str_0(&buf); 434 if (newlen) *newlen = ZSTR_LEN(buf.s); 435 result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s)); 436 437 smart_str_free(&url_app); 438 smart_str_free(&buf); 439 440 return result; 441} 442 443 444static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush) 445{ 446 url_adapt_state_ex_t *ctx; 447 char *retval; 448 449 ctx = &BG(url_adapt_state_ex); 450 451 xx_mainloop(ctx, src, srclen); 452 453 if (!ctx->result.s) { 454 smart_str_appendl(&ctx->result, "", 0); 455 *newlen = 0; 456 } else { 457 *newlen = ZSTR_LEN(ctx->result.s); 458 } 459 smart_str_0(&ctx->result); 460 if (do_flush) { 461 smart_str_append(&ctx->result, ctx->buf.s); 462 *newlen += ZSTR_LEN(ctx->buf.s); 463 smart_str_free(&ctx->buf); 464 smart_str_free(&ctx->val); 465 } 466 retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s)); 467 smart_str_free(&ctx->result); 468 return retval; 469} 470 471static int php_url_scanner_ex_activate(void) 472{ 473 url_adapt_state_ex_t *ctx; 474 475 ctx = &BG(url_adapt_state_ex); 476 477 memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags)); 478 479 return SUCCESS; 480} 481 482static int php_url_scanner_ex_deactivate(void) 483{ 484 url_adapt_state_ex_t *ctx; 485 486 ctx = &BG(url_adapt_state_ex); 487 488 smart_str_free(&ctx->result); 489 smart_str_free(&ctx->buf); 490 smart_str_free(&ctx->tag); 491 smart_str_free(&ctx->arg); 492 493 return SUCCESS; 494} 495 496static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode) 497{ 498 size_t len; 499 500 if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) { 501 *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0)); 502 if (sizeof(uint) < sizeof(size_t)) { 503 if (len > UINT_MAX) 504 len = UINT_MAX; 505 } 506 *handled_output_len = len; 507 } else if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) == 0) { 508 url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex); 509 if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) { 510 smart_str_append(&ctx->result, ctx->buf.s); 511 smart_str_appendl(&ctx->result, output, output_len); 512 513 *handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s)); 514 *handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len; 515 516 smart_str_free(&ctx->buf); 517 smart_str_free(&ctx->result); 518 } else { 519 *handled_output = estrndup(output, *handled_output_len = output_len); 520 } 521 } else { 522 *handled_output = NULL; 523 } 524} 525 526PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int urlencode) 527{ 528 smart_str sname = {0}; 529 smart_str svalue = {0}; 530 zend_string *encoded; 531 532 if (!BG(url_adapt_state_ex).active) { 533 php_url_scanner_ex_activate(); 534 php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS); 535 BG(url_adapt_state_ex).active = 1; 536 } 537 538 if (BG(url_adapt_state_ex).url_app.s && ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) { 539 smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output); 540 } 541 542 if (urlencode) { 543 encoded = php_raw_url_encode(name, name_len); 544 smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); 545 zend_string_free(encoded); 546 encoded = php_raw_url_encode(value, value_len); 547 smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); 548 zend_string_free(encoded); 549 } else { 550 smart_str_appendl(&sname, name, name_len); 551 smart_str_appendl(&svalue, value, value_len); 552 } 553 554 smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &sname); 555 smart_str_appendc(&BG(url_adapt_state_ex).url_app, '='); 556 smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &svalue); 557 558 smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\""); 559 smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &sname); 560 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\""); 561 smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &svalue); 562 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />"); 563 564 smart_str_free(&sname); 565 smart_str_free(&svalue); 566 567 return SUCCESS; 568} 569 570PHPAPI int php_url_scanner_reset_vars(void) 571{ 572 if (BG(url_adapt_state_ex).form_app.s) { 573 ZSTR_LEN(BG(url_adapt_state_ex).form_app.s) = 0; 574 } 575 if (BG(url_adapt_state_ex).url_app.s) { 576 ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) = 0; 577 } 578 579 return SUCCESS; 580} 581 582PHP_MINIT_FUNCTION(url_scanner) 583{ 584 BG(url_adapt_state_ex).tags = NULL; 585 586 BG(url_adapt_state_ex).form_app.s = BG(url_adapt_state_ex).url_app.s = NULL; 587 588 REGISTER_INI_ENTRIES(); 589 return SUCCESS; 590} 591 592PHP_MSHUTDOWN_FUNCTION(url_scanner) 593{ 594 UNREGISTER_INI_ENTRIES(); 595 596 return SUCCESS; 597} 598 599PHP_RINIT_FUNCTION(url_scanner) 600{ 601 BG(url_adapt_state_ex).active = 0; 602 603 return SUCCESS; 604} 605 606PHP_RSHUTDOWN_FUNCTION(url_scanner) 607{ 608 if (BG(url_adapt_state_ex).active) { 609 php_url_scanner_ex_deactivate(); 610 BG(url_adapt_state_ex).active = 0; 611 } 612 613 smart_str_free(&BG(url_adapt_state_ex).form_app); 614 smart_str_free(&BG(url_adapt_state_ex).url_app); 615 616 return SUCCESS; 617} 618