1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2016 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Sascha Schumann <sascha@schumann.cx> | 16 +----------------------------------------------------------------------+ 17*/ 18 19/* $Id$ */ 20 21#include "php.h" 22 23#ifdef HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#ifdef HAVE_LIMITS_H 27#include <limits.h> 28#endif 29 30#include <stdio.h> 31#include <stdlib.h> 32#include <string.h> 33 34#include "php_ini.h" 35#include "php_globals.h" 36#define STATE_TAG SOME_OTHER_STATE_TAG 37#include "basic_functions.h" 38#include "url.h" 39#undef STATE_TAG 40 41#define url_scanner url_scanner_ex 42 43#include "php_smart_str.h" 44 45static PHP_INI_MH(OnUpdateTags) 46{ 47 url_adapt_state_ex_t *ctx; 48 char *key; 49 char *lasts; 50 char *tmp; 51 52 ctx = &BG(url_adapt_state_ex); 53 54 tmp = estrndup(new_value, new_value_length); 55 56 if (ctx->tags) 57 zend_hash_destroy(ctx->tags); 58 else { 59 ctx->tags = malloc(sizeof(HashTable)); 60 if (!ctx->tags) { 61 return FAILURE; 62 } 63 } 64 65 zend_hash_init(ctx->tags, 0, NULL, NULL, 1); 66 67 for (key = php_strtok_r(tmp, ",", &lasts); 68 key; 69 key = php_strtok_r(NULL, ",", &lasts)) { 70 char *val; 71 72 val = strchr(key, '='); 73 if (val) { 74 char *q; 75 int keylen; 76 77 *val++ = '\0'; 78 for (q = key; *q; q++) 79 *q = tolower(*q); 80 keylen = q - key; 81 /* key is stored withOUT NUL 82 val is stored WITH NUL */ 83 zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL); 84 } 85 } 86 87 efree(tmp); 88 89 return SUCCESS; 90} 91 92PHP_INI_BEGIN() 93 STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) 94PHP_INI_END() 95 96/*!re2c 97any = [\000-\377]; 98N = (any\[<]); 99alpha = [a-zA-Z]; 100alphanamespace = [a-zA-Z:]; 101alphadash = ([a-zA-Z] | "-"); 102*/ 103 104#define YYFILL(n) goto done 105#define YYCTYPE unsigned char 106#define YYCURSOR p 107#define YYLIMIT q 108#define YYMARKER r 109 110static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator TSRMLS_DC) 111{ 112 register const char *p, *q; 113 const char *bash = NULL; 114 const char *sep = "?"; 115 116 /* 117 * Don't modify "//example.com" full path, unless 118 * HTTP_HOST matches. 119 */ 120 if (url->c[0] == '/' && url->c[1] == '/') { 121 zval **tmp, **http_host; 122 size_t target_len, host_len; 123 if (zend_hash_find(&EG(symbol_table), "_SERVER", sizeof("_SERVER"), (void **)&tmp) == FAILURE 124 || Z_TYPE_PP(tmp) != IS_ARRAY 125 || zend_hash_find(Z_ARRVAL_PP(tmp), "HTTP_HOST", sizeof("HTTP_HOST"), (void **)&http_host) == FAILURE 126 || Z_TYPE_PP(http_host) != IS_STRING) { 127 smart_str_append(dest, url); 128 return; 129 } 130 /* HTTP_HOST could be "example.com:8888", etc. */ 131 /* Need to find end of URL in buffer */ 132 host_len = strcspn(Z_STRVAL_PP(http_host), ":"); 133 target_len = strcspn(url->c+2, "/\"'?>\r\n"); 134 if (host_len 135 && host_len == target_len 136 && strncasecmp(Z_STRVAL_PP(http_host), url->c+2, host_len)) { 137 smart_str_append(dest, url); 138 return; 139 } 140 } 141 142 q = (p = url->c) + url->len; 143 144scan: 145/*!re2c 146 ":" { smart_str_append(dest, url); return; } 147 "?" { sep = separator; goto scan; } 148 "#" { bash = p - 1; goto done; } 149 (any\[:?#])+ { goto scan; } 150*/ 151done: 152 153 /* Don't modify URLs of the format "#mark" */ 154 if (bash && bash - url->c == 0) { 155 smart_str_append(dest, url); 156 return; 157 } 158 159 if (bash) 160 smart_str_appendl(dest, url->c, bash - url->c); 161 else 162 smart_str_append(dest, url); 163 164 smart_str_appends(dest, sep); 165 smart_str_append(dest, url_app); 166 167 if (bash) 168 smart_str_appendl(dest, bash, q - bash); 169} 170 171 172#undef YYFILL 173#undef YYCTYPE 174#undef YYCURSOR 175#undef YYLIMIT 176#undef YYMARKER 177 178static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC) 179{ 180 char f = 0; 181 182 if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0) 183 f = 1; 184 185 if (quotes) 186 smart_str_appendc(&ctx->result, type); 187 if (f) { 188 append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output TSRMLS_CC); 189 } else { 190 smart_str_append(&ctx->result, &ctx->val); 191 } 192 if (quotes) 193 smart_str_appendc(&ctx->result, type); 194} 195 196enum { 197 STATE_PLAIN = 0, 198 STATE_TAG, 199 STATE_NEXT_ARG, 200 STATE_ARG, 201 STATE_BEFORE_VAL, 202 STATE_VAL 203}; 204 205#define YYFILL(n) goto stop 206#define YYCTYPE unsigned char 207#define YYCURSOR xp 208#define YYLIMIT end 209#define YYMARKER q 210#define STATE ctx->state 211 212#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC 213#define STD_ARGS ctx, start, xp TSRMLS_CC 214 215#if SCANNER_DEBUG 216#define scdebug(x) printf x 217#else 218#define scdebug(x) 219#endif 220 221static inline void passthru(STD_PARA) 222{ 223 scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start)); 224 smart_str_appendl(&ctx->result, start, YYCURSOR - start); 225} 226 227/* 228 * This function appends a hidden input field after a <form> or 229 * <fieldset>. The latter is important for XHTML. 230 */ 231 232static void handle_form(STD_PARA) 233{ 234 int doit = 0; 235 236 if (ctx->form_app.len > 0) { 237 switch (ctx->tag.len) { 238 case sizeof("form") - 1: 239 if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) { 240 doit = 1; 241 } 242 if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) { 243 char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len); 244 if (p) { 245 e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p); 246 if (!e) { 247 e = ctx->val.c + ctx->val.len; 248 } 249 if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) { 250 doit = 0; 251 } 252 } 253 } 254 break; 255 256 case sizeof("fieldset") - 1: 257 if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) { 258 doit = 1; 259 } 260 break; 261 } 262 263 if (doit) 264 smart_str_append(&ctx->result, &ctx->form_app); 265 } 266} 267 268/* 269 * HANDLE_TAG copies the HTML Tag and checks whether we 270 * have that tag in our table. If we might modify it, 271 * we continue to scan the tag, otherwise we simply copy the complete 272 * HTML stuff to the result buffer. 273 */ 274 275static inline void handle_tag(STD_PARA) 276{ 277 int ok = 0; 278 unsigned int i; 279 280 ctx->tag.len = 0; 281 smart_str_appendl(&ctx->tag, start, YYCURSOR - start); 282 for (i = 0; i < ctx->tag.len; i++) 283 ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]); 284 if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS) 285 ok = 1; 286 STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN; 287} 288 289static inline void handle_arg(STD_PARA) 290{ 291 ctx->arg.len = 0; 292 smart_str_appendl(&ctx->arg, start, YYCURSOR - start); 293} 294 295static inline void handle_val(STD_PARA, char quotes, char type) 296{ 297 smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); 298 tag_arg(ctx, quotes, type TSRMLS_CC); 299} 300 301static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC) 302{ 303 char *end, *q; 304 char *xp; 305 char *start; 306 int rest; 307 308 smart_str_appendl(&ctx->buf, newdata, newlen); 309 310 YYCURSOR = ctx->buf.c; 311 YYLIMIT = ctx->buf.c + ctx->buf.len; 312 313 switch (STATE) { 314 case STATE_PLAIN: goto state_plain; 315 case STATE_TAG: goto state_tag; 316 case STATE_NEXT_ARG: goto state_next_arg; 317 case STATE_ARG: goto state_arg; 318 case STATE_BEFORE_VAL: goto state_before_val; 319 case STATE_VAL: goto state_val; 320 } 321 322 323state_plain_begin: 324 STATE = STATE_PLAIN; 325 326state_plain: 327 start = YYCURSOR; 328/*!re2c 329 "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } 330 N+ { passthru(STD_ARGS); goto state_plain; } 331*/ 332 333state_tag: 334 start = YYCURSOR; 335/*!re2c 336 alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } 337 any { passthru(STD_ARGS); goto state_plain_begin; } 338*/ 339 340state_next_arg_begin: 341 STATE = STATE_NEXT_ARG; 342 343state_next_arg: 344 start = YYCURSOR; 345/*!re2c 346 [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } 347 [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } 348 alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } 349 any { passthru(STD_ARGS); goto state_plain_begin; } 350*/ 351 352state_arg: 353 start = YYCURSOR; 354/*!re2c 355 alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } 356 any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } 357*/ 358 359state_before_val: 360 start = YYCURSOR; 361/*!re2c 362 [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } 363 any { --YYCURSOR; goto state_next_arg_begin; } 364*/ 365 366 367state_val: 368 start = YYCURSOR; 369/*!re2c 370 ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } 371 ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } 372 (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; } 373 any { passthru(STD_ARGS); goto state_next_arg_begin; } 374*/ 375 376stop: 377 rest = YYLIMIT - start; 378 scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); 379 /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ 380 if (rest < 0) rest = 0; 381 382 if (rest) memmove(ctx->buf.c, start, rest); 383 ctx->buf.len = rest; 384} 385 386char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC) 387{ 388 smart_str surl = {0}; 389 smart_str buf = {0}; 390 smart_str url_app = {0}; 391 392 smart_str_setl(&surl, url, urllen); 393 394 smart_str_appends(&url_app, name); 395 smart_str_appendc(&url_app, '='); 396 smart_str_appends(&url_app, value); 397 398 append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output TSRMLS_CC); 399 400 smart_str_0(&buf); 401 if (newlen) *newlen = buf.len; 402 403 smart_str_free(&url_app); 404 405 return buf.c; 406} 407 408 409static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC) 410{ 411 url_adapt_state_ex_t *ctx; 412 char *retval; 413 414 ctx = &BG(url_adapt_state_ex); 415 416 xx_mainloop(ctx, src, srclen TSRMLS_CC); 417 418 *newlen = ctx->result.len; 419 if (!ctx->result.c) { 420 smart_str_appendl(&ctx->result, "", 0); 421 } 422 smart_str_0(&ctx->result); 423 if (do_flush) { 424 smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); 425 *newlen += ctx->buf.len; 426 smart_str_free(&ctx->buf); 427 } 428 retval = ctx->result.c; 429 ctx->result.c = NULL; 430 ctx->result.len = 0; 431 return retval; 432} 433 434static int php_url_scanner_ex_activate(TSRMLS_D) 435{ 436 url_adapt_state_ex_t *ctx; 437 438 ctx = &BG(url_adapt_state_ex); 439 440 memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags)); 441 442 return SUCCESS; 443} 444 445static int php_url_scanner_ex_deactivate(TSRMLS_D) 446{ 447 url_adapt_state_ex_t *ctx; 448 449 ctx = &BG(url_adapt_state_ex); 450 451 smart_str_free(&ctx->result); 452 smart_str_free(&ctx->buf); 453 smart_str_free(&ctx->tag); 454 smart_str_free(&ctx->arg); 455 456 return SUCCESS; 457} 458 459static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC) 460{ 461 size_t len; 462 463 if (BG(url_adapt_state_ex).url_app.len != 0) { 464 *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC); 465 if (sizeof(uint) < sizeof(size_t)) { 466 if (len > UINT_MAX) 467 len = UINT_MAX; 468 } 469 *handled_output_len = len; 470 } else if (BG(url_adapt_state_ex).url_app.len == 0) { 471 url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex); 472 if (ctx->buf.len) { 473 smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); 474 smart_str_appendl(&ctx->result, output, output_len); 475 476 *handled_output = ctx->result.c; 477 *handled_output_len = ctx->buf.len + output_len; 478 479 ctx->result.c = NULL; 480 ctx->result.len = 0; 481 smart_str_free(&ctx->buf); 482 } else { 483 *handled_output = estrndup(output, *handled_output_len = output_len); 484 } 485 } else { 486 *handled_output = NULL; 487 } 488} 489 490PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC) 491{ 492 char *encoded = NULL; 493 int encoded_len; 494 smart_str val; 495 496 if (! BG(url_adapt_state_ex).active) { 497 php_url_scanner_ex_activate(TSRMLS_C); 498 php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC); 499 BG(url_adapt_state_ex).active = 1; 500 } 501 502 503 if (BG(url_adapt_state_ex).url_app.len != 0) { 504 smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output); 505 } 506 507 if (urlencode) { 508 encoded = php_url_encode(value, value_len, &encoded_len); 509 smart_str_setl(&val, encoded, encoded_len); 510 } else { 511 smart_str_setl(&val, value, value_len); 512 } 513 514 smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len); 515 smart_str_appendc(&BG(url_adapt_state_ex).url_app, '='); 516 smart_str_append(&BG(url_adapt_state_ex).url_app, &val); 517 518 smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\""); 519 smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len); 520 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\""); 521 smart_str_append(&BG(url_adapt_state_ex).form_app, &val); 522 smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />"); 523 524 if (urlencode) 525 efree(encoded); 526 527 return SUCCESS; 528} 529 530PHPAPI int php_url_scanner_reset_vars(TSRMLS_D) 531{ 532 BG(url_adapt_state_ex).form_app.len = 0; 533 BG(url_adapt_state_ex).url_app.len = 0; 534 535 return SUCCESS; 536} 537 538PHP_MINIT_FUNCTION(url_scanner) 539{ 540 BG(url_adapt_state_ex).tags = NULL; 541 542 BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0; 543 BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0; 544 545 REGISTER_INI_ENTRIES(); 546 return SUCCESS; 547} 548 549PHP_MSHUTDOWN_FUNCTION(url_scanner) 550{ 551 UNREGISTER_INI_ENTRIES(); 552 553 return SUCCESS; 554} 555 556PHP_RINIT_FUNCTION(url_scanner) 557{ 558 BG(url_adapt_state_ex).active = 0; 559 560 return SUCCESS; 561} 562 563PHP_RSHUTDOWN_FUNCTION(url_scanner) 564{ 565 if (BG(url_adapt_state_ex).active) { 566 php_url_scanner_ex_deactivate(TSRMLS_C); 567 BG(url_adapt_state_ex).active = 0; 568 } 569 570 smart_str_free(&BG(url_adapt_state_ex).form_app); 571 smart_str_free(&BG(url_adapt_state_ex).url_app); 572 573 return SUCCESS; 574} 575