1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_string.h"
22 #include "php_variables.h"
23 #include <locale.h>
24 #ifdef HAVE_LANGINFO_H
25 # include <langinfo.h>
26 #endif
27
28 #ifdef HAVE_LIBINTL
29 # include <libintl.h> /* For LC_MESSAGES */
30 #endif
31
32 #include "scanf.h"
33 #include "zend_API.h"
34 #include "zend_execute.h"
35 #include "php_globals.h"
36 #include "basic_functions.h"
37 #include "zend_smart_str.h"
38 #include <Zend/zend_exceptions.h>
39 #ifdef ZTS
40 #include "TSRM.h"
41 #endif
42
43 /* For str_getcsv() support */
44 #include "ext/standard/file.h"
45 /* For php_next_utf8_char() */
46 #include "ext/standard/html.h"
47 #include "ext/random/php_random.h"
48
49 #ifdef __SSE2__
50 #include <emmintrin.h>
51 #include "Zend/zend_bitset.h"
52 #endif
53
/* Lookup table mapping a 4-bit value (0-15) to its lowercase hexadecimal digit;
 * php_bin2hex() indexes it with the high and low nibble of each input byte.
 * This is read-only, so it's ok (presumably: safe to share without locking). */
ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
56
/* localeconv mutex: only present in ZTS builds. It is allocated in
 * PHP_MINIT_FUNCTION(localeconv), held by localeconv_r() while the locale
 * data is copied, and freed in PHP_MSHUTDOWN_FUNCTION(localeconv). */
#ifdef ZTS
static MUTEX_T locale_mutex = NULL;
#endif
61
62 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)63 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
64 {
65 zend_string *result;
66 size_t i, j;
67
68 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
69
70 for (i = j = 0; i < oldlen; i++) {
71 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
72 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
73 }
74 ZSTR_VAL(result)[j] = '\0';
75
76 return result;
77 }
78 /* }}} */
79
80 /* {{{ php_hex2bin */
php_hex2bin(const unsigned char * old,const size_t oldlen)81 static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
82 {
83 size_t target_length = oldlen >> 1;
84 zend_string *str = zend_string_alloc(target_length, 0);
85 unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
86 size_t i, j;
87
88 for (i = j = 0; i < target_length; i++) {
89 unsigned char c = old[j++];
90 unsigned char l = c & ~0x20;
91 int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
92 unsigned char d;
93
94 /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
95 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
96 d = (l - 0x10 - 0x27 * is_letter) << 4;
97 } else {
98 zend_string_efree(str);
99 return NULL;
100 }
101 c = old[j++];
102 l = c & ~0x20;
103 is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
104 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
105 d |= l - 0x10 - 0x27 * is_letter;
106 } else {
107 zend_string_efree(str);
108 return NULL;
109 }
110 ret[i] = d;
111 }
112 ret[i] = '\0';
113
114 return str;
115 }
116 /* }}} */
117
118 /* {{{ localeconv_r
119 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
localeconv_r(struct lconv * out)120 PHPAPI struct lconv *localeconv_r(struct lconv *out)
121 {
122
123 #ifdef ZTS
124 tsrm_mutex_lock( locale_mutex );
125 #endif
126
127 /* cur->locinfo is struct __crt_locale_info which implementation is
128 hidden in vc14. TODO revisit this and check if a workaround available
129 and needed. */
130 #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
131 {
132 /* Even with the enabled per thread locale, localeconv
133 won't check any locale change in the master thread. */
134 _locale_t cur = _get_current_locale();
135 *out = *cur->locinfo->lconv;
136 _free_locale(cur);
137 }
138 #else
139 /* localeconv doesn't return an error condition */
140 *out = *localeconv();
141 #endif
142
143 #ifdef ZTS
144 tsrm_mutex_unlock( locale_mutex );
145 #endif
146
147 return out;
148 }
149 /* }}} */
150
151 #ifdef ZTS
/* {{{ PHP_MINIT_FUNCTION
 * Module startup hook (ZTS builds only): allocates the mutex that
 * localeconv_r() locks, and always reports SUCCESS. */
PHP_MINIT_FUNCTION(localeconv)
{
	locale_mutex = tsrm_mutex_alloc();
	return SUCCESS;
}
/* }}} */
159
/* {{{ PHP_MSHUTDOWN_FUNCTION
 * Module shutdown hook (ZTS builds only): frees the localeconv mutex and
 * resets the pointer to NULL so a stale handle is never left behind. */
PHP_MSHUTDOWN_FUNCTION(localeconv)
{
	tsrm_mutex_free( locale_mutex );
	locale_mutex = NULL;
	return SUCCESS;
}
/* }}} */
168 #endif
169
170 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)171 PHP_FUNCTION(bin2hex)
172 {
173 zend_string *result;
174 zend_string *data;
175
176 ZEND_PARSE_PARAMETERS_START(1, 1)
177 Z_PARAM_STR(data)
178 ZEND_PARSE_PARAMETERS_END();
179
180 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
181
182 RETURN_STR(result);
183 }
184 /* }}} */
185
186 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)187 PHP_FUNCTION(hex2bin)
188 {
189 zend_string *result, *data;
190
191 ZEND_PARSE_PARAMETERS_START(1, 1)
192 Z_PARAM_STR(data)
193 ZEND_PARSE_PARAMETERS_END();
194
195 if (ZSTR_LEN(data) % 2 != 0) {
196 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
197 RETURN_FALSE;
198 }
199
200 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
201
202 if (!result) {
203 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
204 RETURN_FALSE;
205 }
206
207 RETVAL_STR(result);
208 }
209 /* }}} */
210
/* Shared implementation of strspn() and strcspn().
 *
 * Parses (string, mask[, offset[, length]]), clamps offset and length to the
 * subject string (negative values count from the end), and then runs
 * php_strspn() or php_strcspn() on the selected window depending on behavior
 * (PHP_STR_STRSPN or PHP_STR_STRCSPN). An empty window yields 0.
 */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
{
	zend_string *subject, *mask;
	zend_long start = 0, len = 0;
	bool len_is_null = 1;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(subject)
		Z_PARAM_STR(mask)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(start)
		Z_PARAM_LONG_OR_NULL(len, len_is_null)
	ZEND_PARSE_PARAMETERS_END();

	size_t avail = ZSTR_LEN(subject);

	/* Clamp the offset into [0, length of subject] */
	if (start >= 0) {
		if ((size_t) start > avail) {
			start = avail;
		}
	} else {
		start += avail;
		if (start < 0) {
			start = 0;
		}
	}

	/* From here on, avail is what remains after the offset */
	avail -= start;

	if (len_is_null) {
		len = avail;
	} else if (len >= 0) {
		if ((size_t) len > avail) {
			len = avail;
		}
	} else {
		len += avail;
		if (len < 0) {
			len = 0;
		}
	}

	if (len == 0) {
		RETURN_LONG(0);
	}

	char *window_start = ZSTR_VAL(subject) + start;
	char *window_end = window_start + len;
	char *mask_start = ZSTR_VAL(mask);
	char *mask_end = mask_start + ZSTR_LEN(mask);

	if (behavior != PHP_STR_STRSPN) {
		ZEND_ASSERT(behavior == PHP_STR_STRCSPN);
		RETURN_LONG(php_strcspn(window_start, mask_start, window_end, mask_end));
	}

	RETURN_LONG(php_strspn(window_start, mask_start, window_end, mask_end));
}
/* }}} */
267
/* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start and/or length is provided, works like strspn(substr($s,$start,$len),$good_chars).
 * Thin wrapper: forwards to php_spn_common_handler() in PHP_STR_STRSPN mode. */
PHP_FUNCTION(strspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRSPN);
}
/* }}} */
274
/* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start and/or length is provided, works like strcspn(substr($s,$start,$len),$bad_chars).
 * Thin wrapper: forwards to php_spn_common_handler() in PHP_STR_STRCSPN mode. */
PHP_FUNCTION(strcspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRCSPN);
}
/* }}} */
281
282 #ifdef HAVE_NL_LANGINFO
283 /* {{{ Query language and locale information */
PHP_FUNCTION(nl_langinfo)284 PHP_FUNCTION(nl_langinfo)
285 {
286 zend_long item;
287 char *value;
288
289 ZEND_PARSE_PARAMETERS_START(1, 1)
290 Z_PARAM_LONG(item)
291 ZEND_PARSE_PARAMETERS_END();
292
293 switch(item) { /* {{{ */
294 #ifdef ABDAY_1
295 case ABDAY_1:
296 case ABDAY_2:
297 case ABDAY_3:
298 case ABDAY_4:
299 case ABDAY_5:
300 case ABDAY_6:
301 case ABDAY_7:
302 #endif
303 #ifdef DAY_1
304 case DAY_1:
305 case DAY_2:
306 case DAY_3:
307 case DAY_4:
308 case DAY_5:
309 case DAY_6:
310 case DAY_7:
311 #endif
312 #ifdef ABMON_1
313 case ABMON_1:
314 case ABMON_2:
315 case ABMON_3:
316 case ABMON_4:
317 case ABMON_5:
318 case ABMON_6:
319 case ABMON_7:
320 case ABMON_8:
321 case ABMON_9:
322 case ABMON_10:
323 case ABMON_11:
324 case ABMON_12:
325 #endif
326 #ifdef MON_1
327 case MON_1:
328 case MON_2:
329 case MON_3:
330 case MON_4:
331 case MON_5:
332 case MON_6:
333 case MON_7:
334 case MON_8:
335 case MON_9:
336 case MON_10:
337 case MON_11:
338 case MON_12:
339 #endif
340 #ifdef AM_STR
341 case AM_STR:
342 #endif
343 #ifdef PM_STR
344 case PM_STR:
345 #endif
346 #ifdef D_T_FMT
347 case D_T_FMT:
348 #endif
349 #ifdef D_FMT
350 case D_FMT:
351 #endif
352 #ifdef T_FMT
353 case T_FMT:
354 #endif
355 #ifdef T_FMT_AMPM
356 case T_FMT_AMPM:
357 #endif
358 #ifdef ERA
359 case ERA:
360 #endif
361 #ifdef ERA_YEAR
362 case ERA_YEAR:
363 #endif
364 #ifdef ERA_D_T_FMT
365 case ERA_D_T_FMT:
366 #endif
367 #ifdef ERA_D_FMT
368 case ERA_D_FMT:
369 #endif
370 #ifdef ERA_T_FMT
371 case ERA_T_FMT:
372 #endif
373 #ifdef ALT_DIGITS
374 case ALT_DIGITS:
375 #endif
376 #ifdef INT_CURR_SYMBOL
377 case INT_CURR_SYMBOL:
378 #endif
379 #ifdef CURRENCY_SYMBOL
380 case CURRENCY_SYMBOL:
381 #endif
382 #ifdef CRNCYSTR
383 case CRNCYSTR:
384 #endif
385 #ifdef MON_DECIMAL_POINT
386 case MON_DECIMAL_POINT:
387 #endif
388 #ifdef MON_THOUSANDS_SEP
389 case MON_THOUSANDS_SEP:
390 #endif
391 #ifdef MON_GROUPING
392 case MON_GROUPING:
393 #endif
394 #ifdef POSITIVE_SIGN
395 case POSITIVE_SIGN:
396 #endif
397 #ifdef NEGATIVE_SIGN
398 case NEGATIVE_SIGN:
399 #endif
400 #ifdef INT_FRAC_DIGITS
401 case INT_FRAC_DIGITS:
402 #endif
403 #ifdef FRAC_DIGITS
404 case FRAC_DIGITS:
405 #endif
406 #ifdef P_CS_PRECEDES
407 case P_CS_PRECEDES:
408 #endif
409 #ifdef P_SEP_BY_SPACE
410 case P_SEP_BY_SPACE:
411 #endif
412 #ifdef N_CS_PRECEDES
413 case N_CS_PRECEDES:
414 #endif
415 #ifdef N_SEP_BY_SPACE
416 case N_SEP_BY_SPACE:
417 #endif
418 #ifdef P_SIGN_POSN
419 case P_SIGN_POSN:
420 #endif
421 #ifdef N_SIGN_POSN
422 case N_SIGN_POSN:
423 #endif
424 #ifdef DECIMAL_POINT
425 case DECIMAL_POINT:
426 #elif defined(RADIXCHAR)
427 case RADIXCHAR:
428 #endif
429 #ifdef THOUSANDS_SEP
430 case THOUSANDS_SEP:
431 #elif defined(THOUSEP)
432 case THOUSEP:
433 #endif
434 #ifdef GROUPING
435 case GROUPING:
436 #endif
437 #ifdef YESEXPR
438 case YESEXPR:
439 #endif
440 #ifdef NOEXPR
441 case NOEXPR:
442 #endif
443 #ifdef YESSTR
444 case YESSTR:
445 #endif
446 #ifdef NOSTR
447 case NOSTR:
448 #endif
449 #ifdef CODESET
450 case CODESET:
451 #endif
452 break;
453 default:
454 php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
455 RETURN_FALSE;
456 }
457 /* }}} */
458
459 value = nl_langinfo(item);
460 if (value == NULL) {
461 RETURN_FALSE;
462 } else {
463 RETURN_STRING(value);
464 }
465 }
466 #endif
467 /* }}} */
468
469 /* {{{ Compares two strings using the current locale */
PHP_FUNCTION(strcoll)470 PHP_FUNCTION(strcoll)
471 {
472 zend_string *s1, *s2;
473
474 ZEND_PARSE_PARAMETERS_START(2, 2)
475 Z_PARAM_STR(s1)
476 Z_PARAM_STR(s2)
477 ZEND_PARSE_PARAMETERS_END();
478
479 RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
480 (const char *) ZSTR_VAL(s2)));
481 }
482 /* }}} */
483
484 /* {{{ php_charmask
485 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
486 * it needs to be incrementing.
487 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
488 */
php_charmask(const unsigned char * input,size_t len,char * mask)489 static inline zend_result php_charmask(const unsigned char *input, size_t len, char *mask)
490 {
491 const unsigned char *end;
492 unsigned char c;
493 zend_result result = SUCCESS;
494
495 memset(mask, 0, 256);
496 for (end = input+len; input < end; input++) {
497 c=*input;
498 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
499 && input[3] >= c) {
500 memset(mask+c, 1, input[3] - c + 1);
501 input+=3;
502 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
503 /* Error, try to be as helpful as possible:
504 (a range ending/starting with '.' won't be captured here) */
505 if (end-len >= input) { /* there was no 'left' char */
506 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
507 result = FAILURE;
508 continue;
509 }
510 if (input+2 >= end) { /* there is no 'right' char */
511 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
512 result = FAILURE;
513 continue;
514 }
515 if (input[-1] > input[2]) { /* wrong order */
516 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
517 result = FAILURE;
518 continue;
519 }
520 /* FIXME: better error (a..b..c is the only left possibility?) */
521 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
522 result = FAILURE;
523 continue;
524 } else {
525 mask[c]=1;
526 }
527 }
528 return result;
529 }
530 /* }}} */
531
532 /* {{{ php_trim_int()
533 * mode 1 : trim left
534 * mode 2 : trim right
535 * mode 3 : trim left and right
536 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
537 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)538 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
539 {
540 const char *start = ZSTR_VAL(str);
541 const char *end = start + ZSTR_LEN(str);
542 char mask[256];
543
544 if (what) {
545 if (what_len == 1) {
546 char p = *what;
547 if (mode & 1) {
548 while (start != end) {
549 if (*start == p) {
550 start++;
551 } else {
552 break;
553 }
554 }
555 }
556 if (mode & 2) {
557 while (start != end) {
558 if (*(end-1) == p) {
559 end--;
560 } else {
561 break;
562 }
563 }
564 }
565 } else {
566 php_charmask((const unsigned char *) what, what_len, mask);
567
568 if (mode & 1) {
569 while (start != end) {
570 if (mask[(unsigned char)*start]) {
571 start++;
572 } else {
573 break;
574 }
575 }
576 }
577 if (mode & 2) {
578 while (start != end) {
579 if (mask[(unsigned char)*(end-1)]) {
580 end--;
581 } else {
582 break;
583 }
584 }
585 }
586 }
587 } else {
588 if (mode & 1) {
589 while (start != end) {
590 unsigned char c = (unsigned char)*start;
591
592 if (c <= ' ' &&
593 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
594 start++;
595 } else {
596 break;
597 }
598 }
599 }
600 if (mode & 2) {
601 while (start != end) {
602 unsigned char c = (unsigned char)*(end-1);
603
604 if (c <= ' ' &&
605 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
606 end--;
607 } else {
608 break;
609 }
610 }
611 }
612 }
613
614 if (ZSTR_LEN(str) == end - start) {
615 return zend_string_copy(str);
616 } else if (end - start == 0) {
617 return ZSTR_EMPTY_ALLOC();
618 } else {
619 return zend_string_init(start, end - start, 0);
620 }
621 }
622 /* }}} */
623
/* {{{ php_trim()
 * Exported (PHPAPI) entry point that forwards unchanged to php_trim_int().
 * mode 1 : trim left
 * mode 2 : trim right
 * mode 3 : trim left and right
 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
 */
PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
{
	return php_trim_int(str, what, what_len, mode);
}
/* }}} */
635
636 /* {{{ php_do_trim
637 * Base for trim(), rtrim() and ltrim() functions.
638 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)639 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
640 {
641 zend_string *str;
642 zend_string *what = NULL;
643
644 ZEND_PARSE_PARAMETERS_START(1, 2)
645 Z_PARAM_STR(str)
646 Z_PARAM_OPTIONAL
647 Z_PARAM_STR(what)
648 ZEND_PARSE_PARAMETERS_END();
649
650 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
651 }
652 /* }}} */
653
/* {{{ Strips whitespace from the beginning and end of a string
 * Delegates to php_do_trim() with mode 3 (left and right). */
PHP_FUNCTION(trim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
}
/* }}} */
660
661 ZEND_FRAMELESS_FUNCTION(trim, 1)
662 {
663 zval str_tmp;
664 zend_string *str;
665
666 Z_FLF_PARAM_STR(1, str, str_tmp);
667
668 ZVAL_STR(return_value, php_trim_int(str, /* what */ NULL, /* what_len */ 0, /* mode */ 3));
669
670 flf_clean:
671 Z_FLF_PARAM_FREE_STR(1, str_tmp);
672 }
673
674 ZEND_FRAMELESS_FUNCTION(trim, 2)
675 {
676 zval str_tmp, what_tmp;
677 zend_string *str, *what;
678
679 Z_FLF_PARAM_STR(1, str, str_tmp);
680 Z_FLF_PARAM_STR(2, what, what_tmp);
681
682 ZVAL_STR(return_value, php_trim_int(str, ZSTR_VAL(what), ZSTR_LEN(what), /* mode */ 3));
683
684 flf_clean:
685 Z_FLF_PARAM_FREE_STR(1, str_tmp);
686 Z_FLF_PARAM_FREE_STR(2, what_tmp);
687 }
688
/* {{{ Removes trailing whitespace
 * Delegates to php_do_trim() with mode 2 (right only). */
PHP_FUNCTION(rtrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
/* }}} */
695
/* {{{ Strips whitespace from the beginning of a string
 * Delegates to php_do_trim() with mode 1 (left only). */
PHP_FUNCTION(ltrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
702
703 /* {{{ Wraps buffer to selected number of characters using string break char */
PHP_FUNCTION(wordwrap)704 PHP_FUNCTION(wordwrap)
705 {
706 zend_string *text;
707 char *breakchar = "\n";
708 size_t newtextlen, chk, breakchar_len = 1;
709 size_t alloced;
710 zend_long current = 0, laststart = 0, lastspace = 0;
711 zend_long linelength = 75;
712 bool docut = 0;
713 zend_string *newtext;
714
715 ZEND_PARSE_PARAMETERS_START(1, 4)
716 Z_PARAM_STR(text)
717 Z_PARAM_OPTIONAL
718 Z_PARAM_LONG(linelength)
719 Z_PARAM_STRING(breakchar, breakchar_len)
720 Z_PARAM_BOOL(docut)
721 ZEND_PARSE_PARAMETERS_END();
722
723 if (ZSTR_LEN(text) == 0) {
724 RETURN_EMPTY_STRING();
725 }
726
727 if (breakchar_len == 0) {
728 zend_argument_value_error(3, "cannot be empty");
729 RETURN_THROWS();
730 }
731
732 if (linelength == 0 && docut) {
733 zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
734 RETURN_THROWS();
735 }
736
737 /* Special case for a single-character break as it needs no
738 additional storage space */
739 if (breakchar_len == 1 && !docut) {
740 newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
741
742 laststart = lastspace = 0;
743 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
744 if (ZSTR_VAL(text)[current] == breakchar[0]) {
745 laststart = lastspace = current + 1;
746 } else if (ZSTR_VAL(text)[current] == ' ') {
747 if (current - laststart >= linelength) {
748 ZSTR_VAL(newtext)[current] = breakchar[0];
749 laststart = current + 1;
750 }
751 lastspace = current;
752 } else if (current - laststart >= linelength && laststart != lastspace) {
753 ZSTR_VAL(newtext)[lastspace] = breakchar[0];
754 laststart = lastspace + 1;
755 }
756 }
757
758 RETURN_NEW_STR(newtext);
759 } else {
760 /* Multiple character line break or forced cut */
761 if (linelength > 0) {
762 chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
763 newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
764 alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
765 } else {
766 chk = ZSTR_LEN(text);
767 alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
768 newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
769 }
770
771 /* now keep track of the actual new text length */
772 newtextlen = 0;
773
774 laststart = lastspace = 0;
775 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
776 if (chk == 0) {
777 alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
778 newtext = zend_string_extend(newtext, alloced, 0);
779 chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
780 }
781 /* when we hit an existing break, copy to new buffer, and
782 * fix up laststart and lastspace */
783 if (ZSTR_VAL(text)[current] == breakchar[0]
784 && current + breakchar_len < ZSTR_LEN(text)
785 && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
786 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
787 newtextlen += current - laststart + breakchar_len;
788 current += breakchar_len - 1;
789 laststart = lastspace = current + 1;
790 chk--;
791 }
792 /* if it is a space, check if it is at the line boundary,
793 * copy and insert a break, or just keep track of it */
794 else if (ZSTR_VAL(text)[current] == ' ') {
795 if (current - laststart >= linelength) {
796 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
797 newtextlen += current - laststart;
798 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
799 newtextlen += breakchar_len;
800 laststart = current + 1;
801 chk--;
802 }
803 lastspace = current;
804 }
805 /* if we are cutting, and we've accumulated enough
806 * characters, and we haven't see a space for this line,
807 * copy and insert a break. */
808 else if (current - laststart >= linelength
809 && docut && laststart >= lastspace) {
810 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
811 newtextlen += current - laststart;
812 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
813 newtextlen += breakchar_len;
814 laststart = lastspace = current;
815 chk--;
816 }
817 /* if the current word puts us over the linelength, copy
818 * back up until the last space, insert a break, and move
819 * up the laststart */
820 else if (current - laststart >= linelength
821 && laststart < lastspace) {
822 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
823 newtextlen += lastspace - laststart;
824 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
825 newtextlen += breakchar_len;
826 laststart = lastspace = lastspace + 1;
827 chk--;
828 }
829 }
830
831 /* copy over any stragglers */
832 if (laststart != current) {
833 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
834 newtextlen += current - laststart;
835 }
836
837 ZSTR_VAL(newtext)[newtextlen] = '\0';
838 /* free unused memory */
839 newtext = zend_string_truncate(newtext, newtextlen, 0);
840
841 RETURN_NEW_STR(newtext);
842 }
843 }
844 /* }}} */
845
846 /* {{{ php_explode */
php_explode(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)847 PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
848 {
849 const char *p1 = ZSTR_VAL(str);
850 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
851 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
852 zval tmp;
853
854 if (p2 == NULL) {
855 ZVAL_STR_COPY(&tmp, str);
856 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
857 } else {
858 zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
859 ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
860 do {
861 ZEND_HASH_FILL_GROW();
862 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
863 ZEND_HASH_FILL_NEXT();
864 p1 = p2 + ZSTR_LEN(delim);
865 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
866 } while (p2 != NULL && --limit > 1);
867
868 if (p1 <= endp) {
869 ZEND_HASH_FILL_GROW();
870 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
871 ZEND_HASH_FILL_NEXT();
872 }
873 } ZEND_HASH_FILL_END();
874 }
875 }
876 /* }}} */
877
878 /* {{{ php_explode_negative_limit */
php_explode_negative_limit(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)879 PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
880 {
881 #define EXPLODE_ALLOC_STEP 64
882 const char *p1 = ZSTR_VAL(str);
883 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
884 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
885 zval tmp;
886
887 if (p2 == NULL) {
888 /*
889 do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
890 by doing nothing we return empty array
891 */
892 } else {
893 size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
894 zend_long i, to_return;
895 const char **positions = emalloc(allocated * sizeof(char *));
896
897 positions[found++] = p1;
898 do {
899 if (found >= allocated) {
900 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
901 positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
902 }
903 positions[found++] = p1 = p2 + ZSTR_LEN(delim);
904 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
905 } while (p2 != NULL);
906
907 to_return = limit + found;
908 /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
909 for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
910 ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
911 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
912 }
913 efree((void *)positions);
914 }
915 #undef EXPLODE_ALLOC_STEP
916 }
917 /* }}} */
918
919 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)920 PHP_FUNCTION(explode)
921 {
922 zend_string *str, *delim;
923 zend_long limit = ZEND_LONG_MAX; /* No limit */
924 zval tmp;
925
926 ZEND_PARSE_PARAMETERS_START(2, 3)
927 Z_PARAM_STR(delim)
928 Z_PARAM_STR(str)
929 Z_PARAM_OPTIONAL
930 Z_PARAM_LONG(limit)
931 ZEND_PARSE_PARAMETERS_END();
932
933 if (ZSTR_LEN(delim) == 0) {
934 zend_argument_value_error(1, "cannot be empty");
935 RETURN_THROWS();
936 }
937
938 array_init(return_value);
939
940 if (ZSTR_LEN(str) == 0) {
941 if (limit >= 0) {
942 ZVAL_EMPTY_STRING(&tmp);
943 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
944 }
945 return;
946 }
947
948 if (limit > 1) {
949 php_explode(delim, str, return_value, limit);
950 } else if (limit < 0) {
951 php_explode_negative_limit(delim, str, return_value, limit);
952 } else {
953 ZVAL_STR_COPY(&tmp, str);
954 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
955 }
956 }
957 /* }}} */
958
959 /* {{{ php_implode */
php_implode(const zend_string * glue,HashTable * pieces,zval * return_value)960 PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
961 {
962 zval *tmp;
963 uint32_t numelems;
964 zend_string *str;
965 char *cptr;
966 size_t len = 0;
967 struct {
968 zend_string *str;
969 zend_long lval;
970 } *strings, *ptr;
971 ALLOCA_FLAG(use_heap)
972
973 numelems = zend_hash_num_elements(pieces);
974
975 if (numelems == 0) {
976 RETURN_EMPTY_STRING();
977 } else if (numelems == 1) {
978 /* loop to search the first not undefined element... */
979 ZEND_HASH_FOREACH_VAL(pieces, tmp) {
980 RETURN_STR(zval_get_string(tmp));
981 } ZEND_HASH_FOREACH_END();
982 }
983
984 ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);
985
986 uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(glue);
987
988 ZEND_HASH_FOREACH_VAL(pieces, tmp) {
989 if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
990 ptr->str = Z_STR_P(tmp);
991 len += ZSTR_LEN(ptr->str);
992 ptr->lval = 0;
993 flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
994 ptr++;
995 } else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
996 zend_long val = Z_LVAL_P(tmp);
997
998 ptr->str = NULL;
999 ptr->lval = val;
1000 ptr++;
1001 if (val <= 0) {
1002 len++;
1003 }
1004 while (val) {
1005 val /= 10;
1006 len++;
1007 }
1008 } else {
1009 ptr->str = zval_get_string_func(tmp);
1010 len += ZSTR_LEN(ptr->str);
1011 ptr->lval = 1;
1012 flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
1013 ptr++;
1014 }
1015 } ZEND_HASH_FOREACH_END();
1016
1017 /* numelems cannot be 0, we checked above */
1018 str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
1019 GC_ADD_FLAGS(str, flags);
1020 cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
1021 *cptr = 0;
1022
1023 while (1) {
1024 ptr--;
1025 if (EXPECTED(ptr->str)) {
1026 cptr -= ZSTR_LEN(ptr->str);
1027 memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
1028 if (ptr->lval) {
1029 zend_string_release_ex(ptr->str, 0);
1030 }
1031 } else {
1032 char *oldPtr = cptr;
1033 char oldVal = *cptr;
1034 cptr = zend_print_long_to_buf(cptr, ptr->lval);
1035 *oldPtr = oldVal;
1036 }
1037
1038 if (ptr == strings) {
1039 break;
1040 }
1041
1042 cptr -= ZSTR_LEN(glue);
1043 memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
1044 }
1045
1046 free_alloca(strings, use_heap);
1047 RETURN_NEW_STR(str);
1048 }
1049 /* }}} */
1050
1051 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1052 PHP_FUNCTION(implode)
1053 {
1054 zend_string *arg1_str = NULL;
1055 HashTable *arg1_array = NULL;
1056 zend_array *pieces = NULL;
1057
1058 ZEND_PARSE_PARAMETERS_START(1, 2)
1059 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1060 Z_PARAM_OPTIONAL
1061 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1062 ZEND_PARSE_PARAMETERS_END();
1063
1064 if (pieces == NULL) {
1065 if (arg1_array == NULL) {
1066 zend_type_error(
1067 "%s(): If argument #1 ($separator) is of type string, "
1068 "argument #2 ($array) must be of type array, null given",
1069 get_active_function_name()
1070 );
1071 RETURN_THROWS();
1072 }
1073
1074 arg1_str = ZSTR_EMPTY_ALLOC();
1075 pieces = arg1_array;
1076 } else {
1077 if (arg1_str == NULL) {
1078 zend_argument_type_error(1, "must be of type string, array given");
1079 RETURN_THROWS();
1080 }
1081 }
1082
1083 php_implode(arg1_str, pieces, return_value);
1084 }
1085 /* }}} */
1086
1087 ZEND_FRAMELESS_FUNCTION(implode, 1)
1088 {
1089 zval *pieces;
1090
1091 /* Manual parsing for more accurate error message. */
1092 if (!zend_parse_arg_array(arg1, &pieces, /* null_check */ false, /* or_object */ false)) { \
1093 zend_type_error(
1094 "%s(): If argument #1 ($separator) is of type string, "
1095 "argument #2 ($array) must be of type array, null given",
1096 get_active_function_name()
1097 );
1098 goto flf_clean; \
1099 }
1100
1101 zend_string *str = ZSTR_EMPTY_ALLOC();
1102
1103 php_implode(str, Z_ARR_P(pieces), return_value);
1104
1105 flf_clean:;
1106 }
1107
1108 ZEND_FRAMELESS_FUNCTION(implode, 2)
1109 {
1110 zval str_tmp;
1111 zend_string *str;
1112 zval *pieces;
1113
1114 Z_FLF_PARAM_STR(1, str, str_tmp);
1115 Z_FLF_PARAM_ARRAY_OR_NULL(2, pieces);
1116
1117 if (!pieces) {
1118 zend_type_error(
1119 "%s(): If argument #1 ($separator) is of type string, "
1120 "argument #2 ($array) must be of type array, null given",
1121 get_active_function_name()
1122 );
1123 goto flf_clean;
1124 }
1125
1126 php_implode(str, Z_ARR_P(pieces), return_value);
1127
1128 flf_clean:;
1129 Z_FLF_PARAM_FREE_STR(1, str_tmp);
1130 }
1131
/* Yields (as an lvalue) the BG(strtok_table) entry for the byte p points at.
 * The argument is parenthesized so that any pointer expression can be passed. */
#define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *(p)]
1133
/* {{{ Tokenize a string
 * With two arguments, the first becomes the new string to tokenize (stored in
 * BG(strtok_string)) and the second is the set of delimiter bytes. With a single
 * argument, that argument is the delimiter set and scanning resumes at
 * BG(strtok_last) inside the previously stored string.
 * Returns the next token, or false when no token is left. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* Single-argument form: the only argument is the delimiter set. */
		tok = str;
	} else {
		/* Two-argument form: start over with a new string to tokenize. */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		php_error_docref(NULL, E_WARNING, "Both arguments must be provided when starting tokenization");
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Mark every delimiter byte in the lookup table. */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* The token excludes the `skipped` leading delimiter bytes. */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		/* Resume after the byte p stopped on on the next call. */
		BG(strtok_last) = p + 1;
	} else {
return_false:
		RETVAL_FALSE;
		/* Nothing left to tokenize: drop the stored string. */
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster than memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
1214 /* }}} */
1215
/* {{{ php_strtoupper
 * Passes the buffer s of length len to zend_str_toupper() and returns the same
 * pointer s. */
PHPAPI char *php_strtoupper(char *s, size_t len)
{
	zend_str_toupper(s, len);
	return s;
}
1222 /* }}} */
1223
/* {{{ php_string_toupper
 * Thin wrapper: returns the result of zend_string_toupper(s). */
PHPAPI zend_string *php_string_toupper(zend_string *s)
{
	return zend_string_toupper(s);
}
1230
/* {{{ Makes a string uppercase
 * Takes exactly one string argument and returns zend_string_toupper() of it. */
PHP_FUNCTION(strtoupper)
{
	zend_string *arg;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(arg)
	ZEND_PARSE_PARAMETERS_END();

	RETURN_STR(zend_string_toupper(arg));
}
1242 /* }}} */
1243
/* {{{ php_strtolower
 * Passes the buffer s of length len to zend_str_tolower() and returns the same
 * pointer s. */
PHPAPI char *php_strtolower(char *s, size_t len)
{
	zend_str_tolower(s, len);
	return s;
}
1250 /* }}} */
1251
/* {{{ php_string_tolower
 * Thin wrapper: returns the result of zend_string_tolower(s). */
PHPAPI zend_string *php_string_tolower(zend_string *s)
{
	return zend_string_tolower(s);
}
1258
/* {{{ Makes a string lowercase
 * Takes exactly one string argument and returns zend_string_tolower() of it. */
PHP_FUNCTION(strtolower)
{
	zend_string *str;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(str)
	ZEND_PARSE_PARAMETERS_END();

	RETURN_STR(zend_string_tolower(str));
}
1270 /* }}} */
1271
PHP_FUNCTION(str_increment)1272 PHP_FUNCTION(str_increment)
1273 {
1274 zend_string *str;
1275
1276 ZEND_PARSE_PARAMETERS_START(1, 1)
1277 Z_PARAM_STR(str)
1278 ZEND_PARSE_PARAMETERS_END();
1279
1280 if (ZSTR_LEN(str) == 0) {
1281 zend_argument_value_error(1, "cannot be empty");
1282 RETURN_THROWS();
1283 }
1284 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1285 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1286 RETURN_THROWS();
1287 }
1288
1289 zend_string *incremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1290 size_t position = ZSTR_LEN(str)-1;
1291 bool carry = false;
1292
1293 do {
1294 char c = ZSTR_VAL(incremented)[position];
1295 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1296 if (EXPECTED( c != 'z' && c != 'Z' && c != '9' )) {
1297 carry = false;
1298 ZSTR_VAL(incremented)[position]++;
1299 } else { /* if 'z', 'Z', or '9' */
1300 carry = true;
1301 if (c == '9') {
1302 ZSTR_VAL(incremented)[position] = '0';
1303 } else {
1304 ZSTR_VAL(incremented)[position] -= 25;
1305 }
1306 }
1307 } while (carry && position-- > 0);
1308
1309 if (UNEXPECTED(carry)) {
1310 zend_string *tmp = zend_string_alloc(ZSTR_LEN(incremented)+1, 0);
1311 memcpy(ZSTR_VAL(tmp) + 1, ZSTR_VAL(incremented), ZSTR_LEN(incremented));
1312 ZSTR_VAL(tmp)[ZSTR_LEN(incremented)+1] = '\0';
1313 switch (ZSTR_VAL(incremented)[0]) {
1314 case '0':
1315 ZSTR_VAL(tmp)[0] = '1';
1316 break;
1317 default:
1318 ZSTR_VAL(tmp)[0] = ZSTR_VAL(incremented)[0];
1319 break;
1320 }
1321 zend_string_release_ex(incremented, /* persistent */ false);
1322 RETURN_STR(tmp);
1323 }
1324 RETURN_STR(incremented);
1325 }
1326
1327
PHP_FUNCTION(str_decrement)1328 PHP_FUNCTION(str_decrement)
1329 {
1330 zend_string *str;
1331
1332 ZEND_PARSE_PARAMETERS_START(1, 1)
1333 Z_PARAM_STR(str)
1334 ZEND_PARSE_PARAMETERS_END();
1335
1336 if (ZSTR_LEN(str) == 0) {
1337 zend_argument_value_error(1, "cannot be empty");
1338 RETURN_THROWS();
1339 }
1340 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1341 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1342 RETURN_THROWS();
1343 }
1344 if (ZSTR_LEN(str) >= 1 && ZSTR_VAL(str)[0] == '0') {
1345 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1346 RETURN_THROWS();
1347 }
1348
1349 zend_string *decremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1350 size_t position = ZSTR_LEN(str)-1;
1351 bool carry = false;
1352
1353 do {
1354 char c = ZSTR_VAL(decremented)[position];
1355 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1356 if (EXPECTED( c != 'a' && c != 'A' && c != '0' )) {
1357 carry = false;
1358 ZSTR_VAL(decremented)[position]--;
1359 } else { /* if 'a', 'A', or '0' */
1360 carry = true;
1361 if (c == '0') {
1362 ZSTR_VAL(decremented)[position] = '9';
1363 } else {
1364 ZSTR_VAL(decremented)[position] += 25;
1365 }
1366 }
1367 } while (carry && position-- > 0);
1368
1369 if (UNEXPECTED(carry || (ZSTR_VAL(decremented)[0] == '0' && ZSTR_LEN(decremented) > 1))) {
1370 if (ZSTR_LEN(decremented) == 1) {
1371 zend_string_release_ex(decremented, /* persistent */ false);
1372 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1373 RETURN_THROWS();
1374 }
1375 zend_string *tmp = zend_string_alloc(ZSTR_LEN(decremented) - 1, 0);
1376 memcpy(ZSTR_VAL(tmp), ZSTR_VAL(decremented) + 1, ZSTR_LEN(decremented) - 1);
1377 ZSTR_VAL(tmp)[ZSTR_LEN(decremented) - 1] = '\0';
1378 zend_string_release_ex(decremented, /* persistent */ false);
1379 RETURN_STR(tmp);
1380 }
1381 RETURN_STR(decremented);
1382 }
1383
1384 #if defined(PHP_WIN32)
/* Reports whether the character immediately before pos is a non-separator that
 * sits at the very beginning of the string, directly after a '/' or '\\', or
 * directly after a ':' for which the same test holds again.
 * start is the beginning of the string; pos must not be before start. */
static bool _is_basename_start(const char *start, const char *pos)
{
	const ptrdiff_t distance = pos - start;

	if (distance < 1) {
		return false;
	}

	const char prev = pos[-1];
	if (prev == '/' || prev == '\\') {
		return false;
	}
	if (distance == 1) {
		/* prev is the first character of the string. */
		return true;
	}

	const char before_prev = pos[-2];
	if (before_prev == '/' || before_prev == '\\') {
		return true;
	}

	/* A ':' two positions back only qualifies if it is itself preceded by a
	 * character passing this test. */
	return before_prev == ':' && _is_basename_start(start, pos - 2);
}
1401 #endif
1402
/* {{{ php_basename
 * Returns a newly created string holding the trailing name component of the
 * path s (len bytes). If suffix is non-NULL, is strictly shorter than that
 * component and matches its tail, the suffix is removed from the result.
 * Two strategies are used: a backwards byte scan when
 * CG(ascii_compatible_locale) is set, and a forward scan driven by
 * php_mblen() otherwise. On PHP_WIN32 builds '\\' and certain ':' positions
 * are handled as separators in addition to '/'. */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	const char *basename_start;
	const char *basename_end;

	if (CG(ascii_compatible_locale)) {
		/* NOTE(review): for len == 0 this computes s - 1; the loop below then
		 * does not run and the empty string is returned. */
		basename_end = s + len - 1;

		/* Strip trailing slashes */
		while (basename_end >= s
#ifdef PHP_WIN32
			&& (*basename_end == '/'
				|| *basename_end == '\\'
				|| (*basename_end == ':'
					&& _is_basename_start(s, basename_end)))) {
#else
			&& *basename_end == '/') {
#endif
			basename_end--;
		}
		if (basename_end < s) {
			/* The path consisted of separators only (or was empty). */
			return ZSTR_EMPTY_ALLOC();
		}

		/* Extract filename */
		basename_start = basename_end;
		/* basename_end becomes one past the last byte of the component. */
		basename_end++;
		while (basename_start > s
#ifdef PHP_WIN32
			&& *(basename_start-1) != '/'
			&& *(basename_start-1) != '\\') {

			if (*(basename_start-1) == ':' &&
				_is_basename_start(s, basename_start - 1)) {
				break;
			}
#else
			&& *(basename_start-1) != '/') {
#endif
			basename_start--;
		}
	} else {
		/* State 0 is directly after a directory separator (or at the start of the string).
		 * State 1 is everything else. */
		int state = 0;

		basename_start = s;
		basename_end = s;
		while (len > 0) {
			/* A NUL byte is treated as a one-byte character without calling php_mblen(). */
			int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

			switch (inc_len) {
				case 0:
					goto quit_loop;
				case 1:
#ifdef PHP_WIN32
					if (*s == '/' || *s == '\\') {
#else
					if (*s == '/') {
#endif
						/* A separator right after a name ends the current candidate. */
						if (state == 1) {
							state = 0;
							basename_end = s;
						}
#ifdef PHP_WIN32
					/* Catch relative paths in c:file.txt style. They're not to confuse
					   with the NTFS streams. This part ensures also, that no drive
					   letter traversing happens. */
					} else if ((*s == ':' && (s - basename_start == 1))) {
						if (state == 0) {
							basename_start = s;
							state = 1;
						} else {
							basename_end = s;
							state = 0;
						}
#endif
					} else {
						/* First byte of a new name component. */
						if (state == 0) {
							basename_start = s;
							state = 1;
						}
					}
					break;
				default:
					if (inc_len < 0) {
						/* If character is invalid, treat it like other non-significant characters. */
						inc_len = 1;
						php_mb_reset();
					}
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
					break;
			}
			s += inc_len;
			len -= inc_len;
		}

quit_loop:
		/* The path ended inside a name: it extends up to the current position. */
		if (state == 1) {
			basename_end = s;
		}
	}

	/* Only strip the suffix when something would remain afterwards. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
1517 /* }}} */
1518
/* {{{ Returns the filename component of the path
 * Takes the path and an optional suffix; when the suffix is omitted, NULL and a
 * length of 0 are forwarded to php_basename(). */
PHP_FUNCTION(basename)
{
	char *string, *suffix = NULL;
	size_t string_len, suffix_len = 0;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STRING(string, string_len)
		Z_PARAM_OPTIONAL
		Z_PARAM_STRING(suffix, suffix_len)
	ZEND_PARSE_PARAMETERS_END();

	RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
}
1533 /* }}} */
1534
/* {{{ php_dirname
   Returns directory name component of path.
   Thin wrapper that forwards path and len to zend_dirname() and returns its
   result (callers in this file use that value as the new length of the buffer). */
PHPAPI size_t php_dirname(char *path, size_t len)
{
	return zend_dirname(path, len);
}
1541 /* }}} */
1542
1543 static inline void _zend_dirname(zval *return_value, zend_string *str, zend_long levels)
1544 {
1545 zend_string *ret;
1546
1547 ret = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
1548
1549 if (levels == 1) {
1550 /* Default case */
1551 #ifdef PHP_WIN32
1552 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), ZSTR_LEN(str));
1553 #else
1554 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), ZSTR_LEN(str));
1555 #endif
1556 } else if (levels < 1) {
1557 zend_argument_value_error(2, "must be greater than or equal to 1");
1558 zend_string_efree(ret);
1559 RETURN_THROWS();
1560 } else {
1561 /* Some levels up */
1562 size_t str_len;
1563 do {
1564 #ifdef PHP_WIN32
1565 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1566 #else
1567 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1568 #endif
1569 } while (ZSTR_LEN(ret) < str_len && --levels);
1570 }
1571
1572 RETURN_NEW_STR(ret);
1573 }
1574
/* {{{ Returns the directory name component of the path
 * Takes the path and an optional number of levels (default 1) and delegates to
 * _zend_dirname(). */
PHP_FUNCTION(dirname)
{
	zend_string *str;
	zend_long levels = 1;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(levels)
	ZEND_PARSE_PARAMETERS_END();

	_zend_dirname(return_value, str, levels);
}
1589 /* }}} */
1590
/* Frameless dirname() with one argument: calls _zend_dirname() with levels = 1.
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(dirname, 1)
{
	zval str_tmp;
	zend_string *str;

	Z_FLF_PARAM_STR(1, str, str_tmp);

	_zend_dirname(return_value, str, 1);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, str_tmp);
}
1603
/* Frameless dirname() with two arguments: the path and the number of levels,
 * both forwarded to _zend_dirname().
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(dirname, 2)
{
	zval str_tmp;
	zend_string *str;
	zend_long levels;

	Z_FLF_PARAM_STR(1, str, str_tmp);
	Z_FLF_PARAM_LONG(2, levels);

	_zend_dirname(return_value, str, levels);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, str_tmp);
}
1618
1619 /* {{{ Returns information about a certain string */
1620 PHP_FUNCTION(pathinfo)
1621 {
1622 zval tmp;
1623 char *path, *dirname;
1624 size_t path_len;
1625 bool have_basename;
1626 zend_long opt = PHP_PATHINFO_ALL;
1627 zend_string *ret = NULL;
1628
1629 ZEND_PARSE_PARAMETERS_START(1, 2)
1630 Z_PARAM_STRING(path, path_len)
1631 Z_PARAM_OPTIONAL
1632 Z_PARAM_LONG(opt)
1633 ZEND_PARSE_PARAMETERS_END();
1634
1635 have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
1636
1637 array_init(&tmp);
1638
1639 if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
1640 dirname = estrndup(path, path_len);
1641 php_dirname(dirname, path_len);
1642 if (*dirname) {
1643 add_assoc_string(&tmp, "dirname", dirname);
1644 }
1645 efree(dirname);
1646 }
1647
1648 if (have_basename) {
1649 ret = php_basename(path, path_len, NULL, 0);
1650 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1651 }
1652
1653 if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
1654 const char *p;
1655 ptrdiff_t idx;
1656
1657 if (!have_basename) {
1658 ret = php_basename(path, path_len, NULL, 0);
1659 }
1660
1661 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1662
1663 if (p) {
1664 idx = p - ZSTR_VAL(ret);
1665 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1666 }
1667 }
1668
1669 if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
1670 const char *p;
1671 ptrdiff_t idx;
1672
1673 /* Have we already looked up the basename? */
1674 if (!have_basename && !ret) {
1675 ret = php_basename(path, path_len, NULL, 0);
1676 }
1677
1678 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1679
1680 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1681 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1682 }
1683
1684 if (ret) {
1685 zend_string_release_ex(ret, 0);
1686 }
1687
1688 if (opt == PHP_PATHINFO_ALL) {
1689 RETURN_COPY_VALUE(&tmp);
1690 } else {
1691 zval *element;
1692 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1693 RETVAL_COPY_DEREF(element);
1694 } else {
1695 RETVAL_EMPTY_STRING();
1696 }
1697 zval_ptr_dtor(&tmp);
1698 }
1699 }
1700 /* }}} */
1701
/* {{{ php_stristr
   case insensitive strstr.
   Searches for t (t_len bytes) inside s (s_len bytes) by delegating to
   php_memnistr() and returns its result cast to char *. */
PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
{
	return (char*)php_memnistr(s, t, t_len, s + s_len);
}
1708 /* }}} */
1709
/* Shared worker for php_strspn() and php_strcspn().
 * Returns the length of the initial run of [haystack, haystack_end) whose bytes
 * are all contained in [characters, characters_end) (must_match == true), or
 * are all absent from it (must_match == false). */
static size_t php_strspn_strcspn_common(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end, bool must_match)
{
	const char *cursor = haystack;

	/* Fast path for a one-byte set: building a lookup table would cost more than
	 * the plain comparisons it replaces. The table approach was found to pay off
	 * only when the set holds more than one character. */
	if (characters_end - characters == 1) {
		for (; cursor < haystack_end; cursor++) {
			if ((*cursor == *characters) != must_match) {
				break;
			}
		}
		return cursor - haystack;
	}

	/* Membership table: in_set[b] is true when byte b occurs in the set. */
	bool in_set[256];
	/* Use multiple small memsets to inline the memset with intrinsics, trick learned from glibc. */
	memset(in_set, 0, 64);
	memset(in_set + 64, 0, 64);
	memset(in_set + 128, 0, 64);
	memset(in_set + 192, 0, 64);

	for (const char *member = characters; member < characters_end; member++) {
		in_set[(unsigned char) *member] = true;
	}

	for (; cursor < haystack_end; cursor++) {
		if (in_set[(unsigned char) *cursor] != must_match) {
			break;
		}
	}

	return cursor - haystack;
}
1745
/* {{{ php_strspn
 * Length of the initial run of [haystack, haystack_end) made up only of bytes
 * found in [characters, characters_end). */
PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end)
{
	return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, true);
}
1751 /* }}} */
1752
/* {{{ php_strcspn
 * Length of the initial run of [haystack, haystack_end) made up only of bytes
 * NOT found in [characters, characters_end). */
PHPAPI size_t php_strcspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end)
{
	return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, false);
}
1758 /* }}} */
1759
1760 /* {{{ Finds first occurrence of a string within another, case insensitive */
1761 PHP_FUNCTION(stristr)
1762 {
1763 zend_string *haystack, *needle;
1764 const char *found = NULL;
1765 size_t found_offset;
1766 bool part = 0;
1767
1768 ZEND_PARSE_PARAMETERS_START(2, 3)
1769 Z_PARAM_STR(haystack)
1770 Z_PARAM_STR(needle)
1771 Z_PARAM_OPTIONAL
1772 Z_PARAM_BOOL(part)
1773 ZEND_PARSE_PARAMETERS_END();
1774
1775 found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
1776
1777 if (UNEXPECTED(!found)) {
1778 RETURN_FALSE;
1779 }
1780 found_offset = found - ZSTR_VAL(haystack);
1781 if (part) {
1782 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1783 }
1784 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1785 }
1786 /* }}} */
1787
1788 static inline void _zend_strstr(zval *return_value, zend_string *haystack, zend_string *needle, bool part)
1789 {
1790 const char *found = NULL;
1791 zend_long found_offset;
1792
1793 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1794
1795 if (UNEXPECTED(!found)) {
1796 RETURN_FALSE;
1797 }
1798 found_offset = found - ZSTR_VAL(haystack);
1799 if (part) {
1800 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1801 }
1802 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1803 }
1804
/* {{{ Finds first occurrence of a string within another
 * Takes haystack, needle and an optional boolean (default false) and delegates
 * to _zend_strstr(). */
PHP_FUNCTION(strstr)
{
	zend_string *haystack, *needle;
	bool part = 0;

	ZEND_PARSE_PARAMETERS_START(2, 3)
		Z_PARAM_STR(haystack)
		Z_PARAM_STR(needle)
		Z_PARAM_OPTIONAL
		Z_PARAM_BOOL(part)
	ZEND_PARSE_PARAMETERS_END();

	_zend_strstr(return_value, haystack, needle, part);
}
1820 /* }}} */
1821
/* Frameless strstr() with two arguments: calls _zend_strstr() with part = false.
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(strstr, 2)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);

	_zend_strstr(return_value, haystack, needle, /* part */ false);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
1836
/* Frameless strstr() with three arguments: haystack, needle and the boolean
 * `part` flag, all forwarded to _zend_strstr().
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(strstr, 3)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;
	bool part;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);
	Z_FLF_PARAM_BOOL(3, part);

	_zend_strstr(return_value, haystack, needle, part);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
1853
/* {{{ Checks if a string contains another
 * Returns true when php_memnstr() finds needle inside haystack, false otherwise. */
PHP_FUNCTION(str_contains)
{
	zend_string *haystack, *needle;

	ZEND_PARSE_PARAMETERS_START(2, 2)
		Z_PARAM_STR(haystack)
		Z_PARAM_STR(needle)
	ZEND_PARSE_PARAMETERS_END();

	/* The returned pointer is only used for its truthiness. */
	RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
}
1866 /* }}} */
1867
/* Frameless str_contains(): sets return_value to whether php_memnstr() finds
 * needle inside haystack.
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(str_contains, 2)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);

	RETVAL_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
1882
/* {{{ Checks if haystack starts with needle
 * Returns the boolean result of zend_string_starts_with(haystack, needle). */
PHP_FUNCTION(str_starts_with)
{
	zend_string *haystack, *needle;

	ZEND_PARSE_PARAMETERS_START(2, 2)
		Z_PARAM_STR(haystack)
		Z_PARAM_STR(needle)
	ZEND_PARSE_PARAMETERS_END();

	RETURN_BOOL(zend_string_starts_with(haystack, needle));
}
1895 /* }}} */
1896
/* Frameless str_starts_with(): sets return_value to the result of
 * zend_string_starts_with(haystack, needle).
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(str_starts_with, 2)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);

	RETVAL_BOOL(zend_string_starts_with(haystack, needle));

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
1911
1912 /* {{{ Checks if haystack ends with needle */
1913 PHP_FUNCTION(str_ends_with)
1914 {
1915 zend_string *haystack, *needle;
1916
1917 ZEND_PARSE_PARAMETERS_START(2, 2)
1918 Z_PARAM_STR(haystack)
1919 Z_PARAM_STR(needle)
1920 ZEND_PARSE_PARAMETERS_END();
1921
1922 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1923 RETURN_FALSE;
1924 }
1925
1926 RETURN_BOOL(memcmp(
1927 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1928 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1929 }
1930 /* }}} */
1931
1932 static inline void _zend_strpos(zval *return_value, zend_string *haystack, zend_string *needle, zend_long offset)
1933 {
1934 const char *found = NULL;
1935
1936 if (offset < 0) {
1937 offset += (zend_long)ZSTR_LEN(haystack);
1938 }
1939 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1940 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1941 RETURN_THROWS();
1942 }
1943
1944 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1945 ZSTR_VAL(needle), ZSTR_LEN(needle),
1946 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1947
1948 if (UNEXPECTED(!found)) {
1949 RETURN_FALSE;
1950 }
1951 RETURN_LONG(found - ZSTR_VAL(haystack));
1952 }
1953
/* {{{ Finds position of first occurrence of a string within another
 * Takes haystack, needle and an optional offset (default 0) and delegates to
 * _zend_strpos(). */
PHP_FUNCTION(strpos)
{
	zend_string *haystack, *needle;
	zend_long offset = 0;

	ZEND_PARSE_PARAMETERS_START(2, 3)
		Z_PARAM_STR(haystack)
		Z_PARAM_STR(needle)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(offset)
	ZEND_PARSE_PARAMETERS_END();

	_zend_strpos(return_value, haystack, needle, offset);
}
1969 /* }}} */
1970
/* Frameless strpos() with two arguments: calls _zend_strpos() with offset 0.
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(strpos, 2)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);

	_zend_strpos(return_value, haystack, needle, 0);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
1985
/* Frameless strpos() with three arguments: haystack, needle and offset, all
 * forwarded to _zend_strpos().
 * NOTE(review): the Z_FLF_PARAM_* macros are defined elsewhere; they appear to
 * rely on the flf_clean label below for their failure path - confirm. */
ZEND_FRAMELESS_FUNCTION(strpos, 3)
{
	zval haystack_tmp, needle_tmp;
	zend_string *haystack, *needle;
	zend_long offset;

	Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
	Z_FLF_PARAM_STR(2, needle, needle_tmp);
	Z_FLF_PARAM_LONG(3, offset);

	_zend_strpos(return_value, haystack, needle, offset);

flf_clean:
	Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
	Z_FLF_PARAM_FREE_STR(2, needle_tmp);
}
2002
2003 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
2004 PHP_FUNCTION(stripos)
2005 {
2006 const char *found = NULL;
2007 zend_string *haystack, *needle;
2008 zend_long offset = 0;
2009
2010 ZEND_PARSE_PARAMETERS_START(2, 3)
2011 Z_PARAM_STR(haystack)
2012 Z_PARAM_STR(needle)
2013 Z_PARAM_OPTIONAL
2014 Z_PARAM_LONG(offset)
2015 ZEND_PARSE_PARAMETERS_END();
2016
2017 if (offset < 0) {
2018 offset += (zend_long)ZSTR_LEN(haystack);
2019 }
2020 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
2021 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2022 RETURN_THROWS();
2023 }
2024
2025 found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
2026 ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
2027
2028 if (UNEXPECTED(!found)) {
2029 RETURN_FALSE;
2030 }
2031 RETURN_LONG(found - ZSTR_VAL(haystack));
2032 }
2033 /* }}} */
2034
2035 /* {{{ Finds position of last occurrence of a string within another string */
2036 PHP_FUNCTION(strrpos)
2037 {
2038 zend_string *needle;
2039 zend_string *haystack;
2040 zend_long offset = 0;
2041 const char *p, *e, *found;
2042
2043 ZEND_PARSE_PARAMETERS_START(2, 3)
2044 Z_PARAM_STR(haystack)
2045 Z_PARAM_STR(needle)
2046 Z_PARAM_OPTIONAL
2047 Z_PARAM_LONG(offset)
2048 ZEND_PARSE_PARAMETERS_END();
2049
2050 if (offset >= 0) {
2051 if ((size_t)offset > ZSTR_LEN(haystack)) {
2052 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2053 RETURN_THROWS();
2054 }
2055 p = ZSTR_VAL(haystack) + (size_t)offset;
2056 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2057 } else {
2058 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2059 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2060 RETURN_THROWS();
2061 }
2062
2063 p = ZSTR_VAL(haystack);
2064 if ((size_t)-offset < ZSTR_LEN(needle)) {
2065 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2066 } else {
2067 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2068 }
2069 }
2070
2071 found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e);
2072
2073 if (UNEXPECTED(!found)) {
2074 RETURN_FALSE;
2075 }
2076 RETURN_LONG(found - ZSTR_VAL(haystack));
2077 }
2078 /* }}} */
2079
2080 /* {{{ Finds position of last occurrence of a string within another string */
2081 PHP_FUNCTION(strripos)
2082 {
2083 zend_string *needle;
2084 zend_string *haystack;
2085 zend_long offset = 0;
2086 const char *p, *e, *found;
2087 zend_string *needle_dup, *haystack_dup;
2088
2089 ZEND_PARSE_PARAMETERS_START(2, 3)
2090 Z_PARAM_STR(haystack)
2091 Z_PARAM_STR(needle)
2092 Z_PARAM_OPTIONAL
2093 Z_PARAM_LONG(offset)
2094 ZEND_PARSE_PARAMETERS_END();
2095
2096 if (ZSTR_LEN(needle) == 1) {
2097 /* Single character search can shortcut memcmps
2098 Can also avoid tolower emallocs */
2099 char lowered;
2100 if (offset >= 0) {
2101 if ((size_t)offset > ZSTR_LEN(haystack)) {
2102 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2103 RETURN_THROWS();
2104 }
2105 p = ZSTR_VAL(haystack) + (size_t)offset;
2106 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
2107 } else {
2108 p = ZSTR_VAL(haystack);
2109 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2110 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2111 RETURN_THROWS();
2112 }
2113 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
2114 }
2115 lowered = zend_tolower_ascii(*ZSTR_VAL(needle));
2116 while (e >= p) {
2117 if (zend_tolower_ascii(*e) == lowered) {
2118 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
2119 }
2120 e--;
2121 }
2122 RETURN_FALSE;
2123 }
2124
2125 haystack_dup = zend_string_tolower(haystack);
2126 if (offset >= 0) {
2127 if ((size_t)offset > ZSTR_LEN(haystack)) {
2128 zend_string_release_ex(haystack_dup, 0);
2129 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2130 RETURN_THROWS();
2131 }
2132 p = ZSTR_VAL(haystack_dup) + offset;
2133 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2134 } else {
2135 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2136 zend_string_release_ex(haystack_dup, 0);
2137 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2138 RETURN_THROWS();
2139 }
2140
2141 p = ZSTR_VAL(haystack_dup);
2142 if ((size_t)-offset < ZSTR_LEN(needle)) {
2143 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2144 } else {
2145 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2146 }
2147 }
2148
2149 needle_dup = zend_string_tolower(needle);
2150 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
2151 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
2152 zend_string_release_ex(needle_dup, 0);
2153 zend_string_release_ex(haystack_dup, 0);
2154 } else {
2155 zend_string_release_ex(needle_dup, 0);
2156 zend_string_release_ex(haystack_dup, 0);
2157 RETURN_FALSE;
2158 }
2159 }
2160 /* }}} */
2161
2162 /* {{{ Finds the last occurrence of a character in a string within another */
2163 PHP_FUNCTION(strrchr)
2164 {
2165 zend_string *haystack, *needle;
2166 const char *found = NULL;
2167 zend_long found_offset;
2168 bool part = 0;
2169
2170 ZEND_PARSE_PARAMETERS_START(2, 3)
2171 Z_PARAM_STR(haystack)
2172 Z_PARAM_STR(needle)
2173 Z_PARAM_OPTIONAL
2174 Z_PARAM_BOOL(part)
2175 ZEND_PARSE_PARAMETERS_END();
2176
2177 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2178 if (UNEXPECTED(!found)) {
2179 RETURN_FALSE;
2180 }
2181 found_offset = found - ZSTR_VAL(haystack);
2182 if (part) {
2183 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
2184 }
2185 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2186 }
2187 /* }}} */
2188
2189 /* {{{ php_chunk_split */
2190 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
2191 {
2192 char *q;
2193 const char *p;
2194 size_t chunks;
2195 size_t restlen;
2196 zend_string *dest;
2197
2198 chunks = srclen / chunklen;
2199 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
2200 if (restlen) {
2201 /* We want chunks to be rounded up rather than rounded down.
2202 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
2203 chunks++;
2204 }
2205
2206 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
2207
2208 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
2209 q = zend_mempcpy(q, p, chunklen);
2210 q = zend_mempcpy(q, end, endlen);
2211 p += chunklen;
2212 }
2213
2214 if (restlen) {
2215 q = zend_mempcpy(q, p, restlen);
2216 q = zend_mempcpy(q, end, endlen);
2217 }
2218
2219 *q = '\0';
2220 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
2221
2222 return dest;
2223 }
2224 /* }}} */
2225
2226 /* {{{ Returns split line */
2227 PHP_FUNCTION(chunk_split)
2228 {
2229 zend_string *str;
2230 char *end = "\r\n";
2231 size_t endlen = 2;
2232 zend_long chunklen = 76;
2233 zend_string *result;
2234
2235 ZEND_PARSE_PARAMETERS_START(1, 3)
2236 Z_PARAM_STR(str)
2237 Z_PARAM_OPTIONAL
2238 Z_PARAM_LONG(chunklen)
2239 Z_PARAM_STRING(end, endlen)
2240 ZEND_PARSE_PARAMETERS_END();
2241
2242 if (chunklen <= 0) {
2243 zend_argument_value_error(2, "must be greater than 0");
2244 RETURN_THROWS();
2245 }
2246
2247 if ((size_t)chunklen > ZSTR_LEN(str)) {
2248 /* to maintain BC, we must return original string + ending */
2249 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2250 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2251 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2252 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2253 RETURN_NEW_STR(result);
2254 }
2255
2256 if (!ZSTR_LEN(str)) {
2257 RETURN_EMPTY_STRING();
2258 }
2259
2260 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2261
2262 RETURN_STR(result);
2263 }
2264 /* }}} */
2265
2266 static inline void _zend_substr(zval *return_value, zend_string *str, zend_long f, bool len_is_null, zend_long l)
2267 {
2268 if (f < 0) {
2269 /* if "from" position is negative, count start position from the end
2270 * of the string
2271 */
2272 if (-(size_t)f > ZSTR_LEN(str)) {
2273 f = 0;
2274 } else {
2275 f = (zend_long)ZSTR_LEN(str) + f;
2276 }
2277 } else if ((size_t)f > ZSTR_LEN(str)) {
2278 RETURN_EMPTY_STRING();
2279 }
2280
2281 if (!len_is_null) {
2282 if (l < 0) {
2283 /* if "length" position is negative, set it to the length
2284 * needed to stop that many chars from the end of the string
2285 */
2286 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2287 l = 0;
2288 } else {
2289 l = (zend_long)ZSTR_LEN(str) - f + l;
2290 }
2291 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2292 l = (zend_long)ZSTR_LEN(str) - f;
2293 }
2294 } else {
2295 l = (zend_long)ZSTR_LEN(str) - f;
2296 }
2297
2298 if (l == ZSTR_LEN(str)) {
2299 RETURN_STR_COPY(str);
2300 } else {
2301 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2302 }
2303 }
2304
2305 /* {{{ Returns part of a string */
2306 PHP_FUNCTION(substr)
2307 {
2308 zend_string *str;
2309 zend_long l = 0, f;
2310 bool len_is_null = 1;
2311
2312 ZEND_PARSE_PARAMETERS_START(2, 3)
2313 Z_PARAM_STR(str)
2314 Z_PARAM_LONG(f)
2315 Z_PARAM_OPTIONAL
2316 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2317 ZEND_PARSE_PARAMETERS_END();
2318
2319 _zend_substr(return_value, str, f, len_is_null, l);
2320 }
2321 /* }}} */
2322
2323 ZEND_FRAMELESS_FUNCTION(substr, 2)
2324 {
2325 zval str_tmp;
2326 zend_string *str;
2327 zend_long f;
2328
2329 Z_FLF_PARAM_STR(1, str, str_tmp);
2330 Z_FLF_PARAM_LONG(2, f);
2331
2332 _zend_substr(return_value, str, f, /* len_is_null */ true, 0);
2333
2334 flf_clean:
2335 Z_FLF_PARAM_FREE_STR(1, str_tmp);
2336 }
2337
2338 ZEND_FRAMELESS_FUNCTION(substr, 3)
2339 {
2340 zval str_tmp;
2341 zend_string *str;
2342 zend_long f, l;
2343 bool len_is_null;
2344
2345 Z_FLF_PARAM_STR(1, str, str_tmp);
2346 Z_FLF_PARAM_LONG(2, f);
2347 Z_FLF_PARAM_LONG_OR_NULL(3, len_is_null, l);
2348
2349 _zend_substr(return_value, str, f, len_is_null, l);
2350
2351 flf_clean:
2352 Z_FLF_PARAM_FREE_STR(1, str_tmp);
2353 }
2354
2355 /* {{{ Replaces part of a string with another string */
2356 PHP_FUNCTION(substr_replace)
2357 {
2358 zend_string *str, *repl_str;
2359 HashTable *str_ht, *repl_ht;
2360 HashTable *from_ht;
2361 zend_long from_long;
2362 HashTable *len_ht = NULL;
2363 zend_long len_long;
2364 bool len_is_null = 1;
2365 zend_long l = 0;
2366 zend_long f;
2367 zend_string *result;
2368 HashPosition from_idx, repl_idx, len_idx;
2369 zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
2370
2371 ZEND_PARSE_PARAMETERS_START(3, 4)
2372 Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
2373 Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
2374 Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
2375 Z_PARAM_OPTIONAL
2376 Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
2377 ZEND_PARSE_PARAMETERS_END();
2378
2379 if (len_is_null) {
2380 if (str) {
2381 l = ZSTR_LEN(str);
2382 }
2383 } else if (!len_ht) {
2384 l = len_long;
2385 }
2386
2387 if (str) {
2388 if (from_ht) {
2389 zend_argument_type_error(3, "cannot be an array when working on a single string");
2390 RETURN_THROWS();
2391 }
2392 if (len_ht) {
2393 zend_argument_type_error(4, "cannot be an array when working on a single string");
2394 RETURN_THROWS();
2395 }
2396
2397 f = from_long;
2398
2399 /* if "from" position is negative, count start position from the end
2400 * of the string
2401 */
2402 if (f < 0) {
2403 f = (zend_long)ZSTR_LEN(str) + f;
2404 if (f < 0) {
2405 f = 0;
2406 }
2407 } else if ((size_t)f > ZSTR_LEN(str)) {
2408 f = ZSTR_LEN(str);
2409 }
2410 /* if "length" position is negative, set it to the length
2411 * needed to stop that many chars from the end of the string
2412 */
2413 if (l < 0) {
2414 l = ((zend_long)ZSTR_LEN(str) - f) + l;
2415 if (l < 0) {
2416 l = 0;
2417 }
2418 }
2419
2420 if ((size_t)l > ZSTR_LEN(str)) {
2421 l = ZSTR_LEN(str);
2422 }
2423
2424 if ((f + l) > (zend_long)ZSTR_LEN(str)) {
2425 l = ZSTR_LEN(str) - f;
2426 }
2427
2428 zend_string *tmp_repl_str = NULL;
2429 if (repl_ht) {
2430 repl_idx = 0;
2431 if (HT_IS_PACKED(repl_ht)) {
2432 while (repl_idx < repl_ht->nNumUsed) {
2433 tmp_repl = &repl_ht->arPacked[repl_idx];
2434 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2435 break;
2436 }
2437 repl_idx++;
2438 }
2439 } else {
2440 while (repl_idx < repl_ht->nNumUsed) {
2441 tmp_repl = &repl_ht->arData[repl_idx].val;
2442 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2443 break;
2444 }
2445 repl_idx++;
2446 }
2447 }
2448 if (repl_idx < repl_ht->nNumUsed) {
2449 repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2450 } else {
2451 repl_str = STR_EMPTY_ALLOC();
2452 }
2453 }
2454
2455 result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
2456
2457 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
2458 if (ZSTR_LEN(repl_str)) {
2459 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2460 }
2461 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
2462 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2463 zend_tmp_string_release(tmp_repl_str);
2464 RETURN_NEW_STR(result);
2465 } else { /* str is array of strings */
2466 zend_string *str_index = NULL;
2467 size_t result_len;
2468 zend_ulong num_index;
2469
2470 /* TODO
2471 if (!len_is_null && from_ht) {
2472 if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
2473 php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
2474 RETURN_STR_COPY(str);
2475 }
2476 }
2477 */
2478
2479 array_init(return_value);
2480
2481 from_idx = len_idx = repl_idx = 0;
2482
2483 ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
2484 zend_string *tmp_orig_str;
2485 zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
2486
2487 if (from_ht) {
2488 if (HT_IS_PACKED(from_ht)) {
2489 while (from_idx < from_ht->nNumUsed) {
2490 tmp_from = &from_ht->arPacked[from_idx];
2491 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2492 break;
2493 }
2494 from_idx++;
2495 }
2496 } else {
2497 while (from_idx < from_ht->nNumUsed) {
2498 tmp_from = &from_ht->arData[from_idx].val;
2499 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2500 break;
2501 }
2502 from_idx++;
2503 }
2504 }
2505 if (from_idx < from_ht->nNumUsed) {
2506 f = zval_get_long(tmp_from);
2507
2508 if (f < 0) {
2509 f = (zend_long)ZSTR_LEN(orig_str) + f;
2510 if (f < 0) {
2511 f = 0;
2512 }
2513 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2514 f = ZSTR_LEN(orig_str);
2515 }
2516 from_idx++;
2517 } else {
2518 f = 0;
2519 }
2520 } else {
2521 f = from_long;
2522 if (f < 0) {
2523 f = (zend_long)ZSTR_LEN(orig_str) + f;
2524 if (f < 0) {
2525 f = 0;
2526 }
2527 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2528 f = ZSTR_LEN(orig_str);
2529 }
2530 }
2531
2532 if (len_ht) {
2533 if (HT_IS_PACKED(len_ht)) {
2534 while (len_idx < len_ht->nNumUsed) {
2535 tmp_len = &len_ht->arPacked[len_idx];
2536 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2537 break;
2538 }
2539 len_idx++;
2540 }
2541 } else {
2542 while (len_idx < len_ht->nNumUsed) {
2543 tmp_len = &len_ht->arData[len_idx].val;
2544 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2545 break;
2546 }
2547 len_idx++;
2548 }
2549 }
2550 if (len_idx < len_ht->nNumUsed) {
2551 l = zval_get_long(tmp_len);
2552 len_idx++;
2553 } else {
2554 l = ZSTR_LEN(orig_str);
2555 }
2556 } else if (!len_is_null) {
2557 l = len_long;
2558 } else {
2559 l = ZSTR_LEN(orig_str);
2560 }
2561
2562 if (l < 0) {
2563 l = (ZSTR_LEN(orig_str) - f) + l;
2564 if (l < 0) {
2565 l = 0;
2566 }
2567 }
2568
2569 ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
2570 ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
2571 if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
2572 l = ZSTR_LEN(orig_str) - f;
2573 }
2574
2575 result_len = ZSTR_LEN(orig_str) - l;
2576
2577 if (repl_ht) {
2578 if (HT_IS_PACKED(repl_ht)) {
2579 while (repl_idx < repl_ht->nNumUsed) {
2580 tmp_repl = &repl_ht->arPacked[repl_idx];
2581 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2582 break;
2583 }
2584 repl_idx++;
2585 }
2586 } else {
2587 while (repl_idx < repl_ht->nNumUsed) {
2588 tmp_repl = &repl_ht->arData[repl_idx].val;
2589 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2590 break;
2591 }
2592 repl_idx++;
2593 }
2594 }
2595 if (repl_idx < repl_ht->nNumUsed) {
2596 zend_string *tmp_repl_str;
2597 zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2598
2599 result_len += ZSTR_LEN(repl_str);
2600 repl_idx++;
2601 result = zend_string_safe_alloc(1, result_len, 0, 0);
2602
2603 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2604 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2605 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2606 zend_tmp_string_release(tmp_repl_str);
2607 } else {
2608 result = zend_string_safe_alloc(1, result_len, 0, 0);
2609
2610 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2611 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2612 }
2613 } else {
2614 result_len += ZSTR_LEN(repl_str);
2615
2616 result = zend_string_safe_alloc(1, result_len, 0, 0);
2617
2618 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2619 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2620 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2621 }
2622
2623 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2624
2625 if (str_index) {
2626 zval tmp;
2627
2628 ZVAL_NEW_STR(&tmp, result);
2629 zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
2630 } else {
2631 add_index_str(return_value, num_index, result);
2632 }
2633
2634 zend_tmp_string_release(tmp_orig_str);
2635 } ZEND_HASH_FOREACH_END();
2636 } /* if */
2637 }
2638 /* }}} */
2639
2640 /* {{{ Quotes meta characters */
2641 PHP_FUNCTION(quotemeta)
2642 {
2643 zend_string *old;
2644 const char *old_end, *p;
2645 char *q;
2646 char c;
2647 zend_string *str;
2648
2649 ZEND_PARSE_PARAMETERS_START(1, 1)
2650 Z_PARAM_STR(old)
2651 ZEND_PARSE_PARAMETERS_END();
2652
2653 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2654
2655 if (ZSTR_LEN(old) == 0) {
2656 RETURN_EMPTY_STRING();
2657 }
2658
2659 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2660
2661 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2662 c = *p;
2663 switch (c) {
2664 case '.':
2665 case '\\':
2666 case '+':
2667 case '*':
2668 case '?':
2669 case '[':
2670 case '^':
2671 case ']':
2672 case '$':
2673 case '(':
2674 case ')':
2675 *q++ = '\\';
2676 ZEND_FALLTHROUGH;
2677 default:
2678 *q++ = c;
2679 }
2680 }
2681
2682 *q = '\0';
2683
2684 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2685 }
2686 /* }}} */
2687
2688 /* {{{ Returns ASCII value of character
2689 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2690 PHP_FUNCTION(ord)
2691 {
2692 zend_string *str;
2693
2694 ZEND_PARSE_PARAMETERS_START(1, 1)
2695 Z_PARAM_STR(str)
2696 ZEND_PARSE_PARAMETERS_END();
2697
2698 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2699 }
2700 /* }}} */
2701
2702 /* {{{ Converts ASCII code to a character
2703 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2704 PHP_FUNCTION(chr)
2705 {
2706 zend_long c;
2707
2708 ZEND_PARSE_PARAMETERS_START(1, 1)
2709 Z_PARAM_LONG(c)
2710 ZEND_PARSE_PARAMETERS_END();
2711
2712 c &= 0xff;
2713 RETURN_CHAR(c);
2714 }
2715 /* }}} */
2716
2717 /* {{{ php_ucfirst
2718 Uppercase the first character of the word in a native string */
2719 static zend_string* php_ucfirst(zend_string *str)
2720 {
2721 const unsigned char ch = ZSTR_VAL(str)[0];
2722 unsigned char r = zend_toupper_ascii(ch);
2723 if (r == ch) {
2724 return zend_string_copy(str);
2725 } else {
2726 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2727 ZSTR_VAL(s)[0] = r;
2728 return s;
2729 }
2730 }
2731 /* }}} */
2732
2733 /* {{{ Makes a string's first character uppercase */
2734 PHP_FUNCTION(ucfirst)
2735 {
2736 zend_string *str;
2737
2738 ZEND_PARSE_PARAMETERS_START(1, 1)
2739 Z_PARAM_STR(str)
2740 ZEND_PARSE_PARAMETERS_END();
2741
2742 if (!ZSTR_LEN(str)) {
2743 RETURN_EMPTY_STRING();
2744 }
2745
2746 RETURN_STR(php_ucfirst(str));
2747 }
2748 /* }}} */
2749
2750 /* {{{
2751 Lowercase the first character of the word in a native string */
2752 static zend_string* php_lcfirst(zend_string *str)
2753 {
2754 unsigned char r = zend_tolower_ascii(ZSTR_VAL(str)[0]);
2755 if (r == ZSTR_VAL(str)[0]) {
2756 return zend_string_copy(str);
2757 } else {
2758 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2759 ZSTR_VAL(s)[0] = r;
2760 return s;
2761 }
2762 }
2763 /* }}} */
2764
2765 /* {{{ Make a string's first character lowercase */
2766 PHP_FUNCTION(lcfirst)
2767 {
2768 zend_string *str;
2769
2770 ZEND_PARSE_PARAMETERS_START(1, 1)
2771 Z_PARAM_STR(str)
2772 ZEND_PARSE_PARAMETERS_END();
2773
2774 if (!ZSTR_LEN(str)) {
2775 RETURN_EMPTY_STRING();
2776 }
2777
2778 RETURN_STR(php_lcfirst(str));
2779 }
2780 /* }}} */
2781
2782 /* {{{ Uppercase the first character of every word in a string */
2783 PHP_FUNCTION(ucwords)
2784 {
2785 zend_string *str;
2786 char *delims = " \t\r\n\f\v";
2787 char *r;
2788 const char *r_end;
2789 size_t delims_len = 6;
2790 char mask[256];
2791
2792 ZEND_PARSE_PARAMETERS_START(1, 2)
2793 Z_PARAM_STR(str)
2794 Z_PARAM_OPTIONAL
2795 Z_PARAM_STRING(delims, delims_len)
2796 ZEND_PARSE_PARAMETERS_END();
2797
2798 if (!ZSTR_LEN(str)) {
2799 RETURN_EMPTY_STRING();
2800 }
2801
2802 php_charmask((const unsigned char *) delims, delims_len, mask);
2803
2804 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2805 r = Z_STRVAL_P(return_value);
2806
2807 *r = zend_toupper_ascii((unsigned char) *r);
2808 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2809 if (mask[(unsigned char)*r++]) {
2810 *r = zend_toupper_ascii((unsigned char) *r);
2811 }
2812 }
2813 }
2814 /* }}} */
2815
2816 /* {{{ php_strtr */
2817 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2818 {
2819 size_t i;
2820
2821 if (UNEXPECTED(trlen < 1)) {
2822 return str;
2823 } else if (trlen == 1) {
2824 char ch_from = *str_from;
2825 char ch_to = *str_to;
2826
2827 for (i = 0; i < len; i++) {
2828 if (str[i] == ch_from) {
2829 str[i] = ch_to;
2830 }
2831 }
2832 } else {
2833 unsigned char xlat[256];
2834
2835 memset(xlat, 0, sizeof(xlat));
2836
2837 for (i = 0; i < trlen; i++) {
2838 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2839 }
2840
2841 for (i = 0; i < len; i++) {
2842 str[i] += xlat[(size_t)(unsigned char) str[i]];
2843 }
2844 }
2845
2846 return str;
2847 }
2848 /* }}} */
2849
2850 /* {{{ php_strtr_ex */
2851 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2852 {
2853 zend_string *new_str = NULL;
2854 size_t i;
2855
2856 if (UNEXPECTED(trlen < 1)) {
2857 return zend_string_copy(str);
2858 } else if (trlen == 1) {
2859 char ch_from = *str_from;
2860 char ch_to = *str_to;
2861 char *output;
2862 char *input = ZSTR_VAL(str);
2863 size_t len = ZSTR_LEN(str);
2864
2865 #ifdef __SSE2__
2866 if (ZSTR_LEN(str) >= sizeof(__m128i)) {
2867 __m128i search = _mm_set1_epi8(ch_from);
2868 __m128i delta = _mm_set1_epi8(ch_to - ch_from);
2869
2870 do {
2871 __m128i src = _mm_loadu_si128((__m128i*)(input));
2872 __m128i mask = _mm_cmpeq_epi8(src, search);
2873 if (_mm_movemask_epi8(mask)) {
2874 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2875 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2876 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2877 _mm_storeu_si128((__m128i *)(output),
2878 _mm_add_epi8(src,
2879 _mm_and_si128(mask, delta)));
2880 input += sizeof(__m128i);
2881 output += sizeof(__m128i);
2882 len -= sizeof(__m128i);
2883 for (; len >= sizeof(__m128i); input += sizeof(__m128i), output += sizeof(__m128i), len -= sizeof(__m128i)) {
2884 src = _mm_loadu_si128((__m128i*)(input));
2885 mask = _mm_cmpeq_epi8(src, search);
2886 _mm_storeu_si128((__m128i *)(output),
2887 _mm_add_epi8(src,
2888 _mm_and_si128(mask, delta)));
2889 }
2890 for (; len > 0; input++, output++, len--) {
2891 *output = (*input == ch_from) ? ch_to : *input;
2892 }
2893 *output = 0;
2894 return new_str;
2895 }
2896 input += sizeof(__m128i);
2897 len -= sizeof(__m128i);
2898 } while (len >= sizeof(__m128i));
2899 }
2900 #endif
2901 for (; len > 0; input++, len--) {
2902 if (*input == ch_from) {
2903 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2904 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2905 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2906 *output = ch_to;
2907 input++;
2908 output++;
2909 len--;
2910 for (; len > 0; input++, output++, len--) {
2911 *output = (*input == ch_from) ? ch_to : *input;
2912 }
2913 *output = 0;
2914 return new_str;
2915 }
2916 }
2917 } else {
2918 unsigned char xlat[256];
2919
2920 memset(xlat, 0, sizeof(xlat));;
2921
2922 for (i = 0; i < trlen; i++) {
2923 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2924 }
2925
2926 for (i = 0; i < ZSTR_LEN(str); i++) {
2927 if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2928 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2929 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2930 do {
2931 ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2932 i++;
2933 } while (i < ZSTR_LEN(str));
2934 ZSTR_VAL(new_str)[i] = 0;
2935 return new_str;
2936 }
2937 }
2938 }
2939
2940 return zend_string_copy(str);
2941 }
2942 /* }}} */
2943
2944 static void php_strtr_array_ex(zval *return_value, zend_string *input, HashTable *pats)
2945 {
2946 const char *str = ZSTR_VAL(input);
2947 size_t slen = ZSTR_LEN(input);
2948 zend_ulong num_key;
2949 zend_string *str_key;
2950 size_t len, pos, old_pos;
2951 bool has_num_keys = false;
2952 size_t minlen = 128*1024;
2953 size_t maxlen = 0;
2954 HashTable str_hash;
2955 zval *entry;
2956 const char *key;
2957 smart_str result = {0};
2958 zend_ulong bitset[256/sizeof(zend_ulong)];
2959 zend_ulong *num_bitset;
2960
2961 /* we will collect all possible key lengths */
2962 num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
2963 memset(bitset, 0, sizeof(bitset));
2964
2965 /* check if original array has numeric keys */
2966 ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
2967 if (UNEXPECTED(!str_key)) {
2968 has_num_keys = true;
2969 } else {
2970 len = ZSTR_LEN(str_key);
2971 if (UNEXPECTED(len == 0)) {
2972 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
2973 continue;
2974 } else if (UNEXPECTED(len > slen)) {
2975 /* skip long patterns */
2976 continue;
2977 }
2978 if (len > maxlen) {
2979 maxlen = len;
2980 }
2981 if (len < minlen) {
2982 minlen = len;
2983 }
2984 /* remember possible key length */
2985 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2986 bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
2987 }
2988 } ZEND_HASH_FOREACH_END();
2989
2990 if (UNEXPECTED(has_num_keys)) {
2991 zend_string *key_used;
2992 /* we have to rebuild HashTable with numeric keys */
2993 zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
2994 ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
2995 if (UNEXPECTED(!str_key)) {
2996 key_used = zend_long_to_str(num_key);
2997 len = ZSTR_LEN(key_used);
2998 if (UNEXPECTED(len > slen)) {
2999 /* skip long patterns */
3000 zend_string_release(key_used);
3001 continue;
3002 }
3003 if (len > maxlen) {
3004 maxlen = len;
3005 }
3006 if (len < minlen) {
3007 minlen = len;
3008 }
3009 /* remember possible key length */
3010 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
3011 bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
3012 } else {
3013 key_used = str_key;
3014 len = ZSTR_LEN(key_used);
3015 if (UNEXPECTED(len > slen)) {
3016 /* skip long patterns */
3017 continue;
3018 }
3019 }
3020 zend_hash_add(&str_hash, key_used, entry);
3021 if (UNEXPECTED(!str_key)) {
3022 zend_string_release_ex(key_used, 0);
3023 }
3024 } ZEND_HASH_FOREACH_END();
3025 pats = &str_hash;
3026 }
3027
3028 if (UNEXPECTED(minlen > maxlen)) {
3029 /* return the original string */
3030 if (pats == &str_hash) {
3031 zend_hash_destroy(&str_hash);
3032 }
3033 efree(num_bitset);
3034 RETURN_STR_COPY(input);
3035 }
3036
3037 old_pos = pos = 0;
3038 while (pos <= slen - minlen) {
3039 key = str + pos;
3040 if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
3041 len = maxlen;
3042 if (len > slen - pos) {
3043 len = slen - pos;
3044 }
3045 while (len >= minlen) {
3046 if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
3047 entry = zend_hash_str_find(pats, key, len);
3048 if (entry != NULL) {
3049 zend_string *tmp;
3050 zend_string *s = zval_get_tmp_string(entry, &tmp);
3051 smart_str_appendl(&result, str + old_pos, pos - old_pos);
3052 smart_str_append(&result, s);
3053 old_pos = pos + len;
3054 pos = old_pos - 1;
3055 zend_tmp_string_release(tmp);
3056 break;
3057 }
3058 }
3059 len--;
3060 }
3061 }
3062 pos++;
3063 }
3064
3065 if (result.s) {
3066 smart_str_appendl(&result, str + old_pos, slen - old_pos);
3067 RETVAL_STR(smart_str_extract(&result));
3068 } else {
3069 smart_str_free(&result);
3070 RETVAL_STR_COPY(input);
3071 }
3072
3073 if (pats == &str_hash) {
3074 zend_hash_destroy(&str_hash);
3075 }
3076 efree(num_bitset);
3077 }
3078
3079 /* {{{ count_chars */
3080 static zend_always_inline zend_long count_chars(const char *p, zend_long length, char ch)
3081 {
3082 zend_long count = 0;
3083 const char *endp;
3084
3085 #ifdef __SSE2__
3086 if (length >= sizeof(__m128i)) {
3087 __m128i search = _mm_set1_epi8(ch);
3088
3089 do {
3090 __m128i src = _mm_loadu_si128((__m128i*)(p));
3091 uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(src, search));
3092 // TODO: It would be great to use POPCNT, but it's available only with SSE4.1
3093 #if 1
3094 while (mask != 0) {
3095 count++;
3096 mask = mask & (mask - 1);
3097 }
3098 #else
3099 if (mask) {
3100 mask = mask - ((mask >> 1) & 0x5555);
3101 mask = (mask & 0x3333) + ((mask >> 2) & 0x3333);
3102 mask = (mask + (mask >> 4)) & 0x0F0F;
3103 mask = (mask + (mask >> 8)) & 0x00ff;
3104 count += mask;
3105 }
3106 #endif
3107 p += sizeof(__m128i);
3108 length -= sizeof(__m128i);
3109 } while (length >= sizeof(__m128i));
3110 }
3111 endp = p + length;
3112 while (p != endp) {
3113 count += (*p == ch);
3114 p++;
3115 }
3116 #else
3117 endp = p + length;
3118 while ((p = memchr(p, ch, endp-p))) {
3119 count++;
3120 p++;
3121 }
3122 #endif
3123 return count;
3124 }
3125 /* }}} */
3126
3127 /* {{{ php_char_to_str_ex */
3128 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, bool case_sensitivity, zend_long *replace_count)
3129 {
3130 zend_string *result;
3131 size_t char_count;
3132 int lc_from = 0;
3133 const char *source, *source_end;
3134 char *target;
3135
3136 if (case_sensitivity) {
3137 char_count = count_chars(ZSTR_VAL(str), ZSTR_LEN(str), from);
3138 } else {
3139 char_count = 0;
3140 lc_from = zend_tolower_ascii(from);
3141 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
3142 for (source = ZSTR_VAL(str); source < source_end; source++) {
3143 if (zend_tolower_ascii(*source) == lc_from) {
3144 char_count++;
3145 }
3146 }
3147 }
3148
3149 if (char_count == 0) {
3150 return zend_string_copy(str);
3151 }
3152
3153 if (replace_count) {
3154 *replace_count += char_count;
3155 }
3156
3157 if (to_len > 0) {
3158 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
3159 } else {
3160 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
3161 }
3162 target = ZSTR_VAL(result);
3163
3164 if (case_sensitivity) {
3165 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
3166
3167 while ((p = memchr(p, from, (e - p)))) {
3168 target = zend_mempcpy(target, s, (p - s));
3169 target = zend_mempcpy(target, to, to_len);
3170 p++;
3171 s = p;
3172 if (--char_count == 0) break;
3173 }
3174 if (s < e) {
3175 target = zend_mempcpy(target, s, e - s);
3176 }
3177 } else {
3178 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
3179 for (source = ZSTR_VAL(str); source < source_end; source++) {
3180 if (zend_tolower_ascii(*source) == lc_from) {
3181 target = zend_mempcpy(target, to, to_len);
3182 } else {
3183 *target = *source;
3184 target++;
3185 }
3186 }
3187 }
3188 *target = 0;
3189 return result;
3190 }
3191 /* }}} */
3192
3193 /* {{{ php_str_to_str_ex */
3194 static zend_string *php_str_to_str_ex(zend_string *haystack,
3195 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
3196 {
3197
3198 if (needle_len < ZSTR_LEN(haystack)) {
3199 zend_string *new_str;
3200 const char *end;
3201 const char *p, *r;
3202 char *e;
3203
3204 if (needle_len == str_len) {
3205 new_str = NULL;
3206 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3207 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3208 if (!new_str) {
3209 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3210 }
3211 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
3212 (*replace_count)++;
3213 }
3214 if (!new_str) {
3215 goto nothing_todo;
3216 }
3217 return new_str;
3218 } else {
3219 size_t count = 0;
3220 const char *o = ZSTR_VAL(haystack);
3221 const char *n = needle;
3222 const char *endp = o + ZSTR_LEN(haystack);
3223
3224 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3225 o += needle_len;
3226 count++;
3227 }
3228 if (count == 0) {
3229 /* Needle doesn't occur, shortcircuit the actual replacement. */
3230 goto nothing_todo;
3231 }
3232 if (str_len > needle_len) {
3233 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
3234 } else {
3235 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
3236 }
3237
3238 e = ZSTR_VAL(new_str);
3239 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3240 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3241 e = zend_mempcpy(e, p, r - p);
3242 e = zend_mempcpy(e, str, str_len);
3243 (*replace_count)++;
3244 }
3245
3246 if (p < end) {
3247 e = zend_mempcpy(e, p, end - p);
3248 }
3249
3250 *e = '\0';
3251 return new_str;
3252 }
3253 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
3254 nothing_todo:
3255 return zend_string_copy(haystack);
3256 } else {
3257 (*replace_count)++;
3258 return zend_string_init_fast(str, str_len);
3259 }
3260 }
3261 /* }}} */
3262
3263 /* {{{ php_str_to_str_i_ex */
3264 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3265 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3266 {
3267 zend_string *new_str = NULL;
3268 zend_string *lc_needle;
3269
3270 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3271 const char *end;
3272 const char *p, *r;
3273 char *e;
3274
3275 if (ZSTR_LEN(needle) == str_len) {
3276 lc_needle = zend_string_tolower(needle);
3277 end = lc_haystack + ZSTR_LEN(haystack);
3278 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3279 if (!new_str) {
3280 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3281 }
3282 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3283 (*replace_count)++;
3284 }
3285 zend_string_release_ex(lc_needle, 0);
3286
3287 if (!new_str) {
3288 goto nothing_todo;
3289 }
3290 return new_str;
3291 } else {
3292 size_t count = 0;
3293 const char *o = lc_haystack;
3294 const char *n;
3295 const char *endp = o + ZSTR_LEN(haystack);
3296
3297 lc_needle = zend_string_tolower(needle);
3298 n = ZSTR_VAL(lc_needle);
3299
3300 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3301 o += ZSTR_LEN(lc_needle);
3302 count++;
3303 }
3304 if (count == 0) {
3305 /* Needle doesn't occur, shortcircuit the actual replacement. */
3306 zend_string_release_ex(lc_needle, 0);
3307 goto nothing_todo;
3308 }
3309
3310 if (str_len > ZSTR_LEN(lc_needle)) {
3311 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3312 } else {
3313 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3314 }
3315
3316 e = ZSTR_VAL(new_str);
3317 end = lc_haystack + ZSTR_LEN(haystack);
3318
3319 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3320 e = zend_mempcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3321 e = zend_mempcpy(e, str, str_len);
3322 (*replace_count)++;
3323 }
3324
3325 if (p < end) {
3326 e = zend_mempcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3327 }
3328 *e = '\0';
3329
3330 zend_string_release_ex(lc_needle, 0);
3331
3332 return new_str;
3333 }
3334 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3335 nothing_todo:
3336 return zend_string_copy(haystack);
3337 } else {
3338 lc_needle = zend_string_tolower(needle);
3339
3340 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3341 zend_string_release_ex(lc_needle, 0);
3342 goto nothing_todo;
3343 }
3344 zend_string_release_ex(lc_needle, 0);
3345
3346 new_str = zend_string_init(str, str_len, 0);
3347
3348 (*replace_count)++;
3349 return new_str;
3350 }
3351 }
3352 /* }}} */
3353
3354 /* {{{ php_str_to_str */
3355 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3356 {
3357 zend_string *new_str;
3358
3359 if (needle_len < length) {
3360 const char *end;
3361 const char *s, *p;
3362 char *e, *r;
3363
3364 if (needle_len == str_len) {
3365 new_str = zend_string_init(haystack, length, 0);
3366 end = ZSTR_VAL(new_str) + length;
3367 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3368 memcpy(r, str, str_len);
3369 }
3370 return new_str;
3371 } else {
3372 if (str_len < needle_len) {
3373 new_str = zend_string_alloc(length, 0);
3374 } else {
3375 size_t count = 0;
3376 const char *o = haystack;
3377 const char *n = needle;
3378 const char *endp = o + length;
3379
3380 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3381 o += needle_len;
3382 count++;
3383 }
3384 if (count == 0) {
3385 /* Needle doesn't occur, shortcircuit the actual replacement. */
3386 new_str = zend_string_init(haystack, length, 0);
3387 return new_str;
3388 } else {
3389 if (str_len > needle_len) {
3390 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3391 } else {
3392 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3393 }
3394 }
3395 }
3396
3397 s = e = ZSTR_VAL(new_str);
3398 end = haystack + length;
3399 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3400 e = zend_mempcpy(e, p, r - p);
3401 e = zend_mempcpy(e, str, str_len);
3402 }
3403
3404 if (p < end) {
3405 e = zend_mempcpy(e, p, end - p);
3406 }
3407
3408 *e = '\0';
3409 new_str = zend_string_truncate(new_str, e - s, 0);
3410 return new_str;
3411 }
3412 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3413 new_str = zend_string_init(haystack, length, 0);
3414 return new_str;
3415 } else {
3416 new_str = zend_string_init(str, str_len, 0);
3417
3418 return new_str;
3419 }
3420 }
3421 /* }}} */
3422
3423 static void php_strtr_array(zval *return_value, zend_string *str, HashTable *from_ht)
3424 {
3425 if (zend_hash_num_elements(from_ht) < 1) {
3426 RETURN_STR_COPY(str);
3427 } else if (zend_hash_num_elements(from_ht) == 1) {
3428 zend_long num_key;
3429 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3430 zval *entry;
3431
3432 ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
3433 tmp_str = NULL;
3434 if (UNEXPECTED(!str_key)) {
3435 str_key = tmp_str = zend_long_to_str(num_key);
3436 }
3437 replace = zval_get_tmp_string(entry, &tmp_replace);
3438 if (ZSTR_LEN(str_key) < 1) {
3439 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3440 RETVAL_STR_COPY(str);
3441 } else if (ZSTR_LEN(str_key) == 1) {
3442 RETVAL_STR(php_char_to_str_ex(str,
3443 ZSTR_VAL(str_key)[0],
3444 ZSTR_VAL(replace),
3445 ZSTR_LEN(replace),
3446 /* case_sensitive */ true,
3447 NULL));
3448 } else {
3449 zend_long dummy;
3450 RETVAL_STR(php_str_to_str_ex(str,
3451 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3452 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3453 }
3454 zend_tmp_string_release(tmp_str);
3455 zend_tmp_string_release(tmp_replace);
3456 return;
3457 } ZEND_HASH_FOREACH_END();
3458 } else {
3459 php_strtr_array_ex(return_value, str, from_ht);
3460 }
3461 }
3462
3463 /* {{{ Translates characters in str using given translation tables */
3464 PHP_FUNCTION(strtr)
3465 {
3466 zend_string *str, *from_str = NULL;
3467 HashTable *from_ht = NULL;
3468 char *to = NULL;
3469 size_t to_len = 0;
3470
3471 if (ZEND_NUM_ARGS() <= 2) {
3472 ZEND_PARSE_PARAMETERS_START(2, 2)
3473 Z_PARAM_STR(str)
3474 Z_PARAM_ARRAY_HT(from_ht)
3475 ZEND_PARSE_PARAMETERS_END();
3476 } else {
3477 ZEND_PARSE_PARAMETERS_START(3, 3)
3478 Z_PARAM_STR(str)
3479 Z_PARAM_STR(from_str)
3480 Z_PARAM_STRING(to, to_len)
3481 ZEND_PARSE_PARAMETERS_END();
3482 }
3483
3484 /* shortcut for empty string */
3485 if (ZSTR_LEN(str) == 0) {
3486 RETURN_EMPTY_STRING();
3487 }
3488
3489 if (!to) {
3490 php_strtr_array(return_value, str, from_ht);
3491 } else {
3492 RETURN_STR(php_strtr_ex(str,
3493 ZSTR_VAL(from_str),
3494 to,
3495 MIN(ZSTR_LEN(from_str), to_len)));
3496 }
3497 }
3498 /* }}} */
3499
3500 ZEND_FRAMELESS_FUNCTION(strtr, 2)
3501 {
3502 zval str_tmp;
3503 zend_string *str;
3504 zval *from;
3505
3506 Z_FLF_PARAM_STR(1, str, str_tmp);
3507 Z_FLF_PARAM_ARRAY(2, from);
3508
3509 if (ZSTR_LEN(str) == 0) {
3510 RETVAL_EMPTY_STRING();
3511 goto flf_clean;
3512 }
3513
3514 php_strtr_array(return_value, str, Z_ARR_P(from));
3515
3516 flf_clean:
3517 Z_FLF_PARAM_FREE_STR(1, str_tmp);
3518 }
3519
3520 ZEND_FRAMELESS_FUNCTION(strtr, 3)
3521 {
3522 zval str_tmp, from_tmp, to_tmp;
3523 zend_string *str, *from, *to;
3524
3525 Z_FLF_PARAM_STR(1, str, str_tmp);
3526 Z_FLF_PARAM_STR(2, from, from_tmp);
3527 Z_FLF_PARAM_STR(3, to, to_tmp);
3528
3529 if (ZSTR_LEN(str) == 0) {
3530 RETVAL_EMPTY_STRING();
3531 goto flf_clean;
3532 }
3533
3534 RETVAL_STR(php_strtr_ex(str, ZSTR_VAL(from), ZSTR_VAL(to), MIN(ZSTR_LEN(from), ZSTR_LEN(to))));
3535
3536 flf_clean:
3537 Z_FLF_PARAM_FREE_STR(1, str_tmp);
3538 Z_FLF_PARAM_FREE_STR(2, from_tmp);
3539 Z_FLF_PARAM_FREE_STR(3, to_tmp);
3540 }
3541
3542 /* {{{ Reverse a string */
3543 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3544 #include <tmmintrin.h>
3545 #elif defined(__aarch64__) || defined(_M_ARM64)
3546 #include <arm_neon.h>
3547 #endif
3548 PHP_FUNCTION(strrev)
3549 {
3550 zend_string *str;
3551 const char *s, *e;
3552 char *p;
3553 zend_string *n;
3554
3555 ZEND_PARSE_PARAMETERS_START(1, 1)
3556 Z_PARAM_STR(str)
3557 ZEND_PARSE_PARAMETERS_END();
3558
3559 n = zend_string_alloc(ZSTR_LEN(str), 0);
3560 p = ZSTR_VAL(n);
3561
3562 s = ZSTR_VAL(str);
3563 e = s + ZSTR_LEN(str);
3564 --e;
3565 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3566 if (e - s > 15) {
3567 const __m128i map = _mm_set_epi8(
3568 0, 1, 2, 3,
3569 4, 5, 6, 7,
3570 8, 9, 10, 11,
3571 12, 13, 14, 15);
3572 do {
3573 const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
3574 _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
3575 p += 16;
3576 e -= 16;
3577 } while (e - s > 15);
3578 }
3579 #elif defined(__aarch64__)
3580 if (e - s > 15) {
3581 do {
3582 const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
3583 /* Synthesize rev128 with a rev64 + ext. */
3584 const uint8x16_t rev = vrev64q_u8(str);
3585 const uint8x16_t ext = (uint8x16_t)
3586 vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
3587 vst1q_u8((uint8_t *)p, ext);
3588 p += 16;
3589 e -= 16;
3590 } while (e - s > 15);
3591 }
3592 #elif defined(_M_ARM64)
3593 if (e - s > 15) {
3594 do {
3595 const __n128 str = vld1q_u8((uint8_t *)(e - 15));
3596 /* Synthesize rev128 with a rev64 + ext. */
3597 /* strange force cast limit on windows: you cannot convert anything */
3598 const __n128 rev = vrev64q_u8(str);
3599 const __n128 ext = vextq_u64(rev, rev, 1);
3600 vst1q_u8((uint8_t *)p, ext);
3601 p += 16;
3602 e -= 16;
3603 } while (e - s > 15);
3604 }
3605 #endif
3606 while (e >= s) {
3607 *p++ = *e--;
3608 }
3609
3610 *p = '\0';
3611
3612 RETVAL_NEW_STR(n);
3613 }
3614 /* }}} */
3615
3616 /* {{{ php_similar_str */
/* Find the first longest common run of bytes shared by txt1 and txt2.
 *
 * txt1 / len1, txt2 / len2  the two byte sequences
 * pos1, pos2                receive the start offsets of that run in txt1 and
 *                           txt2; left untouched when no byte is shared
 * max                       receives the run length (0 when nothing matches)
 * count                     receives how many times a longer run replaced the
 *                           current best during the scan */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t i, j;

	*max = 0;
	*count = 0;
	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* Length of the common run starting at txt1[i] / txt2[j]. */
			size_t run = 0;

			while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
				run++;
			}
			/* Strictly greater: on a tie the earlier position is kept. */
			if (run > *max) {
				*max = run;
				*count += 1;
				*pos1 = i;
				*pos2 = j;
			}
		}
	}
}
3638 /* }}} */
3639
3640 /* {{{ php_similar_char */
/* Number of matching bytes between txt1 and txt2: the longest common run
 * plus, recursively, the result for the parts to its left and to its right. */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t pos1 = 0, pos2 = 0, max, count;
	size_t sum;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);

	sum = max;
	if (sum == 0) {
		/* Nothing in common at all. */
		return sum;
	}

	/* Left of the common run; only visited when the best run was replaced at
	 * least once during the scan. */
	if (pos1 && pos2 && count > 1) {
		sum += php_similar_char(txt1, pos1,
				txt2, pos2);
	}

	/* Right of the common run. */
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		sum += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
				txt2 + pos2 + max, len2 - pos2 - max);
	}

	return sum;
}
3660 /* }}} */
3661
3662 /* {{{ Calculates the similarity between two strings */
3663 PHP_FUNCTION(similar_text)
3664 {
3665 zend_string *t1, *t2;
3666 zval *percent = NULL;
3667 bool compute_percentage = ZEND_NUM_ARGS() >= 3;
3668 size_t sim;
3669
3670 ZEND_PARSE_PARAMETERS_START(2, 3)
3671 Z_PARAM_STR(t1)
3672 Z_PARAM_STR(t2)
3673 Z_PARAM_OPTIONAL
3674 Z_PARAM_ZVAL(percent)
3675 ZEND_PARSE_PARAMETERS_END();
3676
3677 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3678 if (compute_percentage) {
3679 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3680 }
3681
3682 RETURN_LONG(0);
3683 }
3684
3685 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3686
3687 if (compute_percentage) {
3688 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3689 }
3690
3691 RETURN_LONG(sim);
3692 }
3693 /* }}} */
3694
3695 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3696 PHP_FUNCTION(addcslashes)
3697 {
3698 zend_string *str, *what;
3699
3700 ZEND_PARSE_PARAMETERS_START(2, 2)
3701 Z_PARAM_STR(str)
3702 Z_PARAM_STR(what)
3703 ZEND_PARSE_PARAMETERS_END();
3704
3705 if (ZSTR_LEN(str) == 0) {
3706 RETURN_EMPTY_STRING();
3707 }
3708
3709 if (ZSTR_LEN(what) == 0) {
3710 RETURN_STR_COPY(str);
3711 }
3712
3713 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3714 }
3715 /* }}} */
3716
3717 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3718 PHP_FUNCTION(addslashes)
3719 {
3720 zend_string *str;
3721
3722 ZEND_PARSE_PARAMETERS_START(1, 1)
3723 Z_PARAM_STR(str)
3724 ZEND_PARSE_PARAMETERS_END();
3725
3726 if (ZSTR_LEN(str) == 0) {
3727 RETURN_EMPTY_STRING();
3728 }
3729
3730 RETURN_STR(php_addslashes(str));
3731 }
3732 /* }}} */
3733
3734 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3735 PHP_FUNCTION(stripcslashes)
3736 {
3737 zend_string *str;
3738
3739 ZEND_PARSE_PARAMETERS_START(1, 1)
3740 Z_PARAM_STR(str)
3741 ZEND_PARSE_PARAMETERS_END();
3742
3743 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3744 php_stripcslashes(Z_STR_P(return_value));
3745 }
3746 /* }}} */
3747
3748 /* {{{ Strips backslashes from a string */
3749 PHP_FUNCTION(stripslashes)
3750 {
3751 zend_string *str;
3752
3753 ZEND_PARSE_PARAMETERS_START(1, 1)
3754 Z_PARAM_STR(str)
3755 ZEND_PARSE_PARAMETERS_END();
3756
3757 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3758 php_stripslashes(Z_STR_P(return_value));
3759 }
3760 /* }}} */
3761
3762 /* {{{ php_stripcslashes */
3763 PHPAPI void php_stripcslashes(zend_string *str)
3764 {
3765 const char *source, *end;
3766 char *target;
3767 size_t nlen = ZSTR_LEN(str), i;
3768 char numtmp[4];
3769
3770 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3771 if (*source == '\\' && source + 1 < end) {
3772 source++;
3773 switch (*source) {
3774 case 'n': *target++='\n'; nlen--; break;
3775 case 'r': *target++='\r'; nlen--; break;
3776 case 'a': *target++='\a'; nlen--; break;
3777 case 't': *target++='\t'; nlen--; break;
3778 case 'v': *target++='\v'; nlen--; break;
3779 case 'b': *target++='\b'; nlen--; break;
3780 case 'f': *target++='\f'; nlen--; break;
3781 case '\\': *target++='\\'; nlen--; break;
3782 case 'x':
3783 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3784 numtmp[0] = *++source;
3785 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3786 numtmp[1] = *++source;
3787 numtmp[2] = '\0';
3788 nlen-=3;
3789 } else {
3790 numtmp[1] = '\0';
3791 nlen-=2;
3792 }
3793 *target++=(char)strtol(numtmp, NULL, 16);
3794 break;
3795 }
3796 ZEND_FALLTHROUGH;
3797 default:
3798 i=0;
3799 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3800 numtmp[i++] = *source++;
3801 }
3802 if (i) {
3803 numtmp[i]='\0';
3804 *target++=(char)strtol(numtmp, NULL, 8);
3805 nlen-=i;
3806 source--;
3807 } else {
3808 *target++=*source;
3809 nlen--;
3810 }
3811 }
3812 } else {
3813 *target++=*source;
3814 }
3815 }
3816
3817 if (nlen != 0) {
3818 *target='\0';
3819 }
3820
3821 ZSTR_LEN(str) = nlen;
3822 }
3823 /* }}} */
3824
3825 /* {{{ php_addcslashes_str */
3826 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3827 {
3828 char flags[256];
3829 char *target;
3830 const char *source, *end;
3831 char c;
3832 size_t newlen;
3833 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3834
3835 php_charmask((const unsigned char *) what, wlength, flags);
3836
3837 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3838 c = *source;
3839 if (flags[(unsigned char)c]) {
3840 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3841 *target++ = '\\';
3842 switch (c) {
3843 case '\n': *target++ = 'n'; break;
3844 case '\t': *target++ = 't'; break;
3845 case '\r': *target++ = 'r'; break;
3846 case '\a': *target++ = 'a'; break;
3847 case '\v': *target++ = 'v'; break;
3848 case '\b': *target++ = 'b'; break;
3849 case '\f': *target++ = 'f'; break;
3850 default: target += sprintf(target, "%03o", (unsigned char) c);
3851 }
3852 continue;
3853 }
3854 *target++ = '\\';
3855 }
3856 *target++ = c;
3857 }
3858 *target = 0;
3859 newlen = target - ZSTR_VAL(new_str);
3860 if (newlen < len * 4) {
3861 new_str = zend_string_truncate(new_str, newlen, 0);
3862 }
3863 return new_str;
3864 }
3865 /* }}} */
3866
3867 /* {{{ php_addcslashes */
3868 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3869 {
3870 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3871 }
3872 /* }}} */
3873
3874 /* {{{ php_addslashes */
3875
3876 #ifdef ZEND_INTRIN_SSE4_2_NATIVE
3877 # include <nmmintrin.h>
3878 # include "Zend/zend_bitset.h"
3879 #elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
3880 # include <nmmintrin.h>
3881 # include "Zend/zend_bitset.h"
3882 # include "Zend/zend_cpuinfo.h"
3883
3884 ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
3885 zend_string *php_addslashes_default(zend_string *str);
3886
3887 # ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
3888 PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
3889
3890 typedef zend_string *(*php_addslashes_func_t)(zend_string *);
3891
3892 ZEND_NO_SANITIZE_ADDRESS
3893 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3894 static php_addslashes_func_t resolve_addslashes(void) {
3895 if (zend_cpu_supports_sse42()) {
3896 return php_addslashes_sse42;
3897 }
3898 return php_addslashes_default;
3899 }
3900 # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
3901
3902 static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
3903
3904 PHPAPI zend_string *php_addslashes(zend_string *str) {
3905 return php_addslashes_ptr(str);
3906 }
3907
3908 /* {{{ PHP_MINIT_FUNCTION */
3909 PHP_MINIT_FUNCTION(string_intrin)
3910 {
3911 if (zend_cpu_supports_sse42()) {
3912 php_addslashes_ptr = php_addslashes_sse42;
3913 } else {
3914 php_addslashes_ptr = php_addslashes_default;
3915 }
3916 return SUCCESS;
3917 }
3918 /* }}} */
3919 # endif
3920 #endif
3921
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_RESOLVER)
/* Copy one 16-byte chunk from `source` to `target`, putting a backslash in
 * front of every byte whose bit is set in `mask` (bit i <=> source[i]); a
 * flagged NUL byte is written as "\0" (backslash, digit zero).
 * `mask` must be non-zero. Returns the advanced write position. */
static zend_always_inline char *php_addslashes_escape_chunk(const char *source, char *target, uint32_t mask)
{
	int pos = 0;

	do {
		/* Bytes in front of the next flagged one are copied unchanged. */
		int i, run = zend_ulong_ntz(mask);

		for (i = 0; i < run; i++) {
			*target++ = source[pos + i];
		}
		pos += run;

		*target++ = '\\';
		*target++ = (source[pos] == '\0') ? '0' : source[pos];
		pos++;

		mask = mask >> (run + 1);
	} while (mask);

	/* Rest of the chunk after the last flagged byte. */
	for (; pos < 16; pos++) {
		*target++ = source[pos];
	}

	return target;
}

# ifdef ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
# elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
zend_string *php_addslashes_sse42(zend_string *str)
# endif
{
	/* The four bytes that need escaping: ' " \ and NUL. */
	ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
	__m128i w128, s128;
	uint32_t res = 0;
	char *target;
	const char *source, *end;
	size_t offset, new_len;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

	/* Phase 1: look for the first byte that needs escaping, 16 at a time. */
	if (ZSTR_LEN(str) > 15) {
		w128 = _mm_load_si128((__m128i *)slashchars);
		do {
			s128 = _mm_loadu_si128((__m128i *)source);
			res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
			if (res) {
				goto do_escape;
			}
			source += 16;
		} while ((end - source) > 15);
	}

	/* ... then byte by byte over the tail. */
	for (; source < end; source++) {
		const char c = *source;

		if (c == '\0' || c == '\'' || c == '\"' || c == '\\') {
			goto do_escape;
		}
	}

	/* Nothing to escape: hand back the input itself. */
	return zend_string_copy(str);

do_escape:
	/* Phase 2: keep the clean prefix, then escape the rest. Worst case every
	 * remaining byte doubles. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

	if (res) {
		/* Arrived from the vector scan: finish the chunk that triggered it. */
		target = php_addslashes_escape_chunk(source, target, res);
		source += 16;
	} else if (end - source > 15) {
		w128 = _mm_load_si128((__m128i *)slashchars);
	}

	while (end - source > 15) {
		s128 = _mm_loadu_si128((__m128i *)source);
		res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
		if (res) {
			target = php_addslashes_escape_chunk(source, target, res);
		} else {
			_mm_storeu_si128((__m128i*)target, s128);
			target += 16;
		}
		source += 16;
	}

	/* Tail of fewer than 16 bytes. */
	for (; source < end; source++) {
		if (*source == '\0') {
			*target++ = '\\';
			*target++ = '0';
			continue;
		}
		if (*source == '\'' || *source == '\"' || *source == '\\') {
			*target++ = '\\';
		}
		*target++ = *source;
	}

	*target = '\0';

	/* Shrink the allocation only when more than 16 bytes would be wasted. */
	new_len = target - ZSTR_VAL(new_str);
	if (ZSTR_LEN(new_str) - new_len > 16) {
		new_str = zend_string_truncate(new_str, new_len, 0);
	} else {
		ZSTR_LEN(new_str) = new_len;
	}

	return new_str;
}
/* }}} */
#endif
4063
4064 #if defined(__aarch64__) || defined(_M_ARM64)
/* 16 bytes viewed either byte-wise or as two 64-bit words. */
typedef union {
	uint8_t mem[16]; /* one entry per byte */
	uint64_t dw[2]; /* the same storage as two words, e.g. for a quick "any bit set" test */
} quad_word;
4069
4070 static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
4071 uint8x16_t s0 = vceqq_u8(x, vdupq_n_u8('\0'));
4072 uint8x16_t s1 = vceqq_u8(x, vdupq_n_u8('\''));
4073 uint8x16_t s2 = vceqq_u8(x, vdupq_n_u8('\"'));
4074 uint8x16_t s3 = vceqq_u8(x, vdupq_n_u8('\\'));
4075 uint8x16_t s01 = vorrq_u8(s0, s1);
4076 uint8x16_t s23 = vorrq_u8(s2, s3);
4077 uint8x16_t s0123 = vorrq_u8(s01, s23);
4078 quad_word qw;
4079 vst1q_u8(qw.mem, s0123);
4080 return qw;
4081 }
4082
4083 static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
4084 {
4085 for (int i = 0; i < 16; i++) {
4086 char s = source[i];
4087 if (res.mem[i] == 0)
4088 *target++ = s;
4089 else {
4090 *target++ = '\\';
4091 if (s == '\0')
4092 *target++ = '0';
4093 else
4094 *target++ = s;
4095 }
4096 }
4097 return target;
4098 }
4099 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
4100
4101 #ifndef ZEND_INTRIN_SSE4_2_NATIVE
4102 # ifdef ZEND_INTRIN_SSE4_2_RESOLVER
4103 zend_string *php_addslashes_default(zend_string *str) /* {{{ */
4104 # else
4105 PHPAPI zend_string *php_addslashes(zend_string *str)
4106 # endif
4107 {
4108 /* maximum string length, worst case situation */
4109 char *target;
4110 const char *source, *end;
4111 size_t offset;
4112 zend_string *new_str;
4113
4114 if (!str) {
4115 return ZSTR_EMPTY_ALLOC();
4116 }
4117
4118 source = ZSTR_VAL(str);
4119 end = source + ZSTR_LEN(str);
4120
4121 # if defined(__aarch64__) || defined(_M_ARM64)
4122 quad_word res = {0};
4123 if (ZSTR_LEN(str) > 15) {
4124 do {
4125 res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
4126 if (res.dw[0] | res.dw[1])
4127 goto do_escape;
4128 source += 16;
4129 } while ((end - source) > 15);
4130 }
4131 /* Finish the last 15 bytes or less with the scalar loop. */
4132 # endif /* defined(__aarch64__) || defined(_M_ARM64) */
4133
4134 while (source < end) {
4135 switch (*source) {
4136 case '\0':
4137 case '\'':
4138 case '\"':
4139 case '\\':
4140 goto do_escape;
4141 default:
4142 source++;
4143 break;
4144 }
4145 }
4146
4147 return zend_string_copy(str);
4148
4149 do_escape:
4150 offset = source - (char *)ZSTR_VAL(str);
4151 new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
4152 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
4153 target = ZSTR_VAL(new_str) + offset;
4154
4155 # if defined(__aarch64__) || defined(_M_ARM64)
4156 if (res.dw[0] | res.dw[1]) {
4157 target = aarch64_add_slashes(res, source, target);
4158 source += 16;
4159 }
4160 for (; end - source > 15; source += 16) {
4161 uint8x16_t x = vld1q_u8((uint8_t *)source);
4162 res = aarch64_contains_slash_chars(x);
4163 if (res.dw[0] | res.dw[1]) {
4164 target = aarch64_add_slashes(res, source, target);
4165 } else {
4166 vst1q_u8((uint8_t*)target, x);
4167 target += 16;
4168 }
4169 }
4170 /* Finish the last 15 bytes or less with the scalar loop. */
4171 # endif /* defined(__aarch64__) || defined(_M_ARM64) */
4172
4173 while (source < end) {
4174 switch (*source) {
4175 case '\0':
4176 *target++ = '\\';
4177 *target++ = '0';
4178 break;
4179 case '\'':
4180 case '\"':
4181 case '\\':
4182 *target++ = '\\';
4183 ZEND_FALLTHROUGH;
4184 default:
4185 *target++ = *source;
4186 break;
4187 }
4188 source++;
4189 }
4190
4191 *target = '\0';
4192
4193 if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
4194 new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
4195 } else {
4196 ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
4197 }
4198
4199 return new_str;
4200 }
4201 #endif
4202 /* }}} */
4203 /* }}} */
4204
4205 /* {{{ php_stripslashes
4206 *
4207 * be careful, this edits the string in-place */
4208 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
4209 {
4210 #if defined(__aarch64__) || defined(_M_ARM64)
4211 while (len > 15) {
4212 uint8x16_t x = vld1q_u8((uint8_t *)str);
4213 quad_word q;
4214 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
4215 if (q.dw[0] | q.dw[1]) {
4216 unsigned int i = 0;
4217 while (i < 16) {
4218 if (q.mem[i] == 0) {
4219 *out++ = str[i];
4220 i++;
4221 continue;
4222 }
4223
4224 i++; /* skip the slash */
4225 if (i < len) {
4226 char s = str[i];
4227 if (s == '0')
4228 *out++ = '\0';
4229 else
4230 *out++ = s; /* preserve the next character */
4231 i++;
4232 }
4233 }
4234 str += i;
4235 len -= i;
4236 } else {
4237 vst1q_u8((uint8_t*)out, x);
4238 out += 16;
4239 str += 16;
4240 len -= 16;
4241 }
4242 }
4243 /* Finish the last 15 bytes or less with the scalar loop. */
4244 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
4245 while (len > 0) {
4246 if (*str == '\\') {
4247 str++; /* skip the slash */
4248 len--;
4249 if (len > 0) {
4250 if (*str == '0') {
4251 *out++='\0';
4252 str++;
4253 } else {
4254 *out++ = *str++; /* preserve the next character */
4255 }
4256 len--;
4257 }
4258 } else {
4259 *out++ = *str++;
4260 len--;
4261 }
4262 }
4263
4264 return out;
4265 }
4266
4267 #ifdef __SSE2__
4268 PHPAPI void php_stripslashes(zend_string *str)
4269 {
4270 const char *s = ZSTR_VAL(str);
4271 char *t = ZSTR_VAL(str);
4272 size_t l = ZSTR_LEN(str);
4273
4274 if (l > 15) {
4275 const __m128i slash = _mm_set1_epi8('\\');
4276
4277 do {
4278 __m128i in = _mm_loadu_si128((__m128i *)s);
4279 __m128i any_slash = _mm_cmpeq_epi8(in, slash);
4280 uint32_t res = _mm_movemask_epi8(any_slash);
4281
4282 if (res) {
4283 int i, n = zend_ulong_ntz(res);
4284 const char *e = s + 15;
4285 l -= n;
4286 for (i = 0; i < n; i++) {
4287 *t++ = *s++;
4288 }
4289 for (; s < e; s++) {
4290 if (*s == '\\') {
4291 s++;
4292 l--;
4293 if (*s == '0') {
4294 *t = '\0';
4295 } else {
4296 *t = *s;
4297 }
4298 } else {
4299 *t = *s;
4300 }
4301 t++;
4302 l--;
4303 }
4304 } else {
4305 _mm_storeu_si128((__m128i *)t, in);
4306 s += 16;
4307 t += 16;
4308 l -= 16;
4309 }
4310 } while (l > 15);
4311 }
4312
4313 t = php_stripslashes_impl(s, t, l);
4314 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4315 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4316 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4317 }
4318 }
4319 #else
4320 PHPAPI void php_stripslashes(zend_string *str)
4321 {
4322 const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
4323 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4324 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4325 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4326 }
4327 }
4328 #endif
4329 /* }}} */
4330
/* Block type tags assigned by hebrev(). */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/* Byte classifiers, each evaluating to 1 or 0. The argument is parenthesised
 * before the cast so that expressions such as isheb(v - 256) classify the
 * value of the whole expression.
 * isheb: byte value in 224..250 (presumably the letter range of an 8-bit
 * Hebrew code page; confirm against the caller's expected encoding). */
#define isheb(c) (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
/* _isblank: space or horizontal tab. */
#define _isblank(c) (((((unsigned char) (c)) == ' ' || ((unsigned char) (c)) == '\t')) ? 1 : 0)
/* _isnewline: line feed or carriage return. */
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4336
4337 /* {{{ php_str_replace_in_subject */
4338 static zend_long php_str_replace_in_subject(
4339 zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
4340 zend_string *subject_str, zval *result, bool case_sensitivity
4341 ) {
4342 zval *search_entry;
4343 zend_string *tmp_result;
4344 char *replace_value = NULL;
4345 size_t replace_len = 0;
4346 zend_long replace_count = 0;
4347 zend_string *lc_subject_str = NULL;
4348 uint32_t replace_idx;
4349
4350 if (ZSTR_LEN(subject_str) == 0) {
4351 ZVAL_EMPTY_STRING(result);
4352 return 0;
4353 }
4354
4355 /* If search is an array */
4356 if (search_ht) {
4357 /* Duplicate subject string for repeated replacement */
4358 zend_string_addref(subject_str);
4359
4360 if (replace_ht) {
4361 replace_idx = 0;
4362 } else {
4363 /* Set replacement value to the passed one */
4364 replace_value = ZSTR_VAL(replace_str);
4365 replace_len = ZSTR_LEN(replace_str);
4366 }
4367
4368 /* For each entry in the search array, get the entry */
4369 ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
4370 /* Make sure we're dealing with strings. */
4371 zend_string *tmp_search_str;
4372 zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
4373 zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
4374
4375 /* If replace is an array. */
4376 if (replace_ht) {
4377 /* Get current entry */
4378 zval *replace_entry = NULL;
4379 if (HT_IS_PACKED(replace_ht)) {
4380 while (replace_idx < replace_ht->nNumUsed) {
4381 replace_entry = &replace_ht->arPacked[replace_idx];
4382 if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
4383 break;
4384 }
4385 replace_idx++;
4386 }
4387 } else {
4388 while (replace_idx < replace_ht->nNumUsed) {
4389 replace_entry = &replace_ht->arData[replace_idx].val;
4390 if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
4391 break;
4392 }
4393 replace_idx++;
4394 }
4395 }
4396 if (replace_idx < replace_ht->nNumUsed) {
4397 /* Make sure we're dealing with strings. */
4398 replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
4399
4400 /* Set replacement value to the one we got from array */
4401 replace_value = ZSTR_VAL(replace_entry_str);
4402 replace_len = ZSTR_LEN(replace_entry_str);
4403
4404 replace_idx++;
4405 } else {
4406 /* We've run out of replacement strings, so use an empty one. */
4407 replace_value = "";
4408 replace_len = 0;
4409 }
4410 }
4411
4412 if (ZSTR_LEN(search_str) == 1) {
4413 zend_long old_replace_count = replace_count;
4414
4415 tmp_result = php_char_to_str_ex(subject_str,
4416 ZSTR_VAL(search_str)[0],
4417 replace_value,
4418 replace_len,
4419 case_sensitivity,
4420 &replace_count);
4421 if (lc_subject_str && replace_count != old_replace_count) {
4422 zend_string_release_ex(lc_subject_str, 0);
4423 lc_subject_str = NULL;
4424 }
4425 } else if (ZSTR_LEN(search_str) > 1) {
4426 if (case_sensitivity) {
4427 tmp_result = php_str_to_str_ex(subject_str,
4428 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4429 replace_value, replace_len, &replace_count);
4430 } else {
4431 zend_long old_replace_count = replace_count;
4432
4433 if (!lc_subject_str) {
4434 lc_subject_str = zend_string_tolower(subject_str);
4435 }
4436 tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4437 search_str, replace_value, replace_len, &replace_count);
4438 if (replace_count != old_replace_count) {
4439 zend_string_release_ex(lc_subject_str, 0);
4440 lc_subject_str = NULL;
4441 }
4442 }
4443 } else {
4444 zend_tmp_string_release(tmp_search_str);
4445 zend_tmp_string_release(tmp_replace_entry_str);
4446 continue;
4447 }
4448
4449 zend_tmp_string_release(tmp_search_str);
4450 zend_tmp_string_release(tmp_replace_entry_str);
4451
4452 if (subject_str == tmp_result) {
4453 zend_string_delref(subject_str);
4454 } else {
4455 zend_string_release_ex(subject_str, 0);
4456 subject_str = tmp_result;
4457 if (ZSTR_LEN(subject_str) == 0) {
4458 zend_string_release_ex(subject_str, 0);
4459 ZVAL_EMPTY_STRING(result);
4460 if (lc_subject_str) {
4461 zend_string_release_ex(lc_subject_str, 0);
4462 }
4463 return replace_count;
4464 }
4465 }
4466 } ZEND_HASH_FOREACH_END();
4467 ZVAL_STR(result, subject_str);
4468 if (lc_subject_str) {
4469 zend_string_release_ex(lc_subject_str, 0);
4470 }
4471 } else {
4472 ZEND_ASSERT(search_str);
4473 if (ZSTR_LEN(search_str) == 1) {
4474 ZVAL_STR(result,
4475 php_char_to_str_ex(subject_str,
4476 ZSTR_VAL(search_str)[0],
4477 ZSTR_VAL(replace_str),
4478 ZSTR_LEN(replace_str),
4479 case_sensitivity,
4480 &replace_count));
4481 } else if (ZSTR_LEN(search_str) > 1) {
4482 if (case_sensitivity) {
4483 ZVAL_STR(result, php_str_to_str_ex(subject_str,
4484 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4485 ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4486 } else {
4487 lc_subject_str = zend_string_tolower(subject_str);
4488 ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4489 search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4490 zend_string_release_ex(lc_subject_str, 0);
4491 }
4492 } else {
4493 ZVAL_STR_COPY(result, subject_str);
4494 }
4495 }
4496 return replace_count;
4497 }
4498 /* }}} */
4499
4500 static void _php_str_replace_common(
4501 zval *return_value,
4502 HashTable *search_ht, zend_string *search_str,
4503 HashTable *replace_ht, zend_string *replace_str,
4504 HashTable *subject_ht, zend_string *subject_str,
4505 zval *zcount,
4506 bool case_sensitivity
4507 ) {
4508 zval *subject_entry;
4509 zval result;
4510 zend_string *string_key;
4511 zend_ulong num_key;
4512 zend_long count = 0;
4513
4514 /* Make sure we're dealing with strings and do the replacement. */
4515 if (search_str && replace_ht) {
4516 zend_argument_type_error(2, "must be of type string when argument #1 ($search) is a string");
4517 RETURN_THROWS();
4518 }
4519
4520 /* if subject is an array */
4521 if (subject_ht) {
4522 array_init(return_value);
4523
4524 /* For each subject entry, convert it to string, then perform replacement
4525 and add the result to the return_value array. */
4526 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
4527 zend_string *tmp_subject_str;
4528 ZVAL_DEREF(subject_entry);
4529 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4530 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4531 zend_tmp_string_release(tmp_subject_str);
4532
4533 /* Add to return array */
4534 if (string_key) {
4535 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4536 } else {
4537 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4538 }
4539 } ZEND_HASH_FOREACH_END();
4540 } else { /* if subject is not an array */
4541 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4542 }
4543 if (zcount) {
4544 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4545 }
4546 }
4547
4548 /* {{{ php_str_replace_common */
4549 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool case_sensitivity)
4550 {
4551 zend_string *search_str;
4552 HashTable *search_ht;
4553 zend_string *replace_str;
4554 HashTable *replace_ht;
4555 zend_string *subject_str;
4556 HashTable *subject_ht;
4557 zval *zcount = NULL;
4558
4559 ZEND_PARSE_PARAMETERS_START(3, 4)
4560 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4561 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4562 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4563 Z_PARAM_OPTIONAL
4564 Z_PARAM_ZVAL(zcount)
4565 ZEND_PARSE_PARAMETERS_END();
4566
4567 _php_str_replace_common(return_value, search_ht, search_str, replace_ht, replace_str, subject_ht, subject_str, zcount, case_sensitivity);
4568 }
4569 /* }}} */
4570
4571 /* {{{ Replaces all occurrences of search in haystack with replace */
4572 PHP_FUNCTION(str_replace)
4573 {
4574 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4575 }
4576 /* }}} */
4577
4578 ZEND_FRAMELESS_FUNCTION(str_replace, 3)
4579 {
4580 zend_string *search_str, *replace_str, *subject_str;
4581 HashTable *search_ht, *replace_ht, *subject_ht;
4582 zval search_tmp, replace_tmp, subject_tmp;
4583
4584 Z_FLF_PARAM_ARRAY_HT_OR_STR(1, search_ht, search_str, search_tmp);
4585 Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
4586 Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
4587
4588 _php_str_replace_common(return_value, search_ht, search_str, replace_ht, replace_str, subject_ht, subject_str, /* zcount */ NULL, /* case_sensitivity */ true);
4589
4590 flf_clean:;
4591 Z_FLF_PARAM_FREE_STR(1, search_tmp);
4592 Z_FLF_PARAM_FREE_STR(2, replace_tmp);
4593 Z_FLF_PARAM_FREE_STR(3, subject_tmp);
4594 }
4595
4596 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4597 PHP_FUNCTION(str_ireplace)
4598 {
4599 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4600 }
4601 /* }}} */
4602
/* {{{ Converts logical Hebrew text to visual text
 *
 * Arguments: the input string (required) and max_chars (optional integer,
 * defaults to 0). An empty input yields an empty string; otherwise a new
 * string allocated with the input's length is returned.
 *
 * The conversion runs in two passes:
 *
 *  1. The input is scanned left to right in alternating "Hebrew" and
 *     "English" blocks and written into the scratch buffer heb_str, which is
 *     filled from its last byte towards its first. Hebrew blocks are written
 *     in scan order (so they end up reversed, with brackets and slashes
 *     mirrored); English blocks are written last-character-first (so they
 *     keep their original reading order).
 *
 *  2. heb_str is walked from its end towards its start, cut into lines, and
 *     each line is appended to the result string.
 *
 * isheb(), _isblank(), _isnewline() and the _HEB_BLOCK_TYPE_* constants are
 * defined outside this block.
 */
PHP_FUNCTION(hebrev)
{
	char *str, *heb_str, *target;
	const char *tmp;
	size_t block_start, block_end, block_type, i;
	zend_long max_chars=0, char_count;
	size_t begin, end, orig_begin;
	size_t str_len;
	zend_string *broken_str;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STRING(str, str_len)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(max_chars)
	ZEND_PARSE_PARAMETERS_END();

	/* Nothing to convert. This also guarantees str_len >= 1 for the
	 * unsigned "str_len-1" expressions used below. */
	if (str_len == 0) {
		RETURN_EMPTY_STRING();
	}

	/* ---- Pass 1: build the visually ordered text in heb_str ---- */

	tmp = str;
	block_start=block_end=0;

	/* heb_str is written back to front: target starts on the last byte
	 * (just before the terminating NUL) and is decremented after each write. */
	heb_str = (char *) emalloc(str_len+1);
	target = heb_str+str_len;
	*target = 0;
	target--;

	/* The type of the first block is decided by the first character. */
	if (isheb(*tmp)) {
		block_type = _HEB_BLOCK_TYPE_HEB;
	} else {
		block_type = _HEB_BLOCK_TYPE_ENG;
	}

	do {
		if (block_type == _HEB_BLOCK_TYPE_HEB) {
			/* Extend the block while the next character is Hebrew, blank,
			 * punctuation or '\n', stopping at the end of the input. */
			while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
				tmp++;
				block_end++;
			}
			/* Copy str[block_start..block_end] in forward order onto the
			 * backwards-moving target, which reverses the block. Paired
			 * delimiters and slashes are swapped for their mirror image. */
			for (i = block_start+1; i<= block_end+1; i++) {
				*target = str[i-1];
				switch (*target) {
					case '(':
						*target = ')';
						break;
					case ')':
						*target = '(';
						break;
					case '[':
						*target = ']';
						break;
					case ']':
						*target = '[';
						break;
					case '{':
						*target = '}';
						break;
					case '}':
						*target = '{';
						break;
					case '<':
						*target = '>';
						break;
					case '>':
						*target = '<';
						break;
					case '\\':
						*target = '/';
						break;
					case '/':
						*target = '\\';
						break;
					default:
						break;
				}
				target--;
			}
			block_type = _HEB_BLOCK_TYPE_ENG;
		} else {
			/* Extend the block while the next character is neither Hebrew
			 * nor '\n', stopping at the end of the input. */
			while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
				tmp++;
				block_end++;
			}
			/* Shrink the block again over trailing blanks/punctuation
			 * (except '/' and '-'), leaving them for the next block. */
			while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
				tmp--;
				block_end--;
			}
			/* Copy str[block_end..block_start] (last character first) onto
			 * the backwards-moving target, which keeps the block's order.
			 * The index is offset by one so the unsigned counter never has
			 * to drop below zero for the loop to terminate. */
			for (i = block_end+1; i >= block_start+1; i--) {
				*target = str[i-1];
				target--;
			}
			block_type = _HEB_BLOCK_TYPE_HEB;
		}
		block_start=block_end+1;
	} while (block_end < str_len-1);


	/* ---- Pass 2: split heb_str into lines, taken from its end ---- */

	broken_str = zend_string_alloc(str_len, 0);
	begin = end = str_len-1;
	target = ZSTR_VAL(broken_str);

	while (1) {
		char_count=0;
		/* Move begin backwards: without limit when max_chars is 0, or for
		 * at most max_chars characters when it is positive. A run of
		 * newline characters ends the line early. */
		while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
			char_count++;
			begin--;
			if (_isnewline(heb_str[begin])) {
				while (begin > 0 && _isnewline(heb_str[begin-1])) {
					begin--;
					char_count++;
				}
				break;
			}
		}
		if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
			size_t new_char_count=char_count, new_begin=begin;

			/* Look forward from begin, within the characters just counted,
			 * for a blank or newline to break at instead. */
			while (new_char_count > 0) {
				if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
					break;
				}
				new_begin++;
				new_char_count--;
			}
			/* Only move the break point if such a character was found. */
			if (new_char_count > 0) {
				begin=new_begin;
			}
		}
		orig_begin=begin;

		/* A blank at the break position is turned into a line break. */
		if (_isblank(heb_str[begin])) {
			heb_str[begin]='\n';
		}
		while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
			begin++;
		}
		for (i = begin; i <= end; i++) { /* copy content */
			*target = heb_str[i];
			target++;
		}
		/* Emit the newline characters skipped above after the content. */
		for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
			*target = heb_str[i];
			target++;
		}
		begin=orig_begin;

		/* Reaching index 0 means all of heb_str has been consumed. */
		if (begin == 0) {
			*target = 0;
			break;
		}
		/* Continue with the part of heb_str in front of this line. */
		begin--;
		end=begin;
	}
	efree(heb_str);

	RETURN_NEW_STR(broken_str);
}
/* }}} */
4763
4764 /* {{{ Converts newlines to HTML line breaks */
4765 PHP_FUNCTION(nl2br)
4766 {
4767 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4768 const char *tmp, *end;
4769 zend_string *str;
4770 char *target;
4771 size_t repl_cnt = 0;
4772 bool is_xhtml = 1;
4773 zend_string *result;
4774
4775 ZEND_PARSE_PARAMETERS_START(1, 2)
4776 Z_PARAM_STR(str)
4777 Z_PARAM_OPTIONAL
4778 Z_PARAM_BOOL(is_xhtml)
4779 ZEND_PARSE_PARAMETERS_END();
4780
4781 tmp = ZSTR_VAL(str);
4782 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4783
4784 /* it is really faster to scan twice and allocate mem once instead of scanning once
4785 and constantly reallocing */
4786 while (tmp < end) {
4787 if (*tmp == '\r') {
4788 if (*(tmp+1) == '\n') {
4789 tmp++;
4790 }
4791 repl_cnt++;
4792 } else if (*tmp == '\n') {
4793 if (*(tmp+1) == '\r') {
4794 tmp++;
4795 }
4796 repl_cnt++;
4797 }
4798
4799 tmp++;
4800 }
4801
4802 if (repl_cnt == 0) {
4803 RETURN_STR_COPY(str);
4804 }
4805
4806 {
4807 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4808
4809 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4810 target = ZSTR_VAL(result);
4811 }
4812
4813 tmp = ZSTR_VAL(str);
4814 while (tmp < end) {
4815 switch (*tmp) {
4816 case '\r':
4817 case '\n':
4818 *target++ = '<';
4819 *target++ = 'b';
4820 *target++ = 'r';
4821
4822 if (is_xhtml) {
4823 *target++ = ' ';
4824 *target++ = '/';
4825 }
4826
4827 *target++ = '>';
4828
4829 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4830 *target++ = *tmp++;
4831 }
4832 ZEND_FALLTHROUGH;
4833 default:
4834 *target++ = *tmp;
4835 }
4836
4837 tmp++;
4838 }
4839
4840 *target = '\0';
4841
4842 RETURN_NEW_STR(result);
4843 }
4844 /* }}} */
4845
4846 /* {{{ Strips HTML and PHP tags from a string */
4847 PHP_FUNCTION(strip_tags)
4848 {
4849 zend_string *buf;
4850 zend_string *str;
4851 zend_string *allow_str = NULL;
4852 HashTable *allow_ht = NULL;
4853 const char *allowed_tags=NULL;
4854 size_t allowed_tags_len=0;
4855 smart_str tags_ss = {0};
4856
4857 ZEND_PARSE_PARAMETERS_START(1, 2)
4858 Z_PARAM_STR(str)
4859 Z_PARAM_OPTIONAL
4860 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4861 ZEND_PARSE_PARAMETERS_END();
4862
4863 if (allow_ht) {
4864 zval *tmp;
4865 zend_string *tag;
4866
4867 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4868 tag = zval_get_string(tmp);
4869 smart_str_appendc(&tags_ss, '<');
4870 smart_str_append(&tags_ss, tag);
4871 smart_str_appendc(&tags_ss, '>');
4872 zend_string_release(tag);
4873 } ZEND_HASH_FOREACH_END();
4874 if (tags_ss.s) {
4875 smart_str_0(&tags_ss);
4876 allowed_tags = ZSTR_VAL(tags_ss.s);
4877 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4878 }
4879 } else if (allow_str) {
4880 allowed_tags = ZSTR_VAL(allow_str);
4881 allowed_tags_len = ZSTR_LEN(allow_str);
4882 }
4883
4884 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4885 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4886 smart_str_free(&tags_ss);
4887 RETURN_NEW_STR(buf);
4888 }
4889 /* }}} */
4890
4891 static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
4892 const char *retval;
4893
4894 if (zend_string_equals_literal(loc, "0")) {
4895 loc = NULL;
4896 } else {
4897 if (ZSTR_LEN(loc) >= 255) {
4898 php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
4899 return NULL;
4900 }
4901 }
4902
4903 # ifndef PHP_WIN32
4904 retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
4905 # else
4906 if (loc) {
4907 /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
4908 char *locp = ZSTR_VAL(loc);
4909 if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
4910 && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
4911 && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
4912 && (locp[5] == '\0' || locp[5] == '.')
4913 && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
4914 && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
4915 && locp[5] == '\0')
4916 ) {
4917 retval = NULL;
4918 } else {
4919 retval = setlocale(cat, ZSTR_VAL(loc));
4920 }
4921 } else {
4922 retval = setlocale(cat, NULL);
4923 }
4924 # endif
4925 if (!retval) {
4926 return NULL;
4927 }
4928
4929 if (loc) {
4930 /* Remember if locale was changed */
4931 size_t len = strlen(retval);
4932
4933 BG(locale_changed) = 1;
4934 if (cat == LC_CTYPE || cat == LC_ALL) {
4935 zend_update_current_locale();
4936 if (BG(ctype_string)) {
4937 zend_string_release_ex(BG(ctype_string), 0);
4938 }
4939 if (len == 1 && *retval == 'C') {
4940 /* C locale is represented as NULL. */
4941 BG(ctype_string) = NULL;
4942 return ZSTR_CHAR('C');
4943 } else if (zend_string_equals_cstr(loc, retval, len)) {
4944 BG(ctype_string) = zend_string_copy(loc);
4945 return zend_string_copy(BG(ctype_string));
4946 } else {
4947 BG(ctype_string) = zend_string_init(retval, len, 0);
4948 return zend_string_copy(BG(ctype_string));
4949 }
4950 } else if (zend_string_equals_cstr(loc, retval, len)) {
4951 return zend_string_copy(loc);
4952 }
4953 }
4954 return zend_string_init(retval, strlen(retval), 0);
4955 }
4956
4957 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4958 zend_string *tmp_loc_str;
4959 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4960 if (UNEXPECTED(loc_str == NULL)) {
4961 return NULL;
4962 }
4963 zend_string *result = try_setlocale_str(cat, loc_str);
4964 zend_tmp_string_release(tmp_loc_str);
4965 return result;
4966 }
4967
4968 /* {{{ Set locale information */
4969 PHP_FUNCTION(setlocale)
4970 {
4971 zend_long cat;
4972 zval *args = NULL;
4973 int num_args;
4974
4975 ZEND_PARSE_PARAMETERS_START(2, -1)
4976 Z_PARAM_LONG(cat)
4977 Z_PARAM_VARIADIC('+', args, num_args)
4978 ZEND_PARSE_PARAMETERS_END();
4979
4980 for (uint32_t i = 0; i < num_args; i++) {
4981 if (Z_TYPE(args[i]) == IS_ARRAY) {
4982 zval *elem;
4983 ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
4984 zend_string *result = try_setlocale_zval(cat, elem);
4985 if (EG(exception)) {
4986 RETURN_THROWS();
4987 }
4988 if (result) {
4989 RETURN_STR(result);
4990 }
4991 } ZEND_HASH_FOREACH_END();
4992 } else {
4993 zend_string *result = try_setlocale_zval(cat, &args[i]);
4994 if (EG(exception)) {
4995 RETURN_THROWS();
4996 }
4997 if (result) {
4998 RETURN_STR(result);
4999 }
5000 }
5001 }
5002
5003 RETURN_FALSE;
5004 }
5005 /* }}} */
5006
5007 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
5008 PHP_FUNCTION(parse_str)
5009 {
5010 char *arg;
5011 zval *arrayArg = NULL;
5012 char *res = NULL;
5013 size_t arglen;
5014
5015 ZEND_PARSE_PARAMETERS_START(2, 2)
5016 Z_PARAM_STRING(arg, arglen)
5017 Z_PARAM_ZVAL(arrayArg)
5018 ZEND_PARSE_PARAMETERS_END();
5019
5020 arrayArg = zend_try_array_init(arrayArg);
5021 if (!arrayArg) {
5022 RETURN_THROWS();
5023 }
5024
5025 res = estrndup(arg, arglen);
5026 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
5027 }
5028 /* }}} */
5029
/* Initial capacity, in bytes, of the tag-capture buffer allocated by
 * php_strip_tags_ex() (one extra byte is added for the terminating NUL), and
 * the amount of slack requested each time that buffer is reallocated. */
#define PHP_TAG_BUF_SIZE 1023
5031
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags
 *
 * tag: raw tag text beginning with '<'. The scan stops at the first '>' or
 *      at the first whitespace that follows the tag name, and is NOT bounded
 *      by len, so the text must contain one of those.
 * len: length of tag; only used to size the scratch buffer. A length of 0
 *      yields false.
 * set: NUL-terminated list of permitted tags (e.g. "<a><b>"). It is searched
 *      with strstr() for the normalized tag and is used exactly as given;
 *      only the tag side is lowercased here.
 *
 * Returns true if the normalized form of tag occurs in set.
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
static bool php_tag_find(char *tag, size_t len, const char *set) {
	char c, *n;
	const char *t;
	int state = 0;
	bool done = false;
	char *norm;

	if (len == 0) {
		return false;
	}

	/* The normalized form is never longer than the input plus a NUL. */
	norm = emalloc(len+1);

	n = norm;
	t = tag;
	c = zend_tolower_ascii(*t);
	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	while (!done) {
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = true;
				break;
			default:
				/* The <ctype.h> classifiers are only defined for values
				 * representable as unsigned char (or EOF). A plain char can
				 * be negative for bytes >= 0x80, so convert explicitly. */
				if (!isspace((unsigned char)c)) {
					if (state == 0) {
						state=1;
					}
					/* Drop the '/' of a closing tag ("</") or of a
					 * self-closing tag ("/>"); keep any other character. */
					if (c != '/' || (*(t-1) != '<' && *(t+1) != '>')) {
						*(n++) = c;
					}
				} else {
					/* Whitespace after the tag name ends the name. */
					if (state == 1) {
						done = true;
					}
				}
				break;
		}
		c = zend_tolower_ascii(*(++t));
	}
	*(n++) = '>';
	*n = '\0';

	done = strstr(set, norm) != NULL;

	efree(norm);
	return done;
}
/* }}} */
5097
5098 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
5099 {
5100 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
5101 }
5102 /* }}} */
5103
/* {{{ php_strip_tags

	A simple little state-machine to strip out html and php tags

	rbuf is processed in place: a private copy of its first len bytes is
	scanned and the stripped text is written back into rbuf. The return
	value is the number of bytes written. A terminating NUL is only added
	when the result is shorter than len.

	allow, when non-NULL, lists the tags to keep (allow_len is its length).
	A lowercased copy is requested from zend_str_tolower_dup_ex() and used
	if one is returned; otherwise allow is used as passed in. While inside a
	tag its text is collected in tbuf, and when the tag closes it is checked
	with php_tag_find() and copied to the output if it is allowed.

	allow_tag_spaces: when false, a '<' that is directly followed by
	whitespace is not treated as the start of a tag.

	The states are implemented as goto labels:

	State 0 is the output state, State 1 means we are inside a
	normal html tag and state 2 means we are inside a php tag ("<?").
	State 3 is entered on "<!" and State 4 on "<!--".

	The state variable is a local of this function and is updated on each
	transition.

	lc holds the last significant character read and br is a bracket
	counter. depth counts '<' seen inside a tag, in_q holds the quote
	character of the quoted string currently being skipped (0 if none).

	swm: Added ability to strip <?xml tags without assuming it PHP
	code.
*/
PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
{
	char *tbuf, *tp, *rp, c, lc;
	const char *buf, *p, *end;
	int br, depth=0, in_q = 0;
	uint8_t state = 0;
	size_t pos;
	char *allow_free = NULL;
	char is_xml = 0;

	/* Read from a copy (buf/p), write into the caller's buffer (rbuf/rp). */
	buf = estrndup(rbuf, len);
	end = buf + len;
	lc = '\0';
	p = buf;
	rp = rbuf;
	br = 0;
	if (allow) {
		allow_free = zend_str_tolower_dup_ex(allow, allow_len);
		allow = allow_free ? allow_free : allow;
		/* tbuf collects the text of the tag currently being parsed. */
		tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
		tp = tbuf;
	} else {
		tbuf = tp = NULL;
	}

/* State 0: plain text, copied to the output until a tag starts. */
state_0:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '\0':
			/* NUL bytes are dropped. */
			break;
		case '<':
			if (in_q) {
				break;
			}
			/* "< " is literal text unless allow_tag_spaces is set. */
			if (isspace(*(p + 1)) && !allow_tag_spaces) {
				*(rp++) = c;
				break;
			}
			lc = '<';
			state = 1;
			if (allow) {
				/* Make room in tbuf before appending. */
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = '<';
			}
			p++;
			goto state_1;
		case '>':
			/* A '>' that balances a nested '<' is swallowed. */
			if (depth) {
				depth--;
				break;
			}

			if (in_q) {
				break;
			}

			*(rp++) = c;
			break;
		default:
			*(rp++) = c;
			break;
	}
	p++;
	goto state_0;

/* State 1: inside a regular tag. */
state_1:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '\0':
			break;
		case '<':
			if (in_q) {
				break;
			}
			/* "< " inside a tag is ordinary tag text. */
			if (isspace(*(p + 1)) && !allow_tag_spaces) {
				goto reg_char_1;
			}
			depth++;
			break;
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}

			lc = '>';
			/* In "<?xml" mode a '>' preceded by '-' does not end the tag. */
			if (is_xml && p >= buf + 1 && *(p -1) == '-') {
				break;
			}
			in_q = state = is_xml = 0;
			if (allow) {
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = '>';
				*tp='\0';
				/* Keep the whole tag text if it is on the allow list. */
				if (php_tag_find(tbuf, tp-tbuf, allow)) {
					rp = zend_mempcpy(rp, tbuf, tp - tbuf);
				}
				tp = tbuf;
			}
			p++;
			goto state_0;
		case '"':
		case '\'':
			/* Open a quoted section, or close it on the matching quote. */
			if (p != buf && (!in_q || *p == in_q)) {
				if (in_q) {
					in_q = 0;
				} else {
					in_q = *p;
				}
			}
			goto reg_char_1;
		case '!':
			/* JavaScript & Other HTML scripting languages */
			/* Only "<!" (the '!' directly after '<') switches to state 3. */
			if (p >= buf + 1 && *(p-1) == '<') {
				state = 3;
				lc = c;
				p++;
				goto state_3;
			} else {
				goto reg_char_1;
			}
			break;
		case '?':
			/* Only "<?" (the '?' directly after '<') switches to state 2. */
			if (p >= buf + 1 && *(p-1) == '<') {
				br=0;
				state = 2;
				p++;
				goto state_2;
			} else {
				goto reg_char_1;
			}
			break;
		default:
reg_char_1:
			/* Ordinary tag character: remember it if tags may be kept. */
			if (allow) {
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = c;
			}
			break;
	}
	p++;
	goto state_1;

/* State 2: inside "<? ... ?>"; nothing is emitted. */
state_2:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '(':
			/* Parentheses are counted unless lc records an open quote. */
			if (lc != '"' && lc != '\'') {
				lc = '(';
				br++;
			}
			break;
		case ')':
			if (lc != '"' && lc != '\'') {
				lc = ')';
				br--;
			}
			break;
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}

			/* "?>" ends the block when no parenthesis is open and lc is
			 * not a double quote. */
			if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
				in_q = state = 0;
				tp = tbuf;
				p++;
				goto state_0;
			}
			break;
		case '"':
		case '\'':
			/* Quotes preceded by a backslash are ignored. */
			if (p >= buf + 1 && *(p-1) != '\\') {
				if (lc == c) {
					lc = '\0';
				} else if (lc != '\\') {
					lc = c;
				}
				if (p != buf && (!in_q || *p == in_q)) {
					if (in_q) {
						in_q = 0;
					} else {
						in_q = *p;
					}
				}
			}
			break;
		case 'l':
		case 'L':
			/* swm: If we encounter '<?xml' then we shouldn't be in
			 * state == 2 (PHP). Switch back to HTML.
			 */
			if (state == 2 && p > buf+4
				&& (*(p-1) == 'm' || *(p-1) == 'M')
				&& (*(p-2) == 'x' || *(p-2) == 'X')
				&& *(p-3) == '?'
				&& *(p-4) == '<') {
				state = 1; is_xml=1;
				p++;
				goto state_1;
			}
			break;
		default:
			break;
	}
	p++;
	goto state_2;

/* State 3: inside "<! ... >"; nothing is emitted. */
state_3:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}
			in_q = state = 0;
			tp = tbuf;
			p++;
			goto state_0;
		case '"':
		case '\'':
			if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
				if (in_q) {
					in_q = 0;
				} else {
					in_q = *p;
				}
			}
			break;
		case '-':
			/* "!--" starts a comment, handled by state 4. */
			if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
				state = 4;
				p++;
				goto state_4;
			}
			break;
		case 'E':
		case 'e':
			/* !DOCTYPE exception */
			/* After "doctype" (any letter case) continue as a regular tag. */
			if (p > buf+6
				&& (*(p-1) == 'p' || *(p-1) == 'P')
				&& (*(p-2) == 'y' || *(p-2) == 'Y')
				&& (*(p-3) == 't' || *(p-3) == 'T')
				&& (*(p-4) == 'c' || *(p-4) == 'C')
				&& (*(p-5) == 'o' || *(p-5) == 'O')
				&& (*(p-6) == 'd' || *(p-6) == 'D')) {
				state = 1;
				p++;
				goto state_1;
			}
			break;
		default:
			break;
	}
	p++;
	goto state_3;

/* State 4: inside a "<!--" comment; skip everything up to "-->". */
state_4:
	while (p < end) {
		c = *p;
		if (c == '>' && !in_q) {
			if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
				in_q = state = 0;
				tp = tbuf;
				p++;
				goto state_0;
			}
		}
		p++;
	}

finish:
	/* Terminate the output only if there is room left inside rbuf[0..len). */
	if (rp < rbuf + len) {
		*rp = '\0';
	}
	efree((void *)buf);
	if (tbuf) {
		efree(tbuf);
	}
	if (allow_free) {
		efree(allow_free);
	}

	return (size_t)(rp - rbuf);
}
/* }}} */
5445
5446 /* {{{ Parse a CSV string into an array */
5447 PHP_FUNCTION(str_getcsv)
5448 {
5449 zend_string *str;
5450 char delim = ',', enc = '"';
5451 int esc = (unsigned char) '\\';
5452 char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
5453 size_t delim_len = 0, enc_len = 0, esc_len = 0;
5454
5455 ZEND_PARSE_PARAMETERS_START(1, 4)
5456 Z_PARAM_STR(str)
5457 Z_PARAM_OPTIONAL
5458 Z_PARAM_STRING(delim_str, delim_len)
5459 Z_PARAM_STRING(enc_str, enc_len)
5460 Z_PARAM_STRING(esc_str, esc_len)
5461 ZEND_PARSE_PARAMETERS_END();
5462
5463 delim = delim_len ? delim_str[0] : delim;
5464 enc = enc_len ? enc_str[0] : enc;
5465 if (esc_str != NULL) {
5466 esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
5467 }
5468
5469 HashTable *values = php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str));
5470 if (values == NULL) {
5471 values = php_bc_fgetcsv_empty_line();
5472 }
5473 RETURN_ARR(values);
5474 }
5475 /* }}} */
5476
5477 /* {{{ Returns the input string repeat mult times */
5478 PHP_FUNCTION(str_repeat)
5479 {
5480 zend_string *input_str; /* Input string */
5481 zend_long mult; /* Multiplier */
5482 zend_string *result; /* Resulting string */
5483 size_t result_len; /* Length of the resulting string */
5484
5485 ZEND_PARSE_PARAMETERS_START(2, 2)
5486 Z_PARAM_STR(input_str)
5487 Z_PARAM_LONG(mult)
5488 ZEND_PARSE_PARAMETERS_END();
5489
5490 if (mult < 0) {
5491 zend_argument_value_error(2, "must be greater than or equal to 0");
5492 RETURN_THROWS();
5493 }
5494
5495 /* Don't waste our time if it's empty */
5496 /* ... or if the multiplier is zero */
5497 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5498 RETURN_EMPTY_STRING();
5499
5500 /* Initialize the result string */
5501 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5502 result_len = ZSTR_LEN(input_str) * mult;
5503 ZSTR_COPY_CONCAT_PROPERTIES(result, input_str);
5504
5505 /* Heavy optimization for situations where input string is 1 byte long */
5506 if (ZSTR_LEN(input_str) == 1) {
5507 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5508 } else {
5509 const char *s, *ee;
5510 char *e;
5511 ptrdiff_t l=0;
5512 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5513 s = ZSTR_VAL(result);
5514 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5515 ee = ZSTR_VAL(result) + result_len;
5516
5517 while (e<ee) {
5518 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5519 memmove(e, s, l);
5520 e += l;
5521 }
5522 }
5523
5524 ZSTR_VAL(result)[result_len] = '\0';
5525
5526 RETURN_NEW_STR(result);
5527 }
5528 /* }}} */
5529
5530 /* {{{ Returns info about what characters are used in input */
5531 PHP_FUNCTION(count_chars)
5532 {
5533 zend_string *input;
5534 int chars[256];
5535 zend_long mymode=0;
5536 const unsigned char *buf;
5537 int inx;
5538 char retstr[256];
5539 size_t retlen=0;
5540 size_t tmp = 0;
5541
5542 ZEND_PARSE_PARAMETERS_START(1, 2)
5543 Z_PARAM_STR(input)
5544 Z_PARAM_OPTIONAL
5545 Z_PARAM_LONG(mymode)
5546 ZEND_PARSE_PARAMETERS_END();
5547
5548 if (mymode < 0 || mymode > 4) {
5549 zend_argument_value_error(2, "must be between 0 and 4 (inclusive)");
5550 RETURN_THROWS();
5551 }
5552
5553 buf = (const unsigned char *) ZSTR_VAL(input);
5554 memset((void*) chars, 0, sizeof(chars));
5555
5556 while (tmp < ZSTR_LEN(input)) {
5557 chars[*buf]++;
5558 buf++;
5559 tmp++;
5560 }
5561
5562 if (mymode < 3) {
5563 array_init(return_value);
5564 }
5565
5566 for (inx = 0; inx < 256; inx++) {
5567 switch (mymode) {
5568 case 0:
5569 add_index_long(return_value, inx, chars[inx]);
5570 break;
5571 case 1:
5572 if (chars[inx] != 0) {
5573 add_index_long(return_value, inx, chars[inx]);
5574 }
5575 break;
5576 case 2:
5577 if (chars[inx] == 0) {
5578 add_index_long(return_value, inx, chars[inx]);
5579 }
5580 break;
5581 case 3:
5582 if (chars[inx] != 0) {
5583 retstr[retlen++] = inx;
5584 }
5585 break;
5586 case 4:
5587 if (chars[inx] == 0) {
5588 retstr[retlen++] = inx;
5589 }
5590 break;
5591 }
5592 }
5593
5594 if (mymode == 3 || mymode == 4) {
5595 RETURN_STRINGL(retstr, retlen);
5596 }
5597 }
5598 /* }}} */
5599
5600 /* {{{ php_strnatcmp */
5601 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, bool is_case_insensitive)
5602 {
5603 zend_string *s1, *s2;
5604
5605 ZEND_PARSE_PARAMETERS_START(2, 2)
5606 Z_PARAM_STR(s1)
5607 Z_PARAM_STR(s2)
5608 ZEND_PARSE_PARAMETERS_END();
5609
5610 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5611 ZSTR_VAL(s2), ZSTR_LEN(s2),
5612 is_case_insensitive));
5613 }
5614 /* }}} */
5615
5616 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5617 PHP_FUNCTION(strnatcmp)
5618 {
5619 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5620 }
5621 /* }}} */
5622
5623 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5624 PHP_FUNCTION(strnatcasecmp)
5625 {
5626 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5627 }
5628 /* }}} */
5629
5630 /* {{{ Returns numeric formatting information based on the current locale */
5631 PHP_FUNCTION(localeconv)
5632 {
5633 zval grouping, mon_grouping;
5634 size_t len, i;
5635
5636 ZEND_PARSE_PARAMETERS_NONE();
5637
5638 array_init(return_value);
5639 array_init(&grouping);
5640 array_init(&mon_grouping);
5641
5642 {
5643 struct lconv currlocdata;
5644
5645 localeconv_r( &currlocdata );
5646
5647 /* Grab the grouping data out of the array */
5648 len = strlen(currlocdata.grouping);
5649
5650 for (i = 0; i < len; i++) {
5651 add_index_long(&grouping, i, currlocdata.grouping[i]);
5652 }
5653
5654 /* Grab the monetary grouping data out of the array */
5655 len = strlen(currlocdata.mon_grouping);
5656
5657 for (i = 0; i < len; i++) {
5658 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5659 }
5660
5661 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5662 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5663 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5664 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5665 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5666 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5667 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5668 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5669 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5670 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5671 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5672 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5673 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5674 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5675 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5676 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5677 }
5678
5679 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5680 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5681 }
5682 /* }}} */
5683
5684 /* {{{ Returns the number of times a substring occurs in the string */
5685 PHP_FUNCTION(substr_count)
5686 {
5687 char *haystack, *needle;
5688 zend_long offset = 0, length = 0;
5689 bool length_is_null = 1;
5690 zend_long count;
5691 size_t haystack_len, needle_len;
5692 const char *p, *endp;
5693
5694 ZEND_PARSE_PARAMETERS_START(2, 4)
5695 Z_PARAM_STRING(haystack, haystack_len)
5696 Z_PARAM_STRING(needle, needle_len)
5697 Z_PARAM_OPTIONAL
5698 Z_PARAM_LONG(offset)
5699 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5700 ZEND_PARSE_PARAMETERS_END();
5701
5702 if (needle_len == 0) {
5703 zend_argument_value_error(2, "cannot be empty");
5704 RETURN_THROWS();
5705 }
5706
5707 p = haystack;
5708
5709 if (offset) {
5710 if (offset < 0) {
5711 offset += (zend_long)haystack_len;
5712 }
5713 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5714 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5715 RETURN_THROWS();
5716 }
5717 p += offset;
5718 haystack_len -= offset;
5719 }
5720
5721 if (!length_is_null) {
5722 if (length < 0) {
5723 length += haystack_len;
5724 }
5725 if (length < 0 || ((size_t)length > haystack_len)) {
5726 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5727 RETURN_THROWS();
5728 }
5729 } else {
5730 length = haystack_len;
5731 }
5732
5733 if (needle_len == 1) {
5734 count = count_chars(p, length, needle[0]);
5735 } else {
5736 count = 0;
5737 endp = p + length;
5738 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5739 p += needle_len;
5740 count++;
5741 }
5742 }
5743
5744 RETURN_LONG(count);
5745 }
5746 /* }}} */
5747
5748 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5749 PHP_FUNCTION(str_pad)
5750 {
5751 /* Input arguments */
5752 zend_string *input; /* Input string */
5753 zend_long pad_length; /* Length to pad to */
5754
5755 /* Helper variables */
5756 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5757 char *pad_str = " "; /* Pointer to padding string */
5758 size_t pad_str_len = 1;
5759 zend_long pad_type_val = PHP_STR_PAD_RIGHT; /* The padding type value */
5760 size_t i, left_pad=0, right_pad=0;
5761 zend_string *result = NULL; /* Resulting string */
5762
5763 ZEND_PARSE_PARAMETERS_START(2, 4)
5764 Z_PARAM_STR(input)
5765 Z_PARAM_LONG(pad_length)
5766 Z_PARAM_OPTIONAL
5767 Z_PARAM_STRING(pad_str, pad_str_len)
5768 Z_PARAM_LONG(pad_type_val)
5769 ZEND_PARSE_PARAMETERS_END();
5770
5771 /* If resulting string turns out to be shorter than input string,
5772 we simply copy the input and return. */
5773 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5774 RETURN_STR_COPY(input);
5775 }
5776
5777 if (pad_str_len == 0) {
5778 zend_argument_value_error(3, "must be a non-empty string");
5779 RETURN_THROWS();
5780 }
5781
5782 if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
5783 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5784 RETURN_THROWS();
5785 }
5786
5787 num_pad_chars = pad_length - ZSTR_LEN(input);
5788 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5789 ZSTR_LEN(result) = 0;
5790
5791 /* We need to figure out the left/right padding lengths. */
5792 switch (pad_type_val) {
5793 case PHP_STR_PAD_RIGHT:
5794 left_pad = 0;
5795 right_pad = num_pad_chars;
5796 break;
5797
5798 case PHP_STR_PAD_LEFT:
5799 left_pad = num_pad_chars;
5800 right_pad = 0;
5801 break;
5802
5803 case PHP_STR_PAD_BOTH:
5804 left_pad = num_pad_chars / 2;
5805 right_pad = num_pad_chars - left_pad;
5806 break;
5807 }
5808
5809 /* First we pad on the left. */
5810 for (i = 0; i < left_pad; i++)
5811 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5812
5813 /* Then we copy the input string. */
5814 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5815 ZSTR_LEN(result) += ZSTR_LEN(input);
5816
5817 /* Finally, we pad on the right. */
5818 for (i = 0; i < right_pad; i++)
5819 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5820
5821 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5822
5823 RETURN_NEW_STR(result);
5824 }
5825 /* }}} */
5826
5827 /* {{{ Implements an ANSI C compatible sscanf */
5828 PHP_FUNCTION(sscanf)
5829 {
5830 zval *args = NULL;
5831 char *str, *format;
5832 size_t str_len, format_len;
5833 int result, num_args = 0;
5834
5835 ZEND_PARSE_PARAMETERS_START(2, -1)
5836 Z_PARAM_STRING(str, str_len)
5837 Z_PARAM_STRING(format, format_len)
5838 Z_PARAM_VARIADIC('*', args, num_args)
5839 ZEND_PARSE_PARAMETERS_END();
5840
5841 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5842
5843 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5844 WRONG_PARAM_COUNT;
5845 }
5846 }
5847 /* }}} */
5848
5849 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5850 static zend_string *php_str_rot13(zend_string *str)
5851 {
5852 zend_string *ret;
5853 const char *p, *e;
5854 char *target;
5855
5856 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5857 return ZSTR_EMPTY_ALLOC();
5858 }
5859
5860 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5861
5862 p = ZSTR_VAL(str);
5863 e = p + ZSTR_LEN(str);
5864 target = ZSTR_VAL(ret);
5865
5866 #ifdef __SSE2__
5867 if (e - p > 15) {
5868 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5869 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5870 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5871 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5872 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5873 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5874 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5875 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5876 const __m128i add = _mm_set1_epi8(13);
5877 const __m128i sub = _mm_set1_epi8(-13);
5878
5879 do {
5880 __m128i in, gt, lt, cmp, delta;
5881
5882 delta = _mm_setzero_si128();
5883 in = _mm_loadu_si128((__m128i *)p);
5884
5885 gt = _mm_cmpgt_epi8(in, a_minus_1);
5886 lt = _mm_cmplt_epi8(in, m_plus_1);
5887 cmp = _mm_and_si128(lt, gt);
5888 if (_mm_movemask_epi8(cmp)) {
5889 cmp = _mm_and_si128(cmp, add);
5890 delta = _mm_or_si128(delta, cmp);
5891 }
5892
5893 gt = _mm_cmpgt_epi8(in, n_minus_1);
5894 lt = _mm_cmplt_epi8(in, z_plus_1);
5895 cmp = _mm_and_si128(lt, gt);
5896 if (_mm_movemask_epi8(cmp)) {
5897 cmp = _mm_and_si128(cmp, sub);
5898 delta = _mm_or_si128(delta, cmp);
5899 }
5900
5901 gt = _mm_cmpgt_epi8(in, A_minus_1);
5902 lt = _mm_cmplt_epi8(in, M_plus_1);
5903 cmp = _mm_and_si128(lt, gt);
5904 if (_mm_movemask_epi8(cmp)) {
5905 cmp = _mm_and_si128(cmp, add);
5906 delta = _mm_or_si128(delta, cmp);
5907 }
5908
5909 gt = _mm_cmpgt_epi8(in, N_minus_1);
5910 lt = _mm_cmplt_epi8(in, Z_plus_1);
5911 cmp = _mm_and_si128(lt, gt);
5912 if (_mm_movemask_epi8(cmp)) {
5913 cmp = _mm_and_si128(cmp, sub);
5914 delta = _mm_or_si128(delta, cmp);
5915 }
5916
5917 in = _mm_add_epi8(in, delta);
5918 _mm_storeu_si128((__m128i *)target, in);
5919
5920 p += 16;
5921 target += 16;
5922 } while (e - p > 15);
5923 }
5924 #endif
5925
5926 while (p < e) {
5927 if (*p >= 'a' && *p <= 'z') {
5928 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5929 } else if (*p >= 'A' && *p <= 'Z') {
5930 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5931 } else {
5932 *target++ = *p++;
5933 }
5934 }
5935
5936 *target = '\0';
5937
5938 return ret;
5939 }
5940 /* }}} */
5941
5942 /* {{{ Perform the rot13 transform on a string */
5943 PHP_FUNCTION(str_rot13)
5944 {
5945 zend_string *arg;
5946
5947 ZEND_PARSE_PARAMETERS_START(1, 1)
5948 Z_PARAM_STR(arg)
5949 ZEND_PARSE_PARAMETERS_END();
5950
5951 RETURN_STR(php_str_rot13(arg));
5952 }
5953 /* }}} */
5954
5955 /* {{{ php_binary_string_shuffle */
5956 PHPAPI bool php_binary_string_shuffle(php_random_algo_with_state engine, char *str, zend_long len) /* {{{ */
5957 {
5958 const php_random_algo *algo = engine.algo;
5959 void *state = engine.state;
5960
5961 int64_t n_elems, rnd_idx, n_left;
5962 char temp;
5963
5964 /* The implementation is stolen from array_data_shuffle */
5965 /* Thus the characteristics of the randomization are the same */
5966 n_elems = len;
5967
5968 if (n_elems <= 1) {
5969 return true;
5970 }
5971
5972 n_left = n_elems;
5973
5974 while (--n_left) {
5975 rnd_idx = algo->range(state, 0, n_left);
5976 if (EG(exception)) {
5977 return false;
5978 }
5979 if (rnd_idx != n_left) {
5980 temp = str[n_left];
5981 str[n_left] = str[rnd_idx];
5982 str[rnd_idx] = temp;
5983 }
5984 }
5985
5986 return true;
5987 }
5988 /* }}} */
5989
5990 /* {{{ Shuffles string. One permutation of all possible is created */
5991 PHP_FUNCTION(str_shuffle)
5992 {
5993 zend_string *arg;
5994
5995 ZEND_PARSE_PARAMETERS_START(1, 1)
5996 Z_PARAM_STR(arg)
5997 ZEND_PARSE_PARAMETERS_END();
5998
5999 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
6000 if (Z_STRLEN_P(return_value) > 1) {
6001 php_binary_string_shuffle(
6002 php_random_default_engine(),
6003 Z_STRVAL_P(return_value),
6004 Z_STRLEN_P(return_value)
6005 );
6006 }
6007 }
6008 /* }}} */
6009
6010 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
6011 then the function will return an array containing all the words
6012 found inside the string. If format of 2 is specified, then the function
6013 will return an associated array where the position of the word is the key
6014 and the word itself is the value.
6015 For the purpose of this function, 'word' is defined as a locale dependent
6016 string containing alphabetic characters, which also may contain, but not start
6017 with "'" and "-" characters.
6018 */
6019 PHP_FUNCTION(str_word_count)
6020 {
6021 zend_string *str;
6022 char *char_list = NULL, ch[256];
6023 const char *p, *e, *s;
6024 size_t char_list_len = 0, word_count = 0;
6025 zend_long type = 0;
6026
6027 ZEND_PARSE_PARAMETERS_START(1, 3)
6028 Z_PARAM_STR(str)
6029 Z_PARAM_OPTIONAL
6030 Z_PARAM_LONG(type)
6031 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
6032 ZEND_PARSE_PARAMETERS_END();
6033
6034 switch(type) {
6035 case 1:
6036 case 2:
6037 array_init(return_value);
6038 if (!ZSTR_LEN(str)) {
6039 return;
6040 }
6041 break;
6042 case 0:
6043 if (!ZSTR_LEN(str)) {
6044 RETURN_LONG(0);
6045 }
6046 /* nothing to be done */
6047 break;
6048 default:
6049 zend_argument_value_error(2, "must be a valid format value");
6050 RETURN_THROWS();
6051 }
6052
6053 if (char_list) {
6054 php_charmask((const unsigned char *) char_list, char_list_len, ch);
6055 }
6056
6057 p = ZSTR_VAL(str);
6058 e = ZSTR_VAL(str) + ZSTR_LEN(str);
6059
6060 /* first character cannot be ' or -, unless explicitly allowed by the user */
6061 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
6062 p++;
6063 }
6064 /* last character cannot be -, unless explicitly allowed by the user */
6065 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
6066 e--;
6067 }
6068
6069 while (p < e) {
6070 s = p;
6071 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
6072 p++;
6073 }
6074 if (p > s) {
6075 switch (type)
6076 {
6077 case 1:
6078 add_next_index_stringl(return_value, s, p - s);
6079 break;
6080 case 2:
6081 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
6082 break;
6083 default:
6084 word_count++;
6085 break;
6086 }
6087 }
6088 p++;
6089 }
6090
6091 if (!type) {
6092 RETURN_LONG(word_count);
6093 }
6094 }
6095
6096 /* }}} */
6097
6098 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
6099 PHP_FUNCTION(str_split)
6100 {
6101 zend_string *str;
6102 zend_long split_length = 1;
6103 const char *p;
6104 size_t n_reg_segments;
6105
6106 ZEND_PARSE_PARAMETERS_START(1, 2)
6107 Z_PARAM_STR(str)
6108 Z_PARAM_OPTIONAL
6109 Z_PARAM_LONG(split_length)
6110 ZEND_PARSE_PARAMETERS_END();
6111
6112 if (split_length <= 0) {
6113 zend_argument_value_error(2, "must be greater than 0");
6114 RETURN_THROWS();
6115 }
6116
6117 if ((size_t)split_length >= ZSTR_LEN(str)) {
6118 if (0 == ZSTR_LEN(str)) {
6119 RETURN_EMPTY_ARRAY();
6120 }
6121
6122 array_init_size(return_value, 1);
6123 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
6124 return;
6125 }
6126
6127 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
6128
6129 n_reg_segments = ZSTR_LEN(str) / split_length;
6130 p = ZSTR_VAL(str);
6131
6132 while (n_reg_segments-- > 0) {
6133 add_next_index_stringl(return_value, p, split_length);
6134 p += split_length;
6135 }
6136
6137 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
6138 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
6139 }
6140 }
6141 /* }}} */
6142
6143 /* {{{ Search a string for any of a set of characters */
6144 PHP_FUNCTION(strpbrk)
6145 {
6146 zend_string *haystack, *char_list;
6147
6148 ZEND_PARSE_PARAMETERS_START(2, 2)
6149 Z_PARAM_STR(haystack)
6150 Z_PARAM_STR(char_list)
6151 ZEND_PARSE_PARAMETERS_END();
6152
6153 if (!ZSTR_LEN(char_list)) {
6154 zend_argument_value_error(2, "must be a non-empty string");
6155 RETURN_THROWS();
6156 }
6157
6158 size_t shift = php_strcspn(
6159 ZSTR_VAL(haystack),
6160 ZSTR_VAL(char_list),
6161 ZSTR_VAL(haystack) + ZSTR_LEN(haystack),
6162 ZSTR_VAL(char_list) + ZSTR_LEN(char_list)
6163 );
6164 if (shift < ZSTR_LEN(haystack)) {
6165 RETURN_STRINGL(ZSTR_VAL(haystack) + shift, ZSTR_LEN(haystack) - shift);
6166 }
6167
6168 RETURN_FALSE;
6169 }
6170 /* }}} */
6171
6172 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
6173 PHP_FUNCTION(substr_compare)
6174 {
6175 zend_string *s1, *s2;
6176 zend_long offset, len=0;
6177 bool len_is_default=1;
6178 bool cs=0;
6179 size_t cmp_len;
6180
6181 ZEND_PARSE_PARAMETERS_START(3, 5)
6182 Z_PARAM_STR(s1)
6183 Z_PARAM_STR(s2)
6184 Z_PARAM_LONG(offset)
6185 Z_PARAM_OPTIONAL
6186 Z_PARAM_LONG_OR_NULL(len, len_is_default)
6187 Z_PARAM_BOOL(cs)
6188 ZEND_PARSE_PARAMETERS_END();
6189
6190 if (!len_is_default && len <= 0) {
6191 if (len == 0) {
6192 RETURN_LONG(0L);
6193 } else {
6194 zend_argument_value_error(4, "must be greater than or equal to 0");
6195 RETURN_THROWS();
6196 }
6197 }
6198
6199 if (offset < 0) {
6200 offset = ZSTR_LEN(s1) + offset;
6201 offset = (offset < 0) ? 0 : offset;
6202 }
6203
6204 if ((size_t)offset > ZSTR_LEN(s1)) {
6205 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
6206 RETURN_THROWS();
6207 }
6208
6209 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
6210
6211 if (!cs) {
6212 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
6213 } else {
6214 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
6215 }
6216 }
6217 /* }}} */
6218
6219 /* {{{ */
6220 static zend_string *php_utf8_encode(const char *s, size_t len)
6221 {
6222 size_t pos = len;
6223 zend_string *str;
6224 unsigned char c;
6225
6226 str = zend_string_safe_alloc(len, 2, 0, 0);
6227 ZSTR_LEN(str) = 0;
6228 while (pos > 0) {
6229 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6230 * so we don't need to do any mapping here. */
6231 c = (unsigned char)(*s);
6232 if (c < 0x80) {
6233 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
6234 /* We only account for the single-byte and two-byte cases because
6235 * we're only dealing with the first 256 Unicode codepoints. */
6236 } else {
6237 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
6238 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
6239 }
6240 pos--;
6241 s++;
6242 }
6243 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6244 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6245 return str;
6246 }
6247 /* }}} */
6248
6249 /* {{{ */
6250 static zend_string *php_utf8_decode(const char *s, size_t len)
6251 {
6252 size_t pos = 0;
6253 unsigned int c;
6254 zend_string *str;
6255
6256 str = zend_string_alloc(len, 0);
6257 ZSTR_LEN(str) = 0;
6258 while (pos < len) {
6259 zend_result status = FAILURE;
6260 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
6261
6262 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6263 * so we don't need to do any mapping here beyond replacing non-Latin-1
6264 * characters. */
6265 if (status == FAILURE || c > 0xFFU) {
6266 c = '?';
6267 }
6268
6269 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
6270 }
6271 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6272 if (ZSTR_LEN(str) < len) {
6273 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6274 }
6275
6276 return str;
6277 }
6278 /* }}} */
6279
6280 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
6281 PHP_FUNCTION(utf8_encode)
6282 {
6283 char *arg;
6284 size_t arg_len;
6285
6286 ZEND_PARSE_PARAMETERS_START(1, 1)
6287 Z_PARAM_STRING(arg, arg_len)
6288 ZEND_PARSE_PARAMETERS_END();
6289
6290 RETURN_STR(php_utf8_encode(arg, arg_len));
6291 }
6292 /* }}} */
6293
6294 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
6295 PHP_FUNCTION(utf8_decode)
6296 {
6297 char *arg;
6298 size_t arg_len;
6299
6300 ZEND_PARSE_PARAMETERS_START(1, 1)
6301 Z_PARAM_STRING(arg, arg_len)
6302 ZEND_PARSE_PARAMETERS_END();
6303
6304 RETURN_STR(php_utf8_decode(arg, arg_len));
6305 }
6306 /* }}} */
6307