1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_string.h"
22 #include "php_variables.h"
23 #include <locale.h>
24 #ifdef HAVE_LANGINFO_H
25 # include <langinfo.h>
26 #endif
27
28 #ifdef HAVE_LIBINTL
29 # include <libintl.h> /* For LC_MESSAGES */
30 #endif
31
32 #include "scanf.h"
33 #include "zend_API.h"
34 #include "zend_execute.h"
35 #include "basic_functions.h"
36 #include "zend_smart_str.h"
37 #include <Zend/zend_exceptions.h>
38 #ifdef ZTS
39 #include "TSRM.h"
40 #endif
41
42 /* For str_getcsv() support */
43 #include "ext/standard/file.h"
44 /* For php_next_utf8_char() */
45 #include "ext/standard/html.h"
46 #include "ext/random/php_random.h"
47
48 #ifdef __SSE2__
49 #include <emmintrin.h>
50 #include "Zend/zend_bitset.h"
51 #endif
52
/* Lookup table mapping a nibble value (0-15) to its lowercase hexadecimal
 * digit; used by php_bin2hex(). This is read-only, so it's ok. */
ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
55
/* localeconv mutex: only exists in thread-safe (ZTS) builds, where it is
 * locked around the localeconv() call in localeconv_r(). It is allocated in
 * the module startup hook and freed in the module shutdown hook. */
#ifdef ZTS
static MUTEX_T locale_mutex = NULL;
#endif
60
61 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)62 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
63 {
64 zend_string *result;
65 size_t i, j;
66
67 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
68
69 for (i = j = 0; i < oldlen; i++) {
70 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
71 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
72 }
73 ZSTR_VAL(result)[j] = '\0';
74
75 return result;
76 }
77 /* }}} */
78
79 /* {{{ php_hex2bin */
php_hex2bin(const unsigned char * old,const size_t oldlen)80 static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
81 {
82 size_t target_length = oldlen >> 1;
83 zend_string *str = zend_string_alloc(target_length, 0);
84 unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
85 size_t i, j;
86
87 for (i = j = 0; i < target_length; i++) {
88 unsigned char c = old[j++];
89 unsigned char l = c & ~0x20;
90 int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
91 unsigned char d;
92
93 /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
94 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
95 d = (l - 0x10 - 0x27 * is_letter) << 4;
96 } else {
97 zend_string_efree(str);
98 return NULL;
99 }
100 c = old[j++];
101 l = c & ~0x20;
102 is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
103 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
104 d |= l - 0x10 - 0x27 * is_letter;
105 } else {
106 zend_string_efree(str);
107 return NULL;
108 }
109 ret[i] = d;
110 }
111 ret[i] = '\0';
112
113 return str;
114 }
115 /* }}} */
116
117 /* {{{ localeconv_r
118 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
localeconv_r(struct lconv * out)119 PHPAPI struct lconv *localeconv_r(struct lconv *out)
120 {
121
122 #ifdef ZTS
123 tsrm_mutex_lock( locale_mutex );
124 #endif
125
126 /* cur->locinfo is struct __crt_locale_info which implementation is
127 hidden in vc14. TODO revisit this and check if a workaround available
128 and needed. */
129 #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
130 {
131 /* Even with the enabled per thread locale, localeconv
132 won't check any locale change in the master thread. */
133 _locale_t cur = _get_current_locale();
134 *out = *cur->locinfo->lconv;
135 _free_locale(cur);
136 }
137 #else
138 /* localeconv doesn't return an error condition */
139 *out = *localeconv();
140 #endif
141
142 #ifdef ZTS
143 tsrm_mutex_unlock( locale_mutex );
144 #endif
145
146 return out;
147 }
148 /* }}} */
149
150 #ifdef ZTS
/* {{{ PHP_MINIT_FUNCTION
 * Module startup hook (ZTS builds only): allocates the mutex that
 * localeconv_r() locks around localeconv(). Always reports SUCCESS. */
PHP_MINIT_FUNCTION(localeconv)
{
	locale_mutex = tsrm_mutex_alloc();
	return SUCCESS;
}
/* }}} */
158
/* {{{ PHP_MSHUTDOWN_FUNCTION
 * Module shutdown hook (ZTS builds only): frees the localeconv mutex and
 * resets the pointer to NULL so no stale handle is left behind. Always
 * reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(localeconv)
{
	tsrm_mutex_free( locale_mutex );
	locale_mutex = NULL;
	return SUCCESS;
}
/* }}} */
167 #endif
168
169 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)170 PHP_FUNCTION(bin2hex)
171 {
172 zend_string *result;
173 zend_string *data;
174
175 ZEND_PARSE_PARAMETERS_START(1, 1)
176 Z_PARAM_STR(data)
177 ZEND_PARSE_PARAMETERS_END();
178
179 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
180
181 RETURN_STR(result);
182 }
183 /* }}} */
184
185 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)186 PHP_FUNCTION(hex2bin)
187 {
188 zend_string *result, *data;
189
190 ZEND_PARSE_PARAMETERS_START(1, 1)
191 Z_PARAM_STR(data)
192 ZEND_PARSE_PARAMETERS_END();
193
194 if (ZSTR_LEN(data) % 2 != 0) {
195 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
196 RETURN_FALSE;
197 }
198
199 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
200
201 if (!result) {
202 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
203 RETURN_FALSE;
204 }
205
206 RETVAL_STR(result);
207 }
208 /* }}} */
209
/* Shared implementation of strspn() and strcspn().
 * Arguments: subject string, mask string, optional start offset and optional
 * (nullable) length. Negative offsets/lengths count from the end of the
 * (remaining) subject; both are clamped to the available range. Returns the
 * length reported by php_strspn() when `is_strspn` is true, otherwise by
 * php_strcspn(), for the selected segment; an empty segment yields 0. */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, bool is_strspn) /* {{{ */
{
	zend_string *subject, *mask;
	zend_long offset = 0, span = 0;
	bool span_is_null = 1;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(subject)
		Z_PARAM_STR(mask)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(offset)
		Z_PARAM_LONG_OR_NULL(span, span_is_null)
	ZEND_PARSE_PARAMETERS_END();

	size_t available = ZSTR_LEN(subject);

	/* Resolve the start offset into [0, length of subject]. */
	if (offset < 0) {
		offset += available;
		if (offset < 0) {
			offset = 0;
		}
	} else if ((size_t) offset > available) {
		offset = available;
	}
	available -= offset;

	/* Resolve the segment length into [0, bytes left after offset]. */
	if (span_is_null) {
		span = available;
	} else if (span < 0) {
		span += available;
		if (span < 0) {
			span = 0;
		}
	} else if ((size_t) span > available) {
		span = available;
	}

	if (span == 0) {
		RETURN_LONG(0);
	}

	const char *segment = ZSTR_VAL(subject) + offset;
	const char *segment_end = segment + span;
	const char *mask_begin = ZSTR_VAL(mask);
	const char *mask_end = mask_begin + ZSTR_LEN(mask);

	if (is_strspn) {
		RETURN_LONG(php_strspn(segment /*str1_start*/,
						mask_begin /*str2_start*/,
						segment_end /*str1_end*/,
						mask_end /*str2_end*/));
	}

	RETURN_LONG(php_strcspn(segment /*str1_start*/,
					mask_begin /*str2_start*/,
					segment_end /*str1_end*/,
					mask_end /*str2_end*/));
}
/* }}} */
265
/* {{{ Finds length of initial segment consisting entirely of characters found in mask.
 * If start and/or length are provided, works like strspn(substr($s,$start,$len),$good_chars).
 * All argument handling is delegated to php_spn_common_handler(). */
PHP_FUNCTION(strspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, /* is_strspn */ true);
}
/* }}} */
272
/* {{{ Finds length of initial segment consisting entirely of characters not found in mask.
 * If start and/or length are provided, works like strcspn(substr($s,$start,$len),$bad_chars).
 * All argument handling is delegated to php_spn_common_handler(). */
PHP_FUNCTION(strcspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, /* is_strspn */ false);
}
/* }}} */
279
#ifdef HAVE_NL_LANGINFO
/* {{{ Query language and locale information.
 * Takes one integer argument identifying the item to query. The item is first
 * checked against the list of langinfo constants that were defined when this
 * file was compiled; an unknown item produces a warning and returns false.
 * Otherwise the string reported by nl_langinfo() is returned (or false if
 * nl_langinfo() returned NULL). */
PHP_FUNCTION(nl_langinfo)
{
	zend_long item;
	char *value;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_LONG(item)
	ZEND_PARSE_PARAMETERS_END();

	/* Whitelist: every known constant falls through to the single `break`
	 * below; each group is only compiled in if the platform defines it. */
	switch(item) { /* {{{ */
#ifdef ABDAY_1
		case ABDAY_1:
		case ABDAY_2:
		case ABDAY_3:
		case ABDAY_4:
		case ABDAY_5:
		case ABDAY_6:
		case ABDAY_7:
#endif
#ifdef DAY_1
		case DAY_1:
		case DAY_2:
		case DAY_3:
		case DAY_4:
		case DAY_5:
		case DAY_6:
		case DAY_7:
#endif
#ifdef ABMON_1
		case ABMON_1:
		case ABMON_2:
		case ABMON_3:
		case ABMON_4:
		case ABMON_5:
		case ABMON_6:
		case ABMON_7:
		case ABMON_8:
		case ABMON_9:
		case ABMON_10:
		case ABMON_11:
		case ABMON_12:
#endif
#ifdef MON_1
		case MON_1:
		case MON_2:
		case MON_3:
		case MON_4:
		case MON_5:
		case MON_6:
		case MON_7:
		case MON_8:
		case MON_9:
		case MON_10:
		case MON_11:
		case MON_12:
#endif
#ifdef AM_STR
		case AM_STR:
#endif
#ifdef PM_STR
		case PM_STR:
#endif
#ifdef D_T_FMT
		case D_T_FMT:
#endif
#ifdef D_FMT
		case D_FMT:
#endif
#ifdef T_FMT
		case T_FMT:
#endif
#ifdef T_FMT_AMPM
		case T_FMT_AMPM:
#endif
#ifdef ERA
		case ERA:
#endif
#ifdef ERA_YEAR
		case ERA_YEAR:
#endif
#ifdef ERA_D_T_FMT
		case ERA_D_T_FMT:
#endif
#ifdef ERA_D_FMT
		case ERA_D_FMT:
#endif
#ifdef ERA_T_FMT
		case ERA_T_FMT:
#endif
#ifdef ALT_DIGITS
		case ALT_DIGITS:
#endif
#ifdef INT_CURR_SYMBOL
		case INT_CURR_SYMBOL:
#endif
#ifdef CURRENCY_SYMBOL
		case CURRENCY_SYMBOL:
#endif
#ifdef CRNCYSTR
		case CRNCYSTR:
#endif
#ifdef MON_DECIMAL_POINT
		case MON_DECIMAL_POINT:
#endif
#ifdef MON_THOUSANDS_SEP
		case MON_THOUSANDS_SEP:
#endif
#ifdef MON_GROUPING
		case MON_GROUPING:
#endif
#ifdef POSITIVE_SIGN
		case POSITIVE_SIGN:
#endif
#ifdef NEGATIVE_SIGN
		case NEGATIVE_SIGN:
#endif
#ifdef INT_FRAC_DIGITS
		case INT_FRAC_DIGITS:
#endif
#ifdef FRAC_DIGITS
		case FRAC_DIGITS:
#endif
#ifdef P_CS_PRECEDES
		case P_CS_PRECEDES:
#endif
#ifdef P_SEP_BY_SPACE
		case P_SEP_BY_SPACE:
#endif
#ifdef N_CS_PRECEDES
		case N_CS_PRECEDES:
#endif
#ifdef N_SEP_BY_SPACE
		case N_SEP_BY_SPACE:
#endif
#ifdef P_SIGN_POSN
		case P_SIGN_POSN:
#endif
#ifdef N_SIGN_POSN
		case N_SIGN_POSN:
#endif
		/* DECIMAL_POINT/THOUSANDS_SEP take precedence over the alternative
		 * names RADIXCHAR/THOUSEP; only one of each pair becomes a case label. */
#ifdef DECIMAL_POINT
		case DECIMAL_POINT:
#elif defined(RADIXCHAR)
		case RADIXCHAR:
#endif
#ifdef THOUSANDS_SEP
		case THOUSANDS_SEP:
#elif defined(THOUSEP)
		case THOUSEP:
#endif
#ifdef GROUPING
		case GROUPING:
#endif
#ifdef YESEXPR
		case YESEXPR:
#endif
#ifdef NOEXPR
		case NOEXPR:
#endif
#ifdef YESSTR
		case YESSTR:
#endif
#ifdef NOSTR
		case NOSTR:
#endif
#ifdef CODESET
		case CODESET:
#endif
			break;
		default:
			/* Not one of the constants known at compile time. */
			php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
			RETURN_FALSE;
	}
	/* }}} */

	value = nl_langinfo(item);
	if (value == NULL) {
		RETURN_FALSE;
	} else {
		RETURN_STRING(value);
	}
}
#endif
/* }}} */
466
/* {{{ Compares two strings using the current locale.
 * Takes exactly two string arguments and returns the integer result of the C
 * library's strcoll(). Only the character buffers are passed on (not the
 * stored lengths), so the comparison is performed on them as C strings. */
PHP_FUNCTION(strcoll)
{
	zend_string *s1, *s2;

	ZEND_PARSE_PARAMETERS_START(2, 2)
		Z_PARAM_STR(s1)
		Z_PARAM_STR(s2)
	ZEND_PARSE_PARAMETERS_END();

	RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
	                    (const char *) ZSTR_VAL(s2)));
}
/* }}} */
481
482 /* {{{ php_charmask
483 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
484 * it needs to be incrementing.
485 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
486 */
php_charmask(const unsigned char * input,size_t len,char * mask)487 static inline zend_result php_charmask(const unsigned char *input, size_t len, char *mask)
488 {
489 const unsigned char *end;
490 unsigned char c;
491 zend_result result = SUCCESS;
492
493 memset(mask, 0, 256);
494 for (end = input+len; input < end; input++) {
495 c=*input;
496 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
497 && input[3] >= c) {
498 memset(mask+c, 1, input[3] - c + 1);
499 input+=3;
500 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
501 /* Error, try to be as helpful as possible:
502 (a range ending/starting with '.' won't be captured here) */
503 if (end-len >= input) { /* there was no 'left' char */
504 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
505 result = FAILURE;
506 continue;
507 }
508 if (input+2 >= end) { /* there is no 'right' char */
509 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
510 result = FAILURE;
511 continue;
512 }
513 if (input[-1] > input[2]) { /* wrong order */
514 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
515 result = FAILURE;
516 continue;
517 }
518 /* FIXME: better error (a..b..c is the only left possibility?) */
519 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
520 result = FAILURE;
521 continue;
522 } else {
523 mask[c]=1;
524 }
525 }
526 return result;
527 }
528 /* }}} */
529
530 /* {{{ php_trim_int()
531 * mode 1 : trim left
532 * mode 2 : trim right
533 * mode 3 : trim left and right
534 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
535 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)536 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
537 {
538 const char *start = ZSTR_VAL(str);
539 const char *end = start + ZSTR_LEN(str);
540 char mask[256];
541
542 if (what) {
543 if (what_len == 1) {
544 char p = *what;
545 if (mode & 1) {
546 while (start != end) {
547 if (*start == p) {
548 start++;
549 } else {
550 break;
551 }
552 }
553 }
554 if (mode & 2) {
555 while (start != end) {
556 if (*(end-1) == p) {
557 end--;
558 } else {
559 break;
560 }
561 }
562 }
563 } else {
564 php_charmask((const unsigned char *) what, what_len, mask);
565
566 if (mode & 1) {
567 while (start != end) {
568 if (mask[(unsigned char)*start]) {
569 start++;
570 } else {
571 break;
572 }
573 }
574 }
575 if (mode & 2) {
576 while (start != end) {
577 if (mask[(unsigned char)*(end-1)]) {
578 end--;
579 } else {
580 break;
581 }
582 }
583 }
584 }
585 } else {
586 if (mode & 1) {
587 while (start != end) {
588 unsigned char c = (unsigned char)*start;
589
590 if (c <= ' ' &&
591 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
592 start++;
593 } else {
594 break;
595 }
596 }
597 }
598 if (mode & 2) {
599 while (start != end) {
600 unsigned char c = (unsigned char)*(end-1);
601
602 if (c <= ' ' &&
603 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
604 end--;
605 } else {
606 break;
607 }
608 }
609 }
610 }
611
612 if (ZSTR_LEN(str) == end - start) {
613 return zend_string_copy(str);
614 } else if (end - start == 0) {
615 return ZSTR_EMPTY_ALLOC();
616 } else {
617 return zend_string_init(start, end - start, 0);
618 }
619 }
620 /* }}} */
621
/* {{{ php_trim()
 * Exported entry point that forwards unchanged to php_trim_int().
 * mode 1 : trim left
 * mode 2 : trim right
 * mode 3 : trim left and right
 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
 */
PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
{
	return php_trim_int(str, what, what_len, mode);
}
/* }}} */
633
634 /* {{{ php_do_trim
635 * Base for trim(), rtrim() and ltrim() functions.
636 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)637 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
638 {
639 zend_string *str;
640 zend_string *what = NULL;
641
642 ZEND_PARSE_PARAMETERS_START(1, 2)
643 Z_PARAM_STR(str)
644 Z_PARAM_OPTIONAL
645 Z_PARAM_STR(what)
646 ZEND_PARSE_PARAMETERS_END();
647
648 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
649 }
650 /* }}} */
651
/* {{{ Strips whitespace from the beginning and end of a string.
 * Delegates to php_do_trim() with mode 3 (left and right). */
PHP_FUNCTION(trim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
}
/* }}} */
658
/* Frameless handler for trim() with a single argument: trims the default
 * character set from both ends (mode 3) of the string argument.
 * NOTE(review): Z_FLF_PARAM_STR presumably jumps to flf_clean when the
 * argument cannot be used as a string - confirm against the macro. */
ZEND_FRAMELESS_FUNCTION(trim, 1)
{
	zval str_tmp;
	zend_string *str;

	Z_FLF_PARAM_STR(1, str, str_tmp);

	ZVAL_STR(return_value, php_trim_int(str, /* what */ NULL, /* what_len */ 0, /* mode */ 3));

flf_clean:
	/* Releases whatever temporary the parameter macro stored in str_tmp. */
	Z_FLF_PARAM_FREE_STR(1, str_tmp);
}
671
/* Frameless handler for trim() with two arguments: trims the characters
 * listed in the second argument from both ends (mode 3) of the first.
 * NOTE(review): the Z_FLF_PARAM_STR macros presumably jump to flf_clean when
 * an argument cannot be used as a string - confirm against the macro. */
ZEND_FRAMELESS_FUNCTION(trim, 2)
{
	zval str_tmp, what_tmp;
	zend_string *str, *what;

	Z_FLF_PARAM_STR(1, str, str_tmp);
	Z_FLF_PARAM_STR(2, what, what_tmp);

	ZVAL_STR(return_value, php_trim_int(str, ZSTR_VAL(what), ZSTR_LEN(what), /* mode */ 3));

flf_clean:
	/* Releases whatever temporaries the parameter macros stored. */
	Z_FLF_PARAM_FREE_STR(1, str_tmp);
	Z_FLF_PARAM_FREE_STR(2, what_tmp);
}
686
/* {{{ Removes trailing whitespace.
 * Delegates to php_do_trim() with mode 2 (right side only). */
PHP_FUNCTION(rtrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
/* }}} */
693
/* {{{ Strips whitespace from the beginning of a string.
 * Delegates to php_do_trim() with mode 1 (left side only). */
PHP_FUNCTION(ltrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
700
/* {{{ Wraps buffer to selected number of characters using string break char.
 * Arguments: text, optional width (default 75), optional break string
 * (default "\n") and optional cut flag (default false).
 * An empty text returns an empty string. An empty break string, or a cut flag
 * of true together with a width of 0, throws.
 * Two strategies are used: a single-byte break without cutting overwrites
 * spaces in a copy of the text; everything else builds a new, longer buffer. */
PHP_FUNCTION(wordwrap)
{
	zend_string *text;
	char *breakchar = "\n";
	/* chk: number of break insertions the current allocation still has room for */
	size_t newtextlen, chk, breakchar_len = 1;
	size_t alloced;
	/* laststart: index where the current output line begins;
	 * lastspace: index of the most recently seen space (or line start) */
	zend_long current = 0, laststart = 0, lastspace = 0;
	zend_long linelength = 75;
	bool docut = 0;
	zend_string *newtext;

	ZEND_PARSE_PARAMETERS_START(1, 4)
		Z_PARAM_STR(text)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(linelength)
		Z_PARAM_STRING(breakchar, breakchar_len)
		Z_PARAM_BOOL(docut)
	ZEND_PARSE_PARAMETERS_END();

	if (ZSTR_LEN(text) == 0) {
		RETURN_EMPTY_STRING();
	}

	if (breakchar_len == 0) {
		zend_argument_must_not_be_empty_error(3);
		RETURN_THROWS();
	}

	if (linelength == 0 && docut) {
		zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
		RETURN_THROWS();
	}

	/* Special case for a single-character break as it needs no
	   additional storage space */
	if (breakchar_len == 1 && !docut) {
		newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			if (ZSTR_VAL(text)[current] == breakchar[0]) {
				/* an existing break starts a new line */
				laststart = lastspace = current + 1;
			} else if (ZSTR_VAL(text)[current] == ' ') {
				if (current - laststart >= linelength) {
					/* the line is full exactly at this space: break here */
					ZSTR_VAL(newtext)[current] = breakchar[0];
					laststart = current + 1;
				}
				lastspace = current;
			} else if (current - laststart >= linelength && laststart != lastspace) {
				/* line too long: break at the last space seen on this line */
				ZSTR_VAL(newtext)[lastspace] = breakchar[0];
				laststart = lastspace + 1;
			}
		}

		RETURN_NEW_STR(newtext);
	} else {
		/* Multiple character line break or forced cut */
		if (linelength > 0) {
			/* initial estimate: one break per linelength input bytes, plus one */
			chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
			newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
			alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
		} else {
			/* non-positive width: reserve room for a break after every byte */
			chk = ZSTR_LEN(text);
			alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
			newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
		}

		/* now keep track of the actual new text length */
		newtextlen = 0;

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			if (chk == 0) {
				/* the break budget is used up: grow the buffer and recompute
				 * the budget from the input that is still left */
				alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
				newtext = zend_string_extend(newtext, alloced, 0);
				chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
			}
			/* when we hit an existing break, copy to new buffer, and
			 * fix up laststart and lastspace */
			if (ZSTR_VAL(text)[current] == breakchar[0]
				&& current + breakchar_len < ZSTR_LEN(text)
				&& !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
				newtextlen += current - laststart + breakchar_len;
				/* skip the rest of the break; the loop increment does the final step */
				current += breakchar_len - 1;
				laststart = lastspace = current + 1;
				chk--;
			}
			/* if it is a space, check if it is at the line boundary,
			 * copy and insert a break, or just keep track of it */
			else if (ZSTR_VAL(text)[current] == ' ') {
				if (current - laststart >= linelength) {
					memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
					newtextlen += current - laststart;
					memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
					newtextlen += breakchar_len;
					laststart = current + 1;
					chk--;
				}
				lastspace = current;
			}
			/* if we are cutting, and we've accumulated enough
			 * characters, and we haven't seen a space for this line,
			 * copy and insert a break. */
			else if (current - laststart >= linelength
					&& docut && laststart >= lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
				newtextlen += current - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				laststart = lastspace = current;
				chk--;
			}
			/* if the current word puts us over the linelength, copy
			 * back up until the last space, insert a break, and move
			 * up the laststart */
			else if (current - laststart >= linelength
					&& laststart < lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
				newtextlen += lastspace - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				laststart = lastspace = lastspace + 1;
				chk--;
			}
		}

		/* copy over any stragglers */
		if (laststart != current) {
			memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
			newtextlen += current - laststart;
		}

		ZSTR_VAL(newtext)[newtextlen] = '\0';
		/* free unused memory */
		newtext = zend_string_truncate(newtext, newtextlen, 0);

		RETURN_NEW_STR(newtext);
	}
}
/* }}} */
843
/* {{{ php_explode
 * Splits `str` at each occurrence of `delim` and appends the pieces to the
 * array held in `return_value`. At most `limit` elements are produced: once
 * the limit is reached, the rest of the string (remaining delimiters
 * included) becomes the final element. If the delimiter does not occur at
 * all, the array receives `str` itself as its only element.
 * NOTE(review): explode() in this file only calls this with a non-empty
 * delimiter, a freshly initialised array and limit > 1; nothing here guards
 * against other inputs. */
PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
{
	const char *p1 = ZSTR_VAL(str);                       /* start of the current piece */
	const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);     /* one past the end of str */
	const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp); /* next delimiter */
	zval tmp;

	if (p2 == NULL) {
		/* no delimiter: share the original string instead of copying it */
		ZVAL_STR_COPY(&tmp, str);
		zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
	} else {
		zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
		ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
			do {
				ZEND_HASH_FILL_GROW();
				ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
				ZEND_HASH_FILL_NEXT();
				p1 = p2 + ZSTR_LEN(delim);
				p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
			} while (p2 != NULL && --limit > 1);

			/* whatever follows the last consumed delimiter is the final piece */
			if (p1 <= endp) {
				ZEND_HASH_FILL_GROW();
				ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
				ZEND_HASH_FILL_NEXT();
			}
		} ZEND_HASH_FILL_END();
	}
}
/* }}} */
875
876 /* {{{ php_explode_negative_limit */
php_explode_negative_limit(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)877 PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
878 {
879 #define EXPLODE_ALLOC_STEP 64
880 const char *p1 = ZSTR_VAL(str);
881 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
882 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
883 zval tmp;
884
885 if (p2 == NULL) {
886 /*
887 do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
888 by doing nothing we return empty array
889 */
890 } else {
891 size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
892 zend_long i, to_return;
893 const char **positions = emalloc(allocated * sizeof(char *));
894
895 positions[found++] = p1;
896 do {
897 if (found >= allocated) {
898 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
899 positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
900 }
901 positions[found++] = p1 = p2 + ZSTR_LEN(delim);
902 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
903 } while (p2 != NULL);
904
905 to_return = limit + found;
906 /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
907 for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
908 ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
909 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
910 }
911 efree((void *)positions);
912 }
913 #undef EXPLODE_ALLOC_STEP
914 }
915 /* }}} */
916
917 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)918 PHP_FUNCTION(explode)
919 {
920 zend_string *str, *delim;
921 zend_long limit = ZEND_LONG_MAX; /* No limit */
922 zval tmp;
923
924 ZEND_PARSE_PARAMETERS_START(2, 3)
925 Z_PARAM_STR(delim)
926 Z_PARAM_STR(str)
927 Z_PARAM_OPTIONAL
928 Z_PARAM_LONG(limit)
929 ZEND_PARSE_PARAMETERS_END();
930
931 if (ZSTR_LEN(delim) == 0) {
932 zend_argument_must_not_be_empty_error(1);
933 RETURN_THROWS();
934 }
935
936 array_init(return_value);
937
938 if (ZSTR_LEN(str) == 0) {
939 if (limit >= 0) {
940 ZVAL_EMPTY_STRING(&tmp);
941 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
942 }
943 return;
944 }
945
946 if (limit > 1) {
947 php_explode(delim, str, return_value, limit);
948 } else if (limit < 0) {
949 php_explode_negative_limit(delim, str, return_value, limit);
950 } else {
951 ZVAL_STR_COPY(&tmp, str);
952 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
953 }
954 }
955 /* }}} */
956
/* {{{ php_implode
 * Joins the elements of `pieces`, separated by `glue`, and stores the
 * resulting string in `return_value`.
 * An empty table yields the empty string; a single element yields that
 * element converted to a string. Otherwise the work is done in two passes:
 * the first collects every element and sums up the final length, the second
 * fills the result buffer from its end towards its start. */
PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
{
	zval         *tmp;
	uint32_t      numelems;
	zend_string  *str;
	char         *cptr;
	size_t        len = 0;
	/* One entry per element:
	 *  - str != NULL, lval == 0: string borrowed from the array element
	 *  - str != NULL, lval == 1: string created here, released after copying
	 *  - str == NULL: integer element, lval holds its value */
	struct {
		zend_string *str;
		zend_long    lval;
	} *strings, *ptr;
	ALLOCA_FLAG(use_heap)

	numelems = zend_hash_num_elements(pieces);

	if (numelems == 0) {
		RETURN_EMPTY_STRING();
	} else if (numelems == 1) {
		/* loop to search the first not undefined element... */
		ZEND_HASH_FOREACH_VAL(pieces, tmp) {
			RETURN_STR(zval_get_string(tmp));
		} ZEND_HASH_FOREACH_END();
	}

	ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);

	/* start from the glue's flags and AND in those of every string piece */
	uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(glue);

	ZEND_HASH_FOREACH_VAL(pieces, tmp) {
		if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
			ptr->str = Z_STR_P(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 0;
			flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
			ptr++;
		} else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
			zend_long val = Z_LVAL_P(tmp);

			ptr->str = NULL;
			ptr->lval = val;
			ptr++;
			/* count the characters of the decimal representation: one for
			 * the minus sign (or for the lone "0"), plus one per digit */
			if (val <= 0) {
				len++;
			}
			while (val) {
				val /= 10;
				len++;
			}
		} else {
			ptr->str = zval_get_string_func(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 1;
			flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
			ptr++;
		}
	} ZEND_HASH_FOREACH_END();

	/* numelems cannot be 0, we checked above */
	str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
	GC_ADD_FLAGS(str, flags);
	cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
	*cptr = 0;

	/* walk the collected entries backwards, writing right to left */
	while (1) {
		ptr--;
		if (EXPECTED(ptr->str)) {
			cptr -= ZSTR_LEN(ptr->str);
			memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
			if (ptr->lval) {
				zend_string_release_ex(ptr->str, 0);
			}
		} else {
			/* NOTE(review): the byte at cptr is saved and restored around the
			 * call, presumably because zend_print_long_to_buf() writes a
			 * terminator at the end position - confirm against its definition. */
			char *oldPtr = cptr;
			char oldVal = *cptr;
			cptr = zend_print_long_to_buf(cptr, ptr->lval);
			*oldPtr = oldVal;
		}

		if (ptr == strings) {
			break;
		}

		cptr -= ZSTR_LEN(glue);
		memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
	}

	free_alloca(strings, use_heap);
	RETURN_NEW_STR(str);
}
/* }}} */
1048
1049 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1050 PHP_FUNCTION(implode)
1051 {
1052 zend_string *arg1_str = NULL;
1053 HashTable *arg1_array = NULL;
1054 zend_array *pieces = NULL;
1055
1056 ZEND_PARSE_PARAMETERS_START(1, 2)
1057 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1058 Z_PARAM_OPTIONAL
1059 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1060 ZEND_PARSE_PARAMETERS_END();
1061
1062 if (pieces == NULL) {
1063 if (arg1_array == NULL) {
1064 zend_type_error(
1065 "%s(): If argument #1 ($separator) is of type string, "
1066 "argument #2 ($array) must be of type array, null given",
1067 get_active_function_name()
1068 );
1069 RETURN_THROWS();
1070 }
1071
1072 arg1_str = ZSTR_EMPTY_ALLOC();
1073 pieces = arg1_array;
1074 } else {
1075 if (arg1_str == NULL) {
1076 zend_argument_type_error(1, "must be of type string, array given");
1077 RETURN_THROWS();
1078 }
1079 }
1080
1081 php_implode(arg1_str, pieces, return_value);
1082 }
1083 /* }}} */
1084
1085 ZEND_FRAMELESS_FUNCTION(implode, 1)
1086 {
1087 zval *pieces;
1088
1089 /* Manual parsing for more accurate error message. */
1090 if (!zend_parse_arg_array(arg1, &pieces, /* null_check */ false, /* or_object */ false)) { \
1091 zend_type_error(
1092 "%s(): If argument #1 ($separator) is of type string, "
1093 "argument #2 ($array) must be of type array, null given",
1094 get_active_function_name()
1095 );
1096 goto flf_clean; \
1097 }
1098
1099 zend_string *str = ZSTR_EMPTY_ALLOC();
1100
1101 php_implode(str, Z_ARR_P(pieces), return_value);
1102
1103 flf_clean:;
1104 }
1105
/* Frameless handler for implode() with two arguments: a separator string and
 * an array (or null). A null second argument raises a type error; otherwise
 * the array elements are joined with the separator.
 * NOTE(review): the Z_FLF_PARAM_* macros presumably jump to flf_clean when an
 * argument has an unusable type - confirm against the macros. */
ZEND_FRAMELESS_FUNCTION(implode, 2)
{
	zval str_tmp;
	zend_string *str;
	zval *pieces;

	Z_FLF_PARAM_STR(1, str, str_tmp);
	Z_FLF_PARAM_ARRAY_OR_NULL(2, pieces);

	if (!pieces) {
		zend_type_error(
			"%s(): If argument #1 ($separator) is of type string, "
			"argument #2 ($array) must be of type array, null given",
			get_active_function_name()
		);
		goto flf_clean;
	}

	php_implode(str, Z_ARR_P(pieces), return_value);

flf_clean:;
	/* Releases whatever temporary the parameter macro stored in str_tmp. */
	Z_FLF_PARAM_FREE_STR(1, str_tmp);
}
1129
/* Accesses the per-request delimiter lookup table entry for the byte that
 * `p` points at. Note that `p` is not parenthesised in the expansion. */
#define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]

/* {{{ Tokenize a string.
 * Called with two arguments (string, delimiters) it starts a new
 * tokenization: the string and the scan position are remembered in the
 * BG(strtok_*) globals. Called with one argument, that argument is taken as
 * the delimiter set and scanning continues in the remembered string.
 * Returns the next token, or false when no token is left. A warning is
 * raised (and false returned) if no string has been remembered yet. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0; /* number of leading delimiter bytes skipped */

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* single-argument form: the only argument is the delimiter set */
		tok = str;
	} else {
		/* two-argument form: replace any previously remembered string */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		php_error_docref(NULL, E_WARNING, "Both arguments must be provided when starting tokenization");
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Mark every delimiter byte in the lookup table. */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* the token spans from behind the skipped delimiters up to p */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		/* resume behind the delimiter that ended this token */
		BG(strtok_last) = p + 1;
	} else {
return_false:
		/* nothing left: drop the remembered string */
		RETVAL_FALSE;
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster than memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
/* }}} */
1213
1214 /* {{{ Makes a string uppercase */
PHP_FUNCTION(strtoupper)1215 PHP_FUNCTION(strtoupper)
1216 {
1217 zend_string *arg;
1218
1219 ZEND_PARSE_PARAMETERS_START(1, 1)
1220 Z_PARAM_STR(arg)
1221 ZEND_PARSE_PARAMETERS_END();
1222
1223 RETURN_STR(zend_string_toupper(arg));
1224 }
1225 /* }}} */
1226
1227 /* {{{ Makes a string lowercase */
PHP_FUNCTION(strtolower)1228 PHP_FUNCTION(strtolower)
1229 {
1230 zend_string *str;
1231
1232 ZEND_PARSE_PARAMETERS_START(1, 1)
1233 Z_PARAM_STR(str)
1234 ZEND_PARSE_PARAMETERS_END();
1235
1236 RETURN_STR(zend_string_tolower(str));
1237 }
1238 /* }}} */
1239
PHP_FUNCTION(str_increment)1240 PHP_FUNCTION(str_increment)
1241 {
1242 zend_string *str;
1243
1244 ZEND_PARSE_PARAMETERS_START(1, 1)
1245 Z_PARAM_STR(str)
1246 ZEND_PARSE_PARAMETERS_END();
1247
1248 if (ZSTR_LEN(str) == 0) {
1249 zend_argument_must_not_be_empty_error(1);
1250 RETURN_THROWS();
1251 }
1252 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1253 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1254 RETURN_THROWS();
1255 }
1256
1257 zend_string *incremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1258 size_t position = ZSTR_LEN(str)-1;
1259 bool carry = false;
1260
1261 do {
1262 char c = ZSTR_VAL(incremented)[position];
1263 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1264 if (EXPECTED( c != 'z' && c != 'Z' && c != '9' )) {
1265 carry = false;
1266 ZSTR_VAL(incremented)[position]++;
1267 } else { /* if 'z', 'Z', or '9' */
1268 carry = true;
1269 if (c == '9') {
1270 ZSTR_VAL(incremented)[position] = '0';
1271 } else {
1272 ZSTR_VAL(incremented)[position] -= 25;
1273 }
1274 }
1275 } while (carry && position-- > 0);
1276
1277 if (UNEXPECTED(carry)) {
1278 zend_string *tmp = zend_string_alloc(ZSTR_LEN(incremented)+1, 0);
1279 memcpy(ZSTR_VAL(tmp) + 1, ZSTR_VAL(incremented), ZSTR_LEN(incremented));
1280 ZSTR_VAL(tmp)[ZSTR_LEN(incremented)+1] = '\0';
1281 switch (ZSTR_VAL(incremented)[0]) {
1282 case '0':
1283 ZSTR_VAL(tmp)[0] = '1';
1284 break;
1285 default:
1286 ZSTR_VAL(tmp)[0] = ZSTR_VAL(incremented)[0];
1287 break;
1288 }
1289 zend_string_release_ex(incremented, /* persistent */ false);
1290 RETURN_STR(tmp);
1291 }
1292 RETURN_STR(incremented);
1293 }
1294
1295
PHP_FUNCTION(str_decrement)1296 PHP_FUNCTION(str_decrement)
1297 {
1298 zend_string *str;
1299
1300 ZEND_PARSE_PARAMETERS_START(1, 1)
1301 Z_PARAM_STR(str)
1302 ZEND_PARSE_PARAMETERS_END();
1303
1304 if (ZSTR_LEN(str) == 0) {
1305 zend_argument_must_not_be_empty_error(1);
1306 RETURN_THROWS();
1307 }
1308 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1309 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1310 RETURN_THROWS();
1311 }
1312 if (ZSTR_LEN(str) >= 1 && ZSTR_VAL(str)[0] == '0') {
1313 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1314 RETURN_THROWS();
1315 }
1316
1317 zend_string *decremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1318 size_t position = ZSTR_LEN(str)-1;
1319 bool carry = false;
1320
1321 do {
1322 char c = ZSTR_VAL(decremented)[position];
1323 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1324 if (EXPECTED( c != 'a' && c != 'A' && c != '0' )) {
1325 carry = false;
1326 ZSTR_VAL(decremented)[position]--;
1327 } else { /* if 'a', 'A', or '0' */
1328 carry = true;
1329 if (c == '0') {
1330 ZSTR_VAL(decremented)[position] = '9';
1331 } else {
1332 ZSTR_VAL(decremented)[position] += 25;
1333 }
1334 }
1335 } while (carry && position-- > 0);
1336
1337 if (UNEXPECTED(carry || (ZSTR_VAL(decremented)[0] == '0' && ZSTR_LEN(decremented) > 1))) {
1338 if (ZSTR_LEN(decremented) == 1) {
1339 zend_string_release_ex(decremented, /* persistent */ false);
1340 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1341 RETURN_THROWS();
1342 }
1343 zend_string *tmp = zend_string_alloc(ZSTR_LEN(decremented) - 1, 0);
1344 memcpy(ZSTR_VAL(tmp), ZSTR_VAL(decremented) + 1, ZSTR_LEN(decremented) - 1);
1345 ZSTR_VAL(tmp)[ZSTR_LEN(decremented) - 1] = '\0';
1346 zend_string_release_ex(decremented, /* persistent */ false);
1347 RETURN_STR(tmp);
1348 }
1349 RETURN_STR(decremented);
1350 }
1351
1352 #if defined(PHP_WIN32)
/* Reports whether the single character right before pos begins a path
 * component within [start, pos).
 * That holds when the character is not a slash or backslash and it is either
 * the very first character, or preceded by a slash/backslash, or preceded by
 * a ':' for which the same test (applied two positions earlier) succeeds. */
static bool _is_basename_start(const char *start, const char *pos)
{
	if (pos - start < 1) {
		return 0;
	}

	const char prev = *(pos - 1);
	if (prev == '/' || prev == '\\') {
		return 0;
	}

	if (pos - start == 1) {
		return 1;
	}

	const char before_prev = *(pos - 2);
	if (before_prev == '/' || before_prev == '\\') {
		return 1;
	}
	if (before_prev == ':' && _is_basename_start(start, pos - 2)) {
		return 1;
	}

	return 0;
}
1369 #endif
1370
/* {{{ php_basename
 * Returns a newly initialized zend_string holding the trailing path
 * component of the len-byte buffer s.
 *
 * s, len            path buffer and its length in bytes
 * suffix, suffix_len optional suffix; when suffix is non-NULL, strictly
 *                   shorter than the component and equal to its tail, it is
 *                   cut off the result
 *
 * Two scanning strategies are used: a backwards byte scan when
 * CG(ascii_compatible_locale) is set, and a forward scan driven by
 * php_mblen() otherwise. '/' is a separator everywhere; under PHP_WIN32
 * '\\' is one too and ':' receives drive-letter handling. */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	const char *basename_start;
	const char *basename_end;

	if (CG(ascii_compatible_locale)) {
		/* Start at the last byte and work backwards. */
		basename_end = s + len - 1;

		/* Strip trailing slashes */
		while (basename_end >= s
#ifdef PHP_WIN32
			&& (*basename_end == '/'
				|| *basename_end == '\\'
				|| (*basename_end == ':'
					&& _is_basename_start(s, basename_end)))) {
#else
			&& *basename_end == '/') {
#endif
			basename_end--;
		}
		if (basename_end < s) {
			/* Nothing but separators (or an empty input). */
			return ZSTR_EMPTY_ALLOC();
		}

		/* Extract filename */
		basename_start = basename_end;
		/* basename_end becomes exclusive: one past the last kept byte. */
		basename_end++;
		while (basename_start > s
#ifdef PHP_WIN32
			&& *(basename_start-1) != '/'
			&& *(basename_start-1) != '\\') {

			/* A ':' that directly follows a component start also ends the search. */
			if (*(basename_start-1) == ':' &&
				_is_basename_start(s, basename_start - 1)) {
				break;
			}
#else
			&& *(basename_start-1) != '/') {
#endif
			basename_start--;
		}
	} else {
		/* State 0 is directly after a directory separator (or at the start of the string).
		 * State 1 is everything else. */
		int state = 0;

		basename_start = s;
		basename_end = s;
		while (len > 0) {
			/* A NUL byte is counted as one byte without consulting php_mblen(). */
			int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

			switch (inc_len) {
				case 0:
					goto quit_loop;
				case 1:
#ifdef PHP_WIN32
					if (*s == '/' || *s == '\\') {
#else
					if (*s == '/') {
#endif
						/* A separator closes the component that was being read. */
						if (state == 1) {
							state = 0;
							basename_end = s;
						}
#ifdef PHP_WIN32
					/* Catch relative paths in c:file.txt style. They're not to confuse
					   with the NTFS streams. This part ensures also, that no drive
					   letter traversing happens. */
					} else if ((*s == ':' && (s - basename_start == 1))) {
						if (state == 0) {
							basename_start = s;
							state = 1;
						} else {
							basename_end = s;
							state = 0;
						}
#endif
					} else {
						/* First byte after a separator starts a new component. */
						if (state == 0) {
							basename_start = s;
							state = 1;
						}
					}
					break;
				default:
					if (inc_len < 0) {
						/* If character is invalid, treat it like other non-significant characters. */
						inc_len = 1;
						php_mb_reset();
					}
					/* Multi-byte (or invalid) characters are never separators. */
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
					break;
			}
			s += inc_len;
			len -= inc_len;
		}

quit_loop:
		/* Still inside a component: it extends up to where scanning stopped. */
		if (state == 1) {
			basename_end = s;
		}
	}

	/* Strip the suffix only if something of the component would remain. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
/* }}} */
1486
1487 /* {{{ Returns the filename component of the path */
1488 PHP_FUNCTION(basename)
1489 {
1490 char *string, *suffix = NULL;
1491 size_t string_len, suffix_len = 0;
1492
1493 ZEND_PARSE_PARAMETERS_START(1, 2)
1494 Z_PARAM_STRING(string, string_len)
1495 Z_PARAM_OPTIONAL
1496 Z_PARAM_STRING(suffix, suffix_len)
1497 ZEND_PARSE_PARAMETERS_END();
1498
1499 RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
1500 }
1501 /* }}} */
1502
1503 /* {{{ php_dirname
1504 Returns directory name component of path */
1505 PHPAPI size_t php_dirname(char *path, size_t len)
1506 {
1507 return zend_dirname(path, len);
1508 }
1509 /* }}} */
1510
1511 static inline void _zend_dirname(zval *return_value, zend_string *str, zend_long levels)
1512 {
1513 zend_string *ret;
1514
1515 ret = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
1516
1517 if (levels == 1) {
1518 /* Default case */
1519 #ifdef PHP_WIN32
1520 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), ZSTR_LEN(str));
1521 #else
1522 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), ZSTR_LEN(str));
1523 #endif
1524 } else if (levels < 1) {
1525 zend_argument_value_error(2, "must be greater than or equal to 1");
1526 zend_string_efree(ret);
1527 RETURN_THROWS();
1528 } else {
1529 /* Some levels up */
1530 size_t str_len;
1531 do {
1532 #ifdef PHP_WIN32
1533 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1534 #else
1535 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1536 #endif
1537 } while (ZSTR_LEN(ret) < str_len && --levels);
1538 }
1539
1540 RETURN_NEW_STR(ret);
1541 }
1542
1543 /* {{{ Returns the directory name component of the path */
1544 PHP_FUNCTION(dirname)
1545 {
1546 zend_string *str;
1547 zend_long levels = 1;
1548
1549 ZEND_PARSE_PARAMETERS_START(1, 2)
1550 Z_PARAM_STR(str)
1551 Z_PARAM_OPTIONAL
1552 Z_PARAM_LONG(levels)
1553 ZEND_PARSE_PARAMETERS_END();
1554
1555 _zend_dirname(return_value, str, levels);
1556 }
1557 /* }}} */
1558
1559 ZEND_FRAMELESS_FUNCTION(dirname, 1)
1560 {
1561 zval str_tmp;
1562 zend_string *str;
1563
1564 Z_FLF_PARAM_STR(1, str, str_tmp);
1565
1566 _zend_dirname(return_value, str, 1);
1567
1568 flf_clean:
1569 Z_FLF_PARAM_FREE_STR(1, str_tmp);
1570 }
1571
1572 ZEND_FRAMELESS_FUNCTION(dirname, 2)
1573 {
1574 zval str_tmp;
1575 zend_string *str;
1576 zend_long levels;
1577
1578 Z_FLF_PARAM_STR(1, str, str_tmp);
1579 Z_FLF_PARAM_LONG(2, levels);
1580
1581 _zend_dirname(return_value, str, levels);
1582
1583 flf_clean:
1584 Z_FLF_PARAM_FREE_STR(1, str_tmp);
1585 }
1586
1587 /* {{{ Returns information about a certain string */
1588 PHP_FUNCTION(pathinfo)
1589 {
1590 zval tmp;
1591 char *path, *dirname;
1592 size_t path_len;
1593 bool have_basename;
1594 zend_long opt = PHP_PATHINFO_ALL;
1595 zend_string *ret = NULL;
1596
1597 ZEND_PARSE_PARAMETERS_START(1, 2)
1598 Z_PARAM_STRING(path, path_len)
1599 Z_PARAM_OPTIONAL
1600 Z_PARAM_LONG(opt)
1601 ZEND_PARSE_PARAMETERS_END();
1602
1603 have_basename = (opt & PHP_PATHINFO_BASENAME);
1604
1605 array_init(&tmp);
1606
1607 if (opt & PHP_PATHINFO_DIRNAME) {
1608 dirname = estrndup(path, path_len);
1609 php_dirname(dirname, path_len);
1610 if (*dirname) {
1611 add_assoc_string(&tmp, "dirname", dirname);
1612 }
1613 efree(dirname);
1614 }
1615
1616 if (have_basename) {
1617 ret = php_basename(path, path_len, NULL, 0);
1618 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1619 }
1620
1621 if (opt & PHP_PATHINFO_EXTENSION) {
1622 const char *p;
1623 ptrdiff_t idx;
1624
1625 if (!have_basename) {
1626 ret = php_basename(path, path_len, NULL, 0);
1627 }
1628
1629 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1630
1631 if (p) {
1632 idx = p - ZSTR_VAL(ret);
1633 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1634 }
1635 }
1636
1637 if (opt & PHP_PATHINFO_FILENAME) {
1638 const char *p;
1639 ptrdiff_t idx;
1640
1641 /* Have we already looked up the basename? */
1642 if (!have_basename && !ret) {
1643 ret = php_basename(path, path_len, NULL, 0);
1644 }
1645
1646 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1647
1648 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1649 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1650 }
1651
1652 if (ret) {
1653 zend_string_release_ex(ret, 0);
1654 }
1655
1656 if (opt == PHP_PATHINFO_ALL) {
1657 RETURN_COPY_VALUE(&tmp);
1658 } else {
1659 zval *element;
1660 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1661 RETVAL_COPY_DEREF(element);
1662 } else {
1663 RETVAL_EMPTY_STRING();
1664 }
1665 zval_ptr_dtor(&tmp);
1666 }
1667 }
1668 /* }}} */
1669
1670 /* {{{ php_stristr
1671 case insensitive strstr */
1672 PHPAPI char *php_stristr(const char *s, const char *t, size_t s_len, size_t t_len)
1673 {
1674 return (char*)php_memnistr(s, t, t_len, s + s_len);
1675 }
1676 /* }}} */
1677
/* Common worker for php_strspn() and php_strcspn().
 * Counts the leading bytes of [haystack, haystack_end) whose membership in
 * the set [characters, characters_end) equals must_match: true yields the
 * strspn length, false the strcspn length. */
static size_t php_strspn_strcspn_common(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end, bool must_match)
{
	const char *cursor;

	/* Fast path for a one-byte set: building the table below would cost more
	 * than the comparisons it saves, so compare directly. Empirically the
	 * table only pays off once the set is longer than one character. */
	if (characters_end - characters == 1) {
		const char only = *characters;

		for (cursor = haystack; cursor < haystack_end; cursor++) {
			if ((*cursor == only) != must_match) {
				break;
			}
		}
		return cursor - haystack;
	}

	/* One flag per possible byte value; set for every byte in the set so the
	 * scan over haystack is a single lookup per byte. */
	bool in_set[256];
	/* Use multiple small memsets to inline the memset with intrinsics, trick learned from glibc. */
	memset(in_set, 0, 64);
	memset(in_set + 64, 0, 64);
	memset(in_set + 128, 0, 64);
	memset(in_set + 192, 0, 64);

	for (const char *c = characters; c < characters_end; c++) {
		in_set[(unsigned char) *c] = true;
	}

	for (cursor = haystack; cursor < haystack_end; cursor++) {
		if (in_set[(unsigned char) *cursor] != must_match) {
			break;
		}
	}

	return cursor - haystack;
}
1713
1714 /* {{{ php_strspn */
1715 PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end)
1716 {
1717 return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, true);
1718 }
1719 /* }}} */
1720
1721 /* {{{ php_strcspn */
1722 PHPAPI size_t php_strcspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end)
1723 {
1724 return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, false);
1725 }
1726 /* }}} */
1727
1728 /* {{{ Finds first occurrence of a string within another, case insensitive */
1729 PHP_FUNCTION(stristr)
1730 {
1731 zend_string *haystack, *needle;
1732 const char *found = NULL;
1733 size_t found_offset;
1734 bool part = 0;
1735
1736 ZEND_PARSE_PARAMETERS_START(2, 3)
1737 Z_PARAM_STR(haystack)
1738 Z_PARAM_STR(needle)
1739 Z_PARAM_OPTIONAL
1740 Z_PARAM_BOOL(part)
1741 ZEND_PARSE_PARAMETERS_END();
1742
1743 found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
1744
1745 if (UNEXPECTED(!found)) {
1746 RETURN_FALSE;
1747 }
1748 found_offset = found - ZSTR_VAL(haystack);
1749 if (part) {
1750 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1751 }
1752 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1753 }
1754 /* }}} */
1755
1756 static inline void _zend_strstr(zval *return_value, zend_string *haystack, zend_string *needle, bool part)
1757 {
1758 const char *found = NULL;
1759 zend_long found_offset;
1760
1761 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1762
1763 if (UNEXPECTED(!found)) {
1764 RETURN_FALSE;
1765 }
1766 found_offset = found - ZSTR_VAL(haystack);
1767 if (part) {
1768 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1769 }
1770 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1771 }
1772
1773 /* {{{ Finds first occurrence of a string within another */
1774 PHP_FUNCTION(strstr)
1775 {
1776 zend_string *haystack, *needle;
1777 bool part = 0;
1778
1779 ZEND_PARSE_PARAMETERS_START(2, 3)
1780 Z_PARAM_STR(haystack)
1781 Z_PARAM_STR(needle)
1782 Z_PARAM_OPTIONAL
1783 Z_PARAM_BOOL(part)
1784 ZEND_PARSE_PARAMETERS_END();
1785
1786 _zend_strstr(return_value, haystack, needle, part);
1787 }
1788 /* }}} */
1789
1790 ZEND_FRAMELESS_FUNCTION(strstr, 2)
1791 {
1792 zval haystack_tmp, needle_tmp;
1793 zend_string *haystack, *needle;
1794
1795 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1796 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1797
1798 _zend_strstr(return_value, haystack, needle, /* part */ false);
1799
1800 flf_clean:
1801 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1802 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1803 }
1804
1805 ZEND_FRAMELESS_FUNCTION(strstr, 3)
1806 {
1807 zval haystack_tmp, needle_tmp;
1808 zend_string *haystack, *needle;
1809 bool part;
1810
1811 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1812 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1813 Z_FLF_PARAM_BOOL(3, part);
1814
1815 _zend_strstr(return_value, haystack, needle, part);
1816
1817 flf_clean:
1818 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1819 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1820 }
1821
1822 /* {{{ Checks if a string contains another */
1823 PHP_FUNCTION(str_contains)
1824 {
1825 zend_string *haystack, *needle;
1826
1827 ZEND_PARSE_PARAMETERS_START(2, 2)
1828 Z_PARAM_STR(haystack)
1829 Z_PARAM_STR(needle)
1830 ZEND_PARSE_PARAMETERS_END();
1831
1832 RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1833 }
1834 /* }}} */
1835
1836 ZEND_FRAMELESS_FUNCTION(str_contains, 2)
1837 {
1838 zval haystack_tmp, needle_tmp;
1839 zend_string *haystack, *needle;
1840
1841 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1842 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1843
1844 RETVAL_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1845
1846 flf_clean:
1847 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1848 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1849 }
1850
1851 /* {{{ Checks if haystack starts with needle */
1852 PHP_FUNCTION(str_starts_with)
1853 {
1854 zend_string *haystack, *needle;
1855
1856 ZEND_PARSE_PARAMETERS_START(2, 2)
1857 Z_PARAM_STR(haystack)
1858 Z_PARAM_STR(needle)
1859 ZEND_PARSE_PARAMETERS_END();
1860
1861 RETURN_BOOL(zend_string_starts_with(haystack, needle));
1862 }
1863 /* }}} */
1864
1865 ZEND_FRAMELESS_FUNCTION(str_starts_with, 2)
1866 {
1867 zval haystack_tmp, needle_tmp;
1868 zend_string *haystack, *needle;
1869
1870 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1871 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1872
1873 RETVAL_BOOL(zend_string_starts_with(haystack, needle));
1874
1875 flf_clean:
1876 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1877 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1878 }
1879
1880 /* {{{ Checks if haystack ends with needle */
1881 PHP_FUNCTION(str_ends_with)
1882 {
1883 zend_string *haystack, *needle;
1884
1885 ZEND_PARSE_PARAMETERS_START(2, 2)
1886 Z_PARAM_STR(haystack)
1887 Z_PARAM_STR(needle)
1888 ZEND_PARSE_PARAMETERS_END();
1889
1890 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1891 RETURN_FALSE;
1892 }
1893
1894 RETURN_BOOL(memcmp(
1895 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1896 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1897 }
1898 /* }}} */
1899
1900 static inline void _zend_strpos(zval *return_value, zend_string *haystack, zend_string *needle, zend_long offset)
1901 {
1902 const char *found = NULL;
1903
1904 if (offset < 0) {
1905 offset += (zend_long)ZSTR_LEN(haystack);
1906 }
1907 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1908 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1909 RETURN_THROWS();
1910 }
1911
1912 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1913 ZSTR_VAL(needle), ZSTR_LEN(needle),
1914 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1915
1916 if (UNEXPECTED(!found)) {
1917 RETURN_FALSE;
1918 }
1919 RETURN_LONG(found - ZSTR_VAL(haystack));
1920 }
1921
1922 /* {{{ Finds position of first occurrence of a string within another */
1923 PHP_FUNCTION(strpos)
1924 {
1925 zend_string *haystack, *needle;
1926 zend_long offset = 0;
1927
1928 ZEND_PARSE_PARAMETERS_START(2, 3)
1929 Z_PARAM_STR(haystack)
1930 Z_PARAM_STR(needle)
1931 Z_PARAM_OPTIONAL
1932 Z_PARAM_LONG(offset)
1933 ZEND_PARSE_PARAMETERS_END();
1934
1935 _zend_strpos(return_value, haystack, needle, offset);
1936 }
1937 /* }}} */
1938
1939 ZEND_FRAMELESS_FUNCTION(strpos, 2)
1940 {
1941 zval haystack_tmp, needle_tmp;
1942 zend_string *haystack, *needle;
1943
1944 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1945 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1946
1947 _zend_strpos(return_value, haystack, needle, 0);
1948
1949 flf_clean:
1950 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1951 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1952 }
1953
1954 ZEND_FRAMELESS_FUNCTION(strpos, 3)
1955 {
1956 zval haystack_tmp, needle_tmp;
1957 zend_string *haystack, *needle;
1958 zend_long offset;
1959
1960 Z_FLF_PARAM_STR(1, haystack, haystack_tmp);
1961 Z_FLF_PARAM_STR(2, needle, needle_tmp);
1962 Z_FLF_PARAM_LONG(3, offset);
1963
1964 _zend_strpos(return_value, haystack, needle, offset);
1965
1966 flf_clean:
1967 Z_FLF_PARAM_FREE_STR(1, haystack_tmp);
1968 Z_FLF_PARAM_FREE_STR(2, needle_tmp);
1969 }
1970
1971 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
1972 PHP_FUNCTION(stripos)
1973 {
1974 const char *found = NULL;
1975 zend_string *haystack, *needle;
1976 zend_long offset = 0;
1977
1978 ZEND_PARSE_PARAMETERS_START(2, 3)
1979 Z_PARAM_STR(haystack)
1980 Z_PARAM_STR(needle)
1981 Z_PARAM_OPTIONAL
1982 Z_PARAM_LONG(offset)
1983 ZEND_PARSE_PARAMETERS_END();
1984
1985 if (offset < 0) {
1986 offset += (zend_long)ZSTR_LEN(haystack);
1987 }
1988 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1989 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1990 RETURN_THROWS();
1991 }
1992
1993 found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
1994 ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1995
1996 if (UNEXPECTED(!found)) {
1997 RETURN_FALSE;
1998 }
1999 RETURN_LONG(found - ZSTR_VAL(haystack));
2000 }
2001 /* }}} */
2002
2003 /* {{{ Finds position of last occurrence of a string within another string */
2004 PHP_FUNCTION(strrpos)
2005 {
2006 zend_string *needle;
2007 zend_string *haystack;
2008 zend_long offset = 0;
2009 const char *p, *e, *found;
2010
2011 ZEND_PARSE_PARAMETERS_START(2, 3)
2012 Z_PARAM_STR(haystack)
2013 Z_PARAM_STR(needle)
2014 Z_PARAM_OPTIONAL
2015 Z_PARAM_LONG(offset)
2016 ZEND_PARSE_PARAMETERS_END();
2017
2018 if (offset >= 0) {
2019 if ((size_t)offset > ZSTR_LEN(haystack)) {
2020 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2021 RETURN_THROWS();
2022 }
2023 p = ZSTR_VAL(haystack) + (size_t)offset;
2024 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2025 } else {
2026 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2027 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2028 RETURN_THROWS();
2029 }
2030
2031 p = ZSTR_VAL(haystack);
2032 if ((size_t)-offset < ZSTR_LEN(needle)) {
2033 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2034 } else {
2035 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2036 }
2037 }
2038
2039 found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e);
2040
2041 if (UNEXPECTED(!found)) {
2042 RETURN_FALSE;
2043 }
2044 RETURN_LONG(found - ZSTR_VAL(haystack));
2045 }
2046 /* }}} */
2047
2048 /* {{{ Finds position of last occurrence of a string within another string */
2049 PHP_FUNCTION(strripos)
2050 {
2051 zend_string *needle;
2052 zend_string *haystack;
2053 zend_long offset = 0;
2054 const char *p, *e, *found;
2055 zend_string *needle_dup, *haystack_dup;
2056
2057 ZEND_PARSE_PARAMETERS_START(2, 3)
2058 Z_PARAM_STR(haystack)
2059 Z_PARAM_STR(needle)
2060 Z_PARAM_OPTIONAL
2061 Z_PARAM_LONG(offset)
2062 ZEND_PARSE_PARAMETERS_END();
2063
2064 if (ZSTR_LEN(needle) == 1) {
2065 /* Single character search can shortcut memcmps
2066 Can also avoid tolower emallocs */
2067 char lowered;
2068 if (offset >= 0) {
2069 if ((size_t)offset > ZSTR_LEN(haystack)) {
2070 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2071 RETURN_THROWS();
2072 }
2073 p = ZSTR_VAL(haystack) + (size_t)offset;
2074 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
2075 } else {
2076 p = ZSTR_VAL(haystack);
2077 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2078 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2079 RETURN_THROWS();
2080 }
2081 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
2082 }
2083 lowered = zend_tolower_ascii(*ZSTR_VAL(needle));
2084 while (e >= p) {
2085 if (zend_tolower_ascii(*e) == lowered) {
2086 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
2087 }
2088 e--;
2089 }
2090 RETURN_FALSE;
2091 }
2092
2093 haystack_dup = zend_string_tolower(haystack);
2094 if (offset >= 0) {
2095 if ((size_t)offset > ZSTR_LEN(haystack)) {
2096 zend_string_release_ex(haystack_dup, 0);
2097 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2098 RETURN_THROWS();
2099 }
2100 p = ZSTR_VAL(haystack_dup) + offset;
2101 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2102 } else {
2103 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2104 zend_string_release_ex(haystack_dup, 0);
2105 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2106 RETURN_THROWS();
2107 }
2108
2109 p = ZSTR_VAL(haystack_dup);
2110 if ((size_t)-offset < ZSTR_LEN(needle)) {
2111 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2112 } else {
2113 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2114 }
2115 }
2116
2117 needle_dup = zend_string_tolower(needle);
2118 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
2119 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
2120 zend_string_release_ex(needle_dup, 0);
2121 zend_string_release_ex(haystack_dup, 0);
2122 } else {
2123 zend_string_release_ex(needle_dup, 0);
2124 zend_string_release_ex(haystack_dup, 0);
2125 RETURN_FALSE;
2126 }
2127 }
2128 /* }}} */
2129
2130 /* {{{ Finds the last occurrence of a character in a string within another */
2131 PHP_FUNCTION(strrchr)
2132 {
2133 zend_string *haystack, *needle;
2134 const char *found = NULL;
2135 zend_long found_offset;
2136 bool part = 0;
2137
2138 ZEND_PARSE_PARAMETERS_START(2, 3)
2139 Z_PARAM_STR(haystack)
2140 Z_PARAM_STR(needle)
2141 Z_PARAM_OPTIONAL
2142 Z_PARAM_BOOL(part)
2143 ZEND_PARSE_PARAMETERS_END();
2144
2145 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2146 if (UNEXPECTED(!found)) {
2147 RETURN_FALSE;
2148 }
2149 found_offset = found - ZSTR_VAL(haystack);
2150 if (part) {
2151 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
2152 }
2153 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2154 }
2155 /* }}} */
2156
2157 /* {{{ php_chunk_split */
2158 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
2159 {
2160 char *q;
2161 const char *p;
2162 size_t chunks;
2163 size_t restlen;
2164 zend_string *dest;
2165
2166 chunks = srclen / chunklen;
2167 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
2168 if (restlen) {
2169 /* We want chunks to be rounded up rather than rounded down.
2170 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
2171 chunks++;
2172 }
2173
2174 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
2175
2176 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
2177 q = zend_mempcpy(q, p, chunklen);
2178 q = zend_mempcpy(q, end, endlen);
2179 p += chunklen;
2180 }
2181
2182 if (restlen) {
2183 q = zend_mempcpy(q, p, restlen);
2184 q = zend_mempcpy(q, end, endlen);
2185 }
2186
2187 *q = '\0';
2188 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
2189
2190 return dest;
2191 }
2192 /* }}} */
2193
2194 /* {{{ Returns split line */
2195 PHP_FUNCTION(chunk_split)
2196 {
2197 zend_string *str;
2198 char *end = "\r\n";
2199 size_t endlen = 2;
2200 zend_long chunklen = 76;
2201 zend_string *result;
2202
2203 ZEND_PARSE_PARAMETERS_START(1, 3)
2204 Z_PARAM_STR(str)
2205 Z_PARAM_OPTIONAL
2206 Z_PARAM_LONG(chunklen)
2207 Z_PARAM_STRING(end, endlen)
2208 ZEND_PARSE_PARAMETERS_END();
2209
2210 if (chunklen <= 0) {
2211 zend_argument_value_error(2, "must be greater than 0");
2212 RETURN_THROWS();
2213 }
2214
2215 if ((size_t)chunklen > ZSTR_LEN(str)) {
2216 /* to maintain BC, we must return original string + ending */
2217 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2218 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2219 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2220 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2221 RETURN_NEW_STR(result);
2222 }
2223
2224 if (!ZSTR_LEN(str)) {
2225 RETURN_EMPTY_STRING();
2226 }
2227
2228 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2229
2230 RETURN_STR(result);
2231 }
2232 /* }}} */
2233
2234 static inline void _zend_substr(zval *return_value, zend_string *str, zend_long f, bool len_is_null, zend_long l)
2235 {
2236 if (f < 0) {
2237 /* if "from" position is negative, count start position from the end
2238 * of the string
2239 */
2240 if (-(size_t)f > ZSTR_LEN(str)) {
2241 f = 0;
2242 } else {
2243 f = (zend_long)ZSTR_LEN(str) + f;
2244 }
2245 } else if ((size_t)f > ZSTR_LEN(str)) {
2246 RETURN_EMPTY_STRING();
2247 }
2248
2249 if (!len_is_null) {
2250 if (l < 0) {
2251 /* if "length" position is negative, set it to the length
2252 * needed to stop that many chars from the end of the string
2253 */
2254 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2255 l = 0;
2256 } else {
2257 l = (zend_long)ZSTR_LEN(str) - f + l;
2258 }
2259 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2260 l = (zend_long)ZSTR_LEN(str) - f;
2261 }
2262 } else {
2263 l = (zend_long)ZSTR_LEN(str) - f;
2264 }
2265
2266 if (l == ZSTR_LEN(str)) {
2267 RETURN_STR_COPY(str);
2268 } else {
2269 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2270 }
2271 }
2272
2273 /* {{{ Returns part of a string */
2274 PHP_FUNCTION(substr)
2275 {
2276 zend_string *str;
2277 zend_long l = 0, f;
2278 bool len_is_null = 1;
2279
2280 ZEND_PARSE_PARAMETERS_START(2, 3)
2281 Z_PARAM_STR(str)
2282 Z_PARAM_LONG(f)
2283 Z_PARAM_OPTIONAL
2284 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2285 ZEND_PARSE_PARAMETERS_END();
2286
2287 _zend_substr(return_value, str, f, len_is_null, l);
2288 }
2289 /* }}} */
2290
2291 ZEND_FRAMELESS_FUNCTION(substr, 2)
2292 {
2293 zval str_tmp;
2294 zend_string *str;
2295 zend_long f;
2296
2297 Z_FLF_PARAM_STR(1, str, str_tmp);
2298 Z_FLF_PARAM_LONG(2, f);
2299
2300 _zend_substr(return_value, str, f, /* len_is_null */ true, 0);
2301
2302 flf_clean:
2303 Z_FLF_PARAM_FREE_STR(1, str_tmp);
2304 }
2305
2306 ZEND_FRAMELESS_FUNCTION(substr, 3)
2307 {
2308 zval str_tmp;
2309 zend_string *str;
2310 zend_long f, l;
2311 bool len_is_null;
2312
2313 Z_FLF_PARAM_STR(1, str, str_tmp);
2314 Z_FLF_PARAM_LONG(2, f);
2315 Z_FLF_PARAM_LONG_OR_NULL(3, len_is_null, l);
2316
2317 _zend_substr(return_value, str, f, len_is_null, l);
2318
2319 flf_clean:
2320 Z_FLF_PARAM_FREE_STR(1, str_tmp);
2321 }
2322
2323 /* {{{ Replaces part of a string with another string */
2324 PHP_FUNCTION(substr_replace)
2325 {
2326 zend_string *str, *repl_str;
2327 HashTable *str_ht, *repl_ht;
2328 HashTable *from_ht;
2329 zend_long from_long;
2330 HashTable *len_ht = NULL;
2331 zend_long len_long;
2332 bool len_is_null = 1;
2333 zend_long l = 0;
2334 zend_long f;
2335 zend_string *result;
2336 HashPosition from_idx, repl_idx, len_idx;
2337 zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
2338
2339 ZEND_PARSE_PARAMETERS_START(3, 4)
2340 Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
2341 Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
2342 Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
2343 Z_PARAM_OPTIONAL
2344 Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
2345 ZEND_PARSE_PARAMETERS_END();
2346
2347 if (len_is_null) {
2348 if (str) {
2349 l = ZSTR_LEN(str);
2350 }
2351 } else if (!len_ht) {
2352 l = len_long;
2353 }
2354
2355 if (str) {
2356 if (from_ht) {
2357 zend_argument_type_error(3, "cannot be an array when working on a single string");
2358 RETURN_THROWS();
2359 }
2360 if (len_ht) {
2361 zend_argument_type_error(4, "cannot be an array when working on a single string");
2362 RETURN_THROWS();
2363 }
2364
2365 f = from_long;
2366
2367 /* if "from" position is negative, count start position from the end
2368 * of the string
2369 */
2370 if (f < 0) {
2371 f = (zend_long)ZSTR_LEN(str) + f;
2372 if (f < 0) {
2373 f = 0;
2374 }
2375 } else if ((size_t)f > ZSTR_LEN(str)) {
2376 f = ZSTR_LEN(str);
2377 }
2378 /* if "length" position is negative, set it to the length
2379 * needed to stop that many chars from the end of the string
2380 */
2381 if (l < 0) {
2382 l = ((zend_long)ZSTR_LEN(str) - f) + l;
2383 if (l < 0) {
2384 l = 0;
2385 }
2386 }
2387
2388 if ((size_t)l > ZSTR_LEN(str)) {
2389 l = ZSTR_LEN(str);
2390 }
2391
2392 if ((f + l) > (zend_long)ZSTR_LEN(str)) {
2393 l = ZSTR_LEN(str) - f;
2394 }
2395
2396 zend_string *tmp_repl_str = NULL;
2397 if (repl_ht) {
2398 repl_idx = 0;
2399 if (HT_IS_PACKED(repl_ht)) {
2400 while (repl_idx < repl_ht->nNumUsed) {
2401 tmp_repl = &repl_ht->arPacked[repl_idx];
2402 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2403 break;
2404 }
2405 repl_idx++;
2406 }
2407 } else {
2408 while (repl_idx < repl_ht->nNumUsed) {
2409 tmp_repl = &repl_ht->arData[repl_idx].val;
2410 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2411 break;
2412 }
2413 repl_idx++;
2414 }
2415 }
2416 if (repl_idx < repl_ht->nNumUsed) {
2417 repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2418 } else {
2419 repl_str = STR_EMPTY_ALLOC();
2420 }
2421 }
2422
2423 result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
2424
2425 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
2426 if (ZSTR_LEN(repl_str)) {
2427 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2428 }
2429 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
2430 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2431 zend_tmp_string_release(tmp_repl_str);
2432 RETURN_NEW_STR(result);
2433 } else { /* str is array of strings */
2434 zend_string *str_index = NULL;
2435 size_t result_len;
2436 zend_ulong num_index;
2437
2438 /* TODO
2439 if (!len_is_null && from_ht) {
2440 if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
2441 php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
2442 RETURN_STR_COPY(str);
2443 }
2444 }
2445 */
2446
2447 array_init(return_value);
2448
2449 from_idx = len_idx = repl_idx = 0;
2450
2451 ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
2452 zend_string *tmp_orig_str;
2453 zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
2454
2455 if (from_ht) {
2456 if (HT_IS_PACKED(from_ht)) {
2457 while (from_idx < from_ht->nNumUsed) {
2458 tmp_from = &from_ht->arPacked[from_idx];
2459 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2460 break;
2461 }
2462 from_idx++;
2463 }
2464 } else {
2465 while (from_idx < from_ht->nNumUsed) {
2466 tmp_from = &from_ht->arData[from_idx].val;
2467 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2468 break;
2469 }
2470 from_idx++;
2471 }
2472 }
2473 if (from_idx < from_ht->nNumUsed) {
2474 f = zval_get_long(tmp_from);
2475
2476 if (f < 0) {
2477 f = (zend_long)ZSTR_LEN(orig_str) + f;
2478 if (f < 0) {
2479 f = 0;
2480 }
2481 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2482 f = ZSTR_LEN(orig_str);
2483 }
2484 from_idx++;
2485 } else {
2486 f = 0;
2487 }
2488 } else {
2489 f = from_long;
2490 if (f < 0) {
2491 f = (zend_long)ZSTR_LEN(orig_str) + f;
2492 if (f < 0) {
2493 f = 0;
2494 }
2495 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2496 f = ZSTR_LEN(orig_str);
2497 }
2498 }
2499
2500 if (len_ht) {
2501 if (HT_IS_PACKED(len_ht)) {
2502 while (len_idx < len_ht->nNumUsed) {
2503 tmp_len = &len_ht->arPacked[len_idx];
2504 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2505 break;
2506 }
2507 len_idx++;
2508 }
2509 } else {
2510 while (len_idx < len_ht->nNumUsed) {
2511 tmp_len = &len_ht->arData[len_idx].val;
2512 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2513 break;
2514 }
2515 len_idx++;
2516 }
2517 }
2518 if (len_idx < len_ht->nNumUsed) {
2519 l = zval_get_long(tmp_len);
2520 len_idx++;
2521 } else {
2522 l = ZSTR_LEN(orig_str);
2523 }
2524 } else if (!len_is_null) {
2525 l = len_long;
2526 } else {
2527 l = ZSTR_LEN(orig_str);
2528 }
2529
2530 if (l < 0) {
2531 l = (ZSTR_LEN(orig_str) - f) + l;
2532 if (l < 0) {
2533 l = 0;
2534 }
2535 }
2536
2537 ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
2538 ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
2539 if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
2540 l = ZSTR_LEN(orig_str) - f;
2541 }
2542
2543 result_len = ZSTR_LEN(orig_str) - l;
2544
2545 if (repl_ht) {
2546 if (HT_IS_PACKED(repl_ht)) {
2547 while (repl_idx < repl_ht->nNumUsed) {
2548 tmp_repl = &repl_ht->arPacked[repl_idx];
2549 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2550 break;
2551 }
2552 repl_idx++;
2553 }
2554 } else {
2555 while (repl_idx < repl_ht->nNumUsed) {
2556 tmp_repl = &repl_ht->arData[repl_idx].val;
2557 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2558 break;
2559 }
2560 repl_idx++;
2561 }
2562 }
2563 if (repl_idx < repl_ht->nNumUsed) {
2564 zend_string *tmp_repl_str;
2565 zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2566
2567 result_len += ZSTR_LEN(repl_str);
2568 repl_idx++;
2569 result = zend_string_safe_alloc(1, result_len, 0, 0);
2570
2571 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2572 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2573 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2574 zend_tmp_string_release(tmp_repl_str);
2575 } else {
2576 result = zend_string_safe_alloc(1, result_len, 0, 0);
2577
2578 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2579 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2580 }
2581 } else {
2582 result_len += ZSTR_LEN(repl_str);
2583
2584 result = zend_string_safe_alloc(1, result_len, 0, 0);
2585
2586 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2587 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2588 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2589 }
2590
2591 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2592
2593 if (str_index) {
2594 zval tmp;
2595
2596 ZVAL_NEW_STR(&tmp, result);
2597 zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
2598 } else {
2599 add_index_str(return_value, num_index, result);
2600 }
2601
2602 zend_tmp_string_release(tmp_orig_str);
2603 } ZEND_HASH_FOREACH_END();
2604 } /* if */
2605 }
2606 /* }}} */
2607
2608 /* {{{ Quotes meta characters */
2609 PHP_FUNCTION(quotemeta)
2610 {
2611 zend_string *old;
2612 const char *old_end, *p;
2613 char *q;
2614 char c;
2615 zend_string *str;
2616
2617 ZEND_PARSE_PARAMETERS_START(1, 1)
2618 Z_PARAM_STR(old)
2619 ZEND_PARSE_PARAMETERS_END();
2620
2621 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2622
2623 if (ZSTR_LEN(old) == 0) {
2624 RETURN_EMPTY_STRING();
2625 }
2626
2627 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2628
2629 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2630 c = *p;
2631 switch (c) {
2632 case '.':
2633 case '\\':
2634 case '+':
2635 case '*':
2636 case '?':
2637 case '[':
2638 case '^':
2639 case ']':
2640 case '$':
2641 case '(':
2642 case ')':
2643 *q++ = '\\';
2644 ZEND_FALLTHROUGH;
2645 default:
2646 *q++ = c;
2647 }
2648 }
2649
2650 *q = '\0';
2651
2652 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2653 }
2654 /* }}} */
2655
2656 /* {{{ Returns ASCII value of character
2657 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2658 PHP_FUNCTION(ord)
2659 {
2660 zend_string *str;
2661
2662 ZEND_PARSE_PARAMETERS_START(1, 1)
2663 Z_PARAM_STR(str)
2664 ZEND_PARSE_PARAMETERS_END();
2665
2666 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2667 }
2668 /* }}} */
2669
2670 /* {{{ Converts ASCII code to a character
2671 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2672 PHP_FUNCTION(chr)
2673 {
2674 zend_long c;
2675
2676 ZEND_PARSE_PARAMETERS_START(1, 1)
2677 Z_PARAM_LONG(c)
2678 ZEND_PARSE_PARAMETERS_END();
2679
2680 c &= 0xff;
2681 RETURN_CHAR(c);
2682 }
2683 /* }}} */
2684
2685 /* {{{ php_ucfirst
2686 Uppercase the first character of the word in a native string */
2687 static zend_string* php_ucfirst(zend_string *str)
2688 {
2689 const unsigned char ch = ZSTR_VAL(str)[0];
2690 unsigned char r = zend_toupper_ascii(ch);
2691 if (r == ch) {
2692 return zend_string_copy(str);
2693 } else {
2694 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2695 ZSTR_VAL(s)[0] = r;
2696 return s;
2697 }
2698 }
2699 /* }}} */
2700
2701 /* {{{ Makes a string's first character uppercase */
2702 PHP_FUNCTION(ucfirst)
2703 {
2704 zend_string *str;
2705
2706 ZEND_PARSE_PARAMETERS_START(1, 1)
2707 Z_PARAM_STR(str)
2708 ZEND_PARSE_PARAMETERS_END();
2709
2710 if (!ZSTR_LEN(str)) {
2711 RETURN_EMPTY_STRING();
2712 }
2713
2714 RETURN_STR(php_ucfirst(str));
2715 }
2716 /* }}} */
2717
2718 /* {{{
2719 Lowercase the first character of the word in a native string */
2720 static zend_string* php_lcfirst(zend_string *str)
2721 {
2722 unsigned char r = zend_tolower_ascii(ZSTR_VAL(str)[0]);
2723 if (r == ZSTR_VAL(str)[0]) {
2724 return zend_string_copy(str);
2725 } else {
2726 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2727 ZSTR_VAL(s)[0] = r;
2728 return s;
2729 }
2730 }
2731 /* }}} */
2732
2733 /* {{{ Make a string's first character lowercase */
2734 PHP_FUNCTION(lcfirst)
2735 {
2736 zend_string *str;
2737
2738 ZEND_PARSE_PARAMETERS_START(1, 1)
2739 Z_PARAM_STR(str)
2740 ZEND_PARSE_PARAMETERS_END();
2741
2742 if (!ZSTR_LEN(str)) {
2743 RETURN_EMPTY_STRING();
2744 }
2745
2746 RETURN_STR(php_lcfirst(str));
2747 }
2748 /* }}} */
2749
2750 /* {{{ Uppercase the first character of every word in a string */
2751 PHP_FUNCTION(ucwords)
2752 {
2753 zend_string *str;
2754 char *delims = " \t\r\n\f\v";
2755 char *r;
2756 const char *r_end;
2757 size_t delims_len = 6;
2758 char mask[256];
2759
2760 ZEND_PARSE_PARAMETERS_START(1, 2)
2761 Z_PARAM_STR(str)
2762 Z_PARAM_OPTIONAL
2763 Z_PARAM_STRING(delims, delims_len)
2764 ZEND_PARSE_PARAMETERS_END();
2765
2766 if (!ZSTR_LEN(str)) {
2767 RETURN_EMPTY_STRING();
2768 }
2769
2770 php_charmask((const unsigned char *) delims, delims_len, mask);
2771
2772 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2773 r = Z_STRVAL_P(return_value);
2774
2775 *r = zend_toupper_ascii((unsigned char) *r);
2776 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2777 if (mask[(unsigned char)*r++]) {
2778 *r = zend_toupper_ascii((unsigned char) *r);
2779 }
2780 }
2781 }
2782 /* }}} */
2783
2784 /* {{{ php_strtr */
2785 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2786 {
2787 size_t i;
2788
2789 if (UNEXPECTED(trlen < 1)) {
2790 return str;
2791 } else if (trlen == 1) {
2792 char ch_from = *str_from;
2793 char ch_to = *str_to;
2794
2795 for (i = 0; i < len; i++) {
2796 if (str[i] == ch_from) {
2797 str[i] = ch_to;
2798 }
2799 }
2800 } else {
2801 unsigned char xlat[256];
2802
2803 memset(xlat, 0, sizeof(xlat));
2804
2805 for (i = 0; i < trlen; i++) {
2806 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2807 }
2808
2809 for (i = 0; i < len; i++) {
2810 str[i] += xlat[(size_t)(unsigned char) str[i]];
2811 }
2812 }
2813
2814 return str;
2815 }
2816 /* }}} */
2817
2818 /* {{{ php_strtr_ex */
2819 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2820 {
2821 zend_string *new_str = NULL;
2822 size_t i;
2823
2824 if (UNEXPECTED(trlen < 1)) {
2825 return zend_string_copy(str);
2826 } else if (trlen == 1) {
2827 char ch_from = *str_from;
2828 char ch_to = *str_to;
2829 char *output;
2830 char *input = ZSTR_VAL(str);
2831 size_t len = ZSTR_LEN(str);
2832
2833 #ifdef __SSE2__
2834 if (ZSTR_LEN(str) >= sizeof(__m128i)) {
2835 __m128i search = _mm_set1_epi8(ch_from);
2836 __m128i delta = _mm_set1_epi8(ch_to - ch_from);
2837
2838 do {
2839 __m128i src = _mm_loadu_si128((__m128i*)(input));
2840 __m128i mask = _mm_cmpeq_epi8(src, search);
2841 if (_mm_movemask_epi8(mask)) {
2842 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2843 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2844 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2845 _mm_storeu_si128((__m128i *)(output),
2846 _mm_add_epi8(src,
2847 _mm_and_si128(mask, delta)));
2848 input += sizeof(__m128i);
2849 output += sizeof(__m128i);
2850 len -= sizeof(__m128i);
2851 for (; len >= sizeof(__m128i); input += sizeof(__m128i), output += sizeof(__m128i), len -= sizeof(__m128i)) {
2852 src = _mm_loadu_si128((__m128i*)(input));
2853 mask = _mm_cmpeq_epi8(src, search);
2854 _mm_storeu_si128((__m128i *)(output),
2855 _mm_add_epi8(src,
2856 _mm_and_si128(mask, delta)));
2857 }
2858 for (; len > 0; input++, output++, len--) {
2859 *output = (*input == ch_from) ? ch_to : *input;
2860 }
2861 *output = 0;
2862 return new_str;
2863 }
2864 input += sizeof(__m128i);
2865 len -= sizeof(__m128i);
2866 } while (len >= sizeof(__m128i));
2867 }
2868 #endif
2869 for (; len > 0; input++, len--) {
2870 if (*input == ch_from) {
2871 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2872 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2873 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2874 *output = ch_to;
2875 input++;
2876 output++;
2877 len--;
2878 for (; len > 0; input++, output++, len--) {
2879 *output = (*input == ch_from) ? ch_to : *input;
2880 }
2881 *output = 0;
2882 return new_str;
2883 }
2884 }
2885 } else {
2886 unsigned char xlat[256];
2887
2888 memset(xlat, 0, sizeof(xlat));;
2889
2890 for (i = 0; i < trlen; i++) {
2891 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2892 }
2893
2894 for (i = 0; i < ZSTR_LEN(str); i++) {
2895 if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2896 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2897 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2898 do {
2899 ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2900 i++;
2901 } while (i < ZSTR_LEN(str));
2902 ZSTR_VAL(new_str)[i] = 0;
2903 return new_str;
2904 }
2905 }
2906 }
2907
2908 return zend_string_copy(str);
2909 }
2910 /* }}} */
2911
2912 static void php_strtr_array_ex(zval *return_value, zend_string *input, HashTable *pats)
2913 {
2914 const char *str = ZSTR_VAL(input);
2915 size_t slen = ZSTR_LEN(input);
2916 zend_ulong num_key;
2917 zend_string *str_key;
2918 size_t len, pos, old_pos;
2919 bool has_num_keys = false;
2920 size_t minlen = 128*1024;
2921 size_t maxlen = 0;
2922 HashTable str_hash;
2923 zval *entry;
2924 const char *key;
2925 smart_str result = {0};
2926 zend_ulong bitset[256/sizeof(zend_ulong)];
2927 zend_ulong *num_bitset;
2928
2929 /* we will collect all possible key lengths */
2930 num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
2931 memset(bitset, 0, sizeof(bitset));
2932
2933 /* check if original array has numeric keys */
2934 ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
2935 if (UNEXPECTED(!str_key)) {
2936 has_num_keys = true;
2937 } else {
2938 len = ZSTR_LEN(str_key);
2939 if (UNEXPECTED(len == 0)) {
2940 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
2941 continue;
2942 } else if (UNEXPECTED(len > slen)) {
2943 /* skip long patterns */
2944 continue;
2945 }
2946 if (len > maxlen) {
2947 maxlen = len;
2948 }
2949 if (len < minlen) {
2950 minlen = len;
2951 }
2952 /* remember possible key length */
2953 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2954 bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
2955 }
2956 } ZEND_HASH_FOREACH_END();
2957
2958 if (UNEXPECTED(has_num_keys)) {
2959 zend_string *key_used;
2960 /* we have to rebuild HashTable with numeric keys */
2961 zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
2962 ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
2963 if (UNEXPECTED(!str_key)) {
2964 key_used = zend_long_to_str(num_key);
2965 len = ZSTR_LEN(key_used);
2966 if (UNEXPECTED(len > slen)) {
2967 /* skip long patterns */
2968 zend_string_release(key_used);
2969 continue;
2970 }
2971 if (len > maxlen) {
2972 maxlen = len;
2973 }
2974 if (len < minlen) {
2975 minlen = len;
2976 }
2977 /* remember possible key length */
2978 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2979 bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
2980 } else {
2981 key_used = str_key;
2982 len = ZSTR_LEN(key_used);
2983 if (UNEXPECTED(len > slen)) {
2984 /* skip long patterns */
2985 continue;
2986 }
2987 }
2988 zend_hash_add(&str_hash, key_used, entry);
2989 if (UNEXPECTED(!str_key)) {
2990 zend_string_release_ex(key_used, 0);
2991 }
2992 } ZEND_HASH_FOREACH_END();
2993 pats = &str_hash;
2994 }
2995
2996 if (UNEXPECTED(minlen > maxlen)) {
2997 /* return the original string */
2998 if (pats == &str_hash) {
2999 zend_hash_destroy(&str_hash);
3000 }
3001 efree(num_bitset);
3002 RETURN_STR_COPY(input);
3003 }
3004
3005 old_pos = pos = 0;
3006 while (pos <= slen - minlen) {
3007 key = str + pos;
3008 if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
3009 len = maxlen;
3010 if (len > slen - pos) {
3011 len = slen - pos;
3012 }
3013 while (len >= minlen) {
3014 if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
3015 entry = zend_hash_str_find(pats, key, len);
3016 if (entry != NULL) {
3017 zend_string *tmp;
3018 zend_string *s = zval_get_tmp_string(entry, &tmp);
3019 smart_str_appendl(&result, str + old_pos, pos - old_pos);
3020 smart_str_append(&result, s);
3021 old_pos = pos + len;
3022 pos = old_pos - 1;
3023 zend_tmp_string_release(tmp);
3024 break;
3025 }
3026 }
3027 len--;
3028 }
3029 }
3030 pos++;
3031 }
3032
3033 if (result.s) {
3034 smart_str_appendl(&result, str + old_pos, slen - old_pos);
3035 RETVAL_STR(smart_str_extract(&result));
3036 } else {
3037 smart_str_free(&result);
3038 RETVAL_STR_COPY(input);
3039 }
3040
3041 if (pats == &str_hash) {
3042 zend_hash_destroy(&str_hash);
3043 }
3044 efree(num_bitset);
3045 }
3046
3047 /* {{{ count_chars */
3048 static zend_always_inline zend_long count_chars(const char *p, zend_long length, char ch)
3049 {
3050 zend_long count = 0;
3051 const char *endp;
3052
3053 #ifdef __SSE2__
3054 if (length >= sizeof(__m128i)) {
3055 __m128i search = _mm_set1_epi8(ch);
3056
3057 do {
3058 __m128i src = _mm_loadu_si128((__m128i*)(p));
3059 uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(src, search));
3060 // TODO: It would be great to use POPCNT, but it's available only with SSE4.1
3061 #if 1
3062 while (mask != 0) {
3063 count++;
3064 mask = mask & (mask - 1);
3065 }
3066 #else
3067 if (mask) {
3068 mask = mask - ((mask >> 1) & 0x5555);
3069 mask = (mask & 0x3333) + ((mask >> 2) & 0x3333);
3070 mask = (mask + (mask >> 4)) & 0x0F0F;
3071 mask = (mask + (mask >> 8)) & 0x00ff;
3072 count += mask;
3073 }
3074 #endif
3075 p += sizeof(__m128i);
3076 length -= sizeof(__m128i);
3077 } while (length >= sizeof(__m128i));
3078 }
3079 endp = p + length;
3080 while (p != endp) {
3081 count += (*p == ch);
3082 p++;
3083 }
3084 #else
3085 endp = p + length;
3086 while ((p = memchr(p, ch, endp-p))) {
3087 count++;
3088 p++;
3089 }
3090 #endif
3091 return count;
3092 }
3093 /* }}} */
3094
3095 /* {{{ php_char_to_str_ex */
3096 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, bool case_sensitivity, zend_long *replace_count)
3097 {
3098 zend_string *result;
3099 size_t char_count;
3100 int lc_from = 0;
3101 const char *source, *source_end;
3102 char *target;
3103
3104 if (case_sensitivity) {
3105 char_count = count_chars(ZSTR_VAL(str), ZSTR_LEN(str), from);
3106 } else {
3107 char_count = 0;
3108 lc_from = zend_tolower_ascii(from);
3109 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
3110 for (source = ZSTR_VAL(str); source < source_end; source++) {
3111 if (zend_tolower_ascii(*source) == lc_from) {
3112 char_count++;
3113 }
3114 }
3115 }
3116
3117 if (char_count == 0) {
3118 return zend_string_copy(str);
3119 }
3120
3121 if (replace_count) {
3122 *replace_count += char_count;
3123 }
3124
3125 if (to_len > 0) {
3126 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
3127 } else {
3128 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
3129 }
3130 target = ZSTR_VAL(result);
3131
3132 if (case_sensitivity) {
3133 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
3134
3135 while ((p = memchr(p, from, (e - p)))) {
3136 target = zend_mempcpy(target, s, (p - s));
3137 target = zend_mempcpy(target, to, to_len);
3138 p++;
3139 s = p;
3140 if (--char_count == 0) break;
3141 }
3142 if (s < e) {
3143 target = zend_mempcpy(target, s, e - s);
3144 }
3145 } else {
3146 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
3147 for (source = ZSTR_VAL(str); source < source_end; source++) {
3148 if (zend_tolower_ascii(*source) == lc_from) {
3149 target = zend_mempcpy(target, to, to_len);
3150 } else {
3151 *target = *source;
3152 target++;
3153 }
3154 }
3155 }
3156 *target = 0;
3157 return result;
3158 }
3159 /* }}} */
3160
3161 /* {{{ php_str_to_str_ex */
3162 static zend_string *php_str_to_str_ex(zend_string *haystack,
3163 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
3164 {
3165
3166 if (needle_len < ZSTR_LEN(haystack)) {
3167 zend_string *new_str;
3168 const char *end;
3169 const char *p, *r;
3170 char *e;
3171
3172 if (needle_len == str_len) {
3173 new_str = NULL;
3174 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3175 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3176 if (!new_str) {
3177 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3178 }
3179 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
3180 (*replace_count)++;
3181 }
3182 if (!new_str) {
3183 goto nothing_todo;
3184 }
3185 return new_str;
3186 } else {
3187 size_t count = 0;
3188 const char *o = ZSTR_VAL(haystack);
3189 const char *n = needle;
3190 const char *endp = o + ZSTR_LEN(haystack);
3191
3192 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3193 o += needle_len;
3194 count++;
3195 }
3196 if (count == 0) {
3197 /* Needle doesn't occur, shortcircuit the actual replacement. */
3198 goto nothing_todo;
3199 }
3200 if (str_len > needle_len) {
3201 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
3202 } else {
3203 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
3204 }
3205
3206 e = ZSTR_VAL(new_str);
3207 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3208 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3209 e = zend_mempcpy(e, p, r - p);
3210 e = zend_mempcpy(e, str, str_len);
3211 (*replace_count)++;
3212 }
3213
3214 if (p < end) {
3215 e = zend_mempcpy(e, p, end - p);
3216 }
3217
3218 *e = '\0';
3219 return new_str;
3220 }
3221 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
3222 nothing_todo:
3223 return zend_string_copy(haystack);
3224 } else {
3225 (*replace_count)++;
3226 return zend_string_init_fast(str, str_len);
3227 }
3228 }
3229 /* }}} */
3230
3231 /* {{{ php_str_to_str_i_ex */
3232 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3233 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3234 {
3235 zend_string *new_str = NULL;
3236 zend_string *lc_needle;
3237
3238 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3239 const char *end;
3240 const char *p, *r;
3241 char *e;
3242
3243 if (ZSTR_LEN(needle) == str_len) {
3244 lc_needle = zend_string_tolower(needle);
3245 end = lc_haystack + ZSTR_LEN(haystack);
3246 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3247 if (!new_str) {
3248 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3249 }
3250 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3251 (*replace_count)++;
3252 }
3253 zend_string_release_ex(lc_needle, 0);
3254
3255 if (!new_str) {
3256 goto nothing_todo;
3257 }
3258 return new_str;
3259 } else {
3260 size_t count = 0;
3261 const char *o = lc_haystack;
3262 const char *n;
3263 const char *endp = o + ZSTR_LEN(haystack);
3264
3265 lc_needle = zend_string_tolower(needle);
3266 n = ZSTR_VAL(lc_needle);
3267
3268 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3269 o += ZSTR_LEN(lc_needle);
3270 count++;
3271 }
3272 if (count == 0) {
3273 /* Needle doesn't occur, shortcircuit the actual replacement. */
3274 zend_string_release_ex(lc_needle, 0);
3275 goto nothing_todo;
3276 }
3277
3278 if (str_len > ZSTR_LEN(lc_needle)) {
3279 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3280 } else {
3281 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3282 }
3283
3284 e = ZSTR_VAL(new_str);
3285 end = lc_haystack + ZSTR_LEN(haystack);
3286
3287 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3288 e = zend_mempcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3289 e = zend_mempcpy(e, str, str_len);
3290 (*replace_count)++;
3291 }
3292
3293 if (p < end) {
3294 e = zend_mempcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3295 }
3296 *e = '\0';
3297
3298 zend_string_release_ex(lc_needle, 0);
3299
3300 return new_str;
3301 }
3302 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3303 nothing_todo:
3304 return zend_string_copy(haystack);
3305 } else {
3306 lc_needle = zend_string_tolower(needle);
3307
3308 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3309 zend_string_release_ex(lc_needle, 0);
3310 goto nothing_todo;
3311 }
3312 zend_string_release_ex(lc_needle, 0);
3313
3314 new_str = zend_string_init(str, str_len, 0);
3315
3316 (*replace_count)++;
3317 return new_str;
3318 }
3319 }
3320 /* }}} */
3321
3322 /* {{{ php_str_to_str */
3323 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3324 {
3325 zend_string *new_str;
3326
3327 if (needle_len < length) {
3328 const char *end;
3329 const char *s, *p;
3330 char *e, *r;
3331
3332 if (needle_len == str_len) {
3333 new_str = zend_string_init(haystack, length, 0);
3334 end = ZSTR_VAL(new_str) + length;
3335 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3336 memcpy(r, str, str_len);
3337 }
3338 return new_str;
3339 } else {
3340 if (str_len < needle_len) {
3341 new_str = zend_string_alloc(length, 0);
3342 } else {
3343 size_t count = 0;
3344 const char *o = haystack;
3345 const char *n = needle;
3346 const char *endp = o + length;
3347
3348 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3349 o += needle_len;
3350 count++;
3351 }
3352 if (count == 0) {
3353 /* Needle doesn't occur, shortcircuit the actual replacement. */
3354 new_str = zend_string_init(haystack, length, 0);
3355 return new_str;
3356 } else {
3357 if (str_len > needle_len) {
3358 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3359 } else {
3360 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3361 }
3362 }
3363 }
3364
3365 s = e = ZSTR_VAL(new_str);
3366 end = haystack + length;
3367 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3368 e = zend_mempcpy(e, p, r - p);
3369 e = zend_mempcpy(e, str, str_len);
3370 }
3371
3372 if (p < end) {
3373 e = zend_mempcpy(e, p, end - p);
3374 }
3375
3376 *e = '\0';
3377 new_str = zend_string_truncate(new_str, e - s, 0);
3378 return new_str;
3379 }
3380 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3381 new_str = zend_string_init(haystack, length, 0);
3382 return new_str;
3383 } else {
3384 new_str = zend_string_init(str, str_len, 0);
3385
3386 return new_str;
3387 }
3388 }
3389 /* }}} */
3390
3391 static void php_strtr_array(zval *return_value, zend_string *str, HashTable *from_ht)
3392 {
3393 if (zend_hash_num_elements(from_ht) < 1) {
3394 RETURN_STR_COPY(str);
3395 } else if (zend_hash_num_elements(from_ht) == 1) {
3396 zend_long num_key;
3397 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3398 zval *entry;
3399
3400 ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
3401 tmp_str = NULL;
3402 if (UNEXPECTED(!str_key)) {
3403 str_key = tmp_str = zend_long_to_str(num_key);
3404 }
3405 replace = zval_get_tmp_string(entry, &tmp_replace);
3406 if (ZSTR_LEN(str_key) < 1) {
3407 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3408 RETVAL_STR_COPY(str);
3409 } else if (ZSTR_LEN(str_key) == 1) {
3410 RETVAL_STR(php_char_to_str_ex(str,
3411 ZSTR_VAL(str_key)[0],
3412 ZSTR_VAL(replace),
3413 ZSTR_LEN(replace),
3414 /* case_sensitive */ true,
3415 NULL));
3416 } else {
3417 zend_long dummy = 0;
3418 RETVAL_STR(php_str_to_str_ex(str,
3419 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3420 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3421 }
3422 zend_tmp_string_release(tmp_str);
3423 zend_tmp_string_release(tmp_replace);
3424 return;
3425 } ZEND_HASH_FOREACH_END();
3426 } else {
3427 php_strtr_array_ex(return_value, str, from_ht);
3428 }
3429 }
3430
3431 /* {{{ Translates characters in str using given translation tables */
3432 PHP_FUNCTION(strtr)
3433 {
3434 zend_string *str, *from_str = NULL;
3435 HashTable *from_ht = NULL;
3436 char *to = NULL;
3437 size_t to_len = 0;
3438
3439 if (ZEND_NUM_ARGS() <= 2) {
3440 ZEND_PARSE_PARAMETERS_START(2, 2)
3441 Z_PARAM_STR(str)
3442 Z_PARAM_ARRAY_HT(from_ht)
3443 ZEND_PARSE_PARAMETERS_END();
3444 } else {
3445 ZEND_PARSE_PARAMETERS_START(3, 3)
3446 Z_PARAM_STR(str)
3447 Z_PARAM_STR(from_str)
3448 Z_PARAM_STRING(to, to_len)
3449 ZEND_PARSE_PARAMETERS_END();
3450 }
3451
3452 /* shortcut for empty string */
3453 if (ZSTR_LEN(str) == 0) {
3454 RETURN_EMPTY_STRING();
3455 }
3456
3457 if (!to) {
3458 php_strtr_array(return_value, str, from_ht);
3459 } else {
3460 RETURN_STR(php_strtr_ex(str,
3461 ZSTR_VAL(from_str),
3462 to,
3463 MIN(ZSTR_LEN(from_str), to_len)));
3464 }
3465 }
3466 /* }}} */
3467
3468 ZEND_FRAMELESS_FUNCTION(strtr, 2)
3469 {
3470 zval str_tmp;
3471 zend_string *str;
3472 zval *from;
3473
3474 Z_FLF_PARAM_STR(1, str, str_tmp);
3475 Z_FLF_PARAM_ARRAY(2, from);
3476
3477 if (ZSTR_LEN(str) == 0) {
3478 RETVAL_EMPTY_STRING();
3479 goto flf_clean;
3480 }
3481
3482 php_strtr_array(return_value, str, Z_ARR_P(from));
3483
3484 flf_clean:
3485 Z_FLF_PARAM_FREE_STR(1, str_tmp);
3486 }
3487
3488 ZEND_FRAMELESS_FUNCTION(strtr, 3)
3489 {
3490 zval str_tmp, from_tmp, to_tmp;
3491 zend_string *str, *from, *to;
3492
3493 Z_FLF_PARAM_STR(1, str, str_tmp);
3494 Z_FLF_PARAM_STR(2, from, from_tmp);
3495 Z_FLF_PARAM_STR(3, to, to_tmp);
3496
3497 if (ZSTR_LEN(str) == 0) {
3498 RETVAL_EMPTY_STRING();
3499 goto flf_clean;
3500 }
3501
3502 RETVAL_STR(php_strtr_ex(str, ZSTR_VAL(from), ZSTR_VAL(to), MIN(ZSTR_LEN(from), ZSTR_LEN(to))));
3503
3504 flf_clean:
3505 Z_FLF_PARAM_FREE_STR(1, str_tmp);
3506 Z_FLF_PARAM_FREE_STR(2, from_tmp);
3507 Z_FLF_PARAM_FREE_STR(3, to_tmp);
3508 }
3509
3510 /* {{{ Reverse a string */
3511 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3512 #include <tmmintrin.h>
3513 #elif defined(__aarch64__) || defined(_M_ARM64)
3514 #include <arm_neon.h>
3515 #endif
3516 PHP_FUNCTION(strrev)
3517 {
3518 zend_string *str;
3519 const char *s, *e;
3520 char *p;
3521 zend_string *n;
3522
3523 ZEND_PARSE_PARAMETERS_START(1, 1)
3524 Z_PARAM_STR(str)
3525 ZEND_PARSE_PARAMETERS_END();
3526
3527 n = zend_string_alloc(ZSTR_LEN(str), 0);
3528 p = ZSTR_VAL(n);
3529
3530 s = ZSTR_VAL(str);
3531 e = s + ZSTR_LEN(str);
3532 --e;
3533 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3534 if (e - s > 15) {
3535 const __m128i map = _mm_set_epi8(
3536 0, 1, 2, 3,
3537 4, 5, 6, 7,
3538 8, 9, 10, 11,
3539 12, 13, 14, 15);
3540 do {
3541 const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
3542 _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
3543 p += 16;
3544 e -= 16;
3545 } while (e - s > 15);
3546 }
3547 #elif defined(__aarch64__)
3548 if (e - s > 15) {
3549 do {
3550 const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
3551 /* Synthesize rev128 with a rev64 + ext. */
3552 const uint8x16_t rev = vrev64q_u8(str);
3553 const uint8x16_t ext = (uint8x16_t)
3554 vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
3555 vst1q_u8((uint8_t *)p, ext);
3556 p += 16;
3557 e -= 16;
3558 } while (e - s > 15);
3559 }
3560 #elif defined(_M_ARM64)
3561 if (e - s > 15) {
3562 do {
3563 const __n128 str = vld1q_u8((uint8_t *)(e - 15));
3564 /* Synthesize rev128 with a rev64 + ext. */
3565 /* strange force cast limit on windows: you cannot convert anything */
3566 const __n128 rev = vrev64q_u8(str);
3567 const __n128 ext = vextq_u64(rev, rev, 1);
3568 vst1q_u8((uint8_t *)p, ext);
3569 p += 16;
3570 e -= 16;
3571 } while (e - s > 15);
3572 }
3573 #endif
3574 while (e >= s) {
3575 *p++ = *e--;
3576 }
3577
3578 *p = '\0';
3579
3580 RETVAL_NEW_STR(n);
3581 }
3582 /* }}} */
3583
3584 /* {{{ php_similar_str */
/* Finds the longest run of bytes common to txt1 and txt2.
 *
 * On return *max is the length of that run (0 when the inputs share no byte)
 * and *count is the number of times a strictly longer run was discovered
 * during the scan. *pos1 / *pos2 receive the start offsets of the run inside
 * txt1 / txt2; they are left untouched when *max stays 0. Ties are resolved in
 * favour of the first run found (txt1 is the outer scan, txt2 the inner). */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t i, j, run;

	*max = 0;
	*count = 0;

	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* Length of the common prefix of txt1[i..] and txt2[j..]. */
			run = 0;
			while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
				run++;
			}

			if (run > *max) {
				*max = run;
				*count += 1;
				*pos1 = i;
				*pos2 = j;
			}
		}
	}
}
3606 /* }}} */
3607
3608 /* {{{ php_similar_char */
/* Returns the number of matching bytes between txt1 and txt2: the length of
 * their longest common run plus, recursively, the same measure for the parts
 * to the left and to the right of that run. */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t start1 = 0, start2 = 0, longest, improvements;
	size_t total;

	php_similar_str(txt1, len1, txt2, len2, &start1, &start2, &longest, &improvements);
	if (longest == 0) {
		return 0;
	}

	total = longest;

	/* Left-hand remainders; only examined when both are non-empty and the
	 * scan improved its best run more than once. */
	if (start1 && start2 && improvements > 1) {
		total += php_similar_char(txt1, start1, txt2, start2);
	}

	/* Right-hand remainders, when both strings extend past the run. */
	if ((start1 + longest < len1) && (start2 + longest < len2)) {
		total += php_similar_char(
				txt1 + start1 + longest, len1 - start1 - longest,
				txt2 + start2 + longest, len2 - start2 - longest);
	}

	return total;
}
3628 /* }}} */
3629
3630 /* {{{ Calculates the similarity between two strings */
3631 PHP_FUNCTION(similar_text)
3632 {
3633 zend_string *t1, *t2;
3634 zval *percent = NULL;
3635 bool compute_percentage = ZEND_NUM_ARGS() >= 3;
3636 size_t sim;
3637
3638 ZEND_PARSE_PARAMETERS_START(2, 3)
3639 Z_PARAM_STR(t1)
3640 Z_PARAM_STR(t2)
3641 Z_PARAM_OPTIONAL
3642 Z_PARAM_ZVAL(percent)
3643 ZEND_PARSE_PARAMETERS_END();
3644
3645 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3646 if (compute_percentage) {
3647 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3648 }
3649
3650 RETURN_LONG(0);
3651 }
3652
3653 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3654
3655 if (compute_percentage) {
3656 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3657 }
3658
3659 RETURN_LONG(sim);
3660 }
3661 /* }}} */
3662
3663 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3664 PHP_FUNCTION(addcslashes)
3665 {
3666 zend_string *str, *what;
3667
3668 ZEND_PARSE_PARAMETERS_START(2, 2)
3669 Z_PARAM_STR(str)
3670 Z_PARAM_STR(what)
3671 ZEND_PARSE_PARAMETERS_END();
3672
3673 if (ZSTR_LEN(str) == 0) {
3674 RETURN_EMPTY_STRING();
3675 }
3676
3677 if (ZSTR_LEN(what) == 0) {
3678 RETURN_STR_COPY(str);
3679 }
3680
3681 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3682 }
3683 /* }}} */
3684
3685 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3686 PHP_FUNCTION(addslashes)
3687 {
3688 zend_string *str;
3689
3690 ZEND_PARSE_PARAMETERS_START(1, 1)
3691 Z_PARAM_STR(str)
3692 ZEND_PARSE_PARAMETERS_END();
3693
3694 if (ZSTR_LEN(str) == 0) {
3695 RETURN_EMPTY_STRING();
3696 }
3697
3698 RETURN_STR(php_addslashes(str));
3699 }
3700 /* }}} */
3701
3702 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3703 PHP_FUNCTION(stripcslashes)
3704 {
3705 zend_string *str;
3706
3707 ZEND_PARSE_PARAMETERS_START(1, 1)
3708 Z_PARAM_STR(str)
3709 ZEND_PARSE_PARAMETERS_END();
3710
3711 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3712 php_stripcslashes(Z_STR_P(return_value));
3713 }
3714 /* }}} */
3715
3716 /* {{{ Strips backslashes from a string */
3717 PHP_FUNCTION(stripslashes)
3718 {
3719 zend_string *str;
3720
3721 ZEND_PARSE_PARAMETERS_START(1, 1)
3722 Z_PARAM_STR(str)
3723 ZEND_PARSE_PARAMETERS_END();
3724
3725 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3726 php_stripslashes(Z_STR_P(return_value));
3727 }
3728 /* }}} */
3729
3730 /* {{{ php_stripcslashes */
3731 PHPAPI void php_stripcslashes(zend_string *str)
3732 {
3733 const char *source, *end;
3734 char *target;
3735 size_t nlen = ZSTR_LEN(str), i;
3736 char numtmp[4];
3737
3738 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3739 if (*source == '\\' && source + 1 < end) {
3740 source++;
3741 switch (*source) {
3742 case 'n': *target++='\n'; nlen--; break;
3743 case 'r': *target++='\r'; nlen--; break;
3744 case 'a': *target++='\a'; nlen--; break;
3745 case 't': *target++='\t'; nlen--; break;
3746 case 'v': *target++='\v'; nlen--; break;
3747 case 'b': *target++='\b'; nlen--; break;
3748 case 'f': *target++='\f'; nlen--; break;
3749 case '\\': *target++='\\'; nlen--; break;
3750 case 'x':
3751 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3752 numtmp[0] = *++source;
3753 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3754 numtmp[1] = *++source;
3755 numtmp[2] = '\0';
3756 nlen-=3;
3757 } else {
3758 numtmp[1] = '\0';
3759 nlen-=2;
3760 }
3761 *target++=(char)strtol(numtmp, NULL, 16);
3762 break;
3763 }
3764 ZEND_FALLTHROUGH;
3765 default:
3766 i=0;
3767 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3768 numtmp[i++] = *source++;
3769 }
3770 if (i) {
3771 numtmp[i]='\0';
3772 *target++=(char)strtol(numtmp, NULL, 8);
3773 nlen-=i;
3774 source--;
3775 } else {
3776 *target++=*source;
3777 nlen--;
3778 }
3779 }
3780 } else {
3781 *target++=*source;
3782 }
3783 }
3784
3785 if (nlen != 0) {
3786 *target='\0';
3787 }
3788
3789 ZSTR_LEN(str) = nlen;
3790 }
3791 /* }}} */
3792
3793 /* {{{ php_addcslashes_str */
3794 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3795 {
3796 char flags[256];
3797 char *target;
3798 const char *source, *end;
3799 char c;
3800 size_t newlen;
3801 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3802
3803 php_charmask((const unsigned char *) what, wlength, flags);
3804
3805 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3806 c = *source;
3807 if (flags[(unsigned char)c]) {
3808 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3809 *target++ = '\\';
3810 switch (c) {
3811 case '\n': *target++ = 'n'; break;
3812 case '\t': *target++ = 't'; break;
3813 case '\r': *target++ = 'r'; break;
3814 case '\a': *target++ = 'a'; break;
3815 case '\v': *target++ = 'v'; break;
3816 case '\b': *target++ = 'b'; break;
3817 case '\f': *target++ = 'f'; break;
3818 default: target += snprintf(target, 4, "%03o", (unsigned char) c);
3819 }
3820 continue;
3821 }
3822 *target++ = '\\';
3823 }
3824 *target++ = c;
3825 }
3826 *target = 0;
3827 newlen = target - ZSTR_VAL(new_str);
3828 if (newlen < len * 4) {
3829 new_str = zend_string_truncate(new_str, newlen, 0);
3830 }
3831 return new_str;
3832 }
3833 /* }}} */
3834
3835 /* {{{ php_addcslashes */
3836 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3837 {
3838 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3839 }
3840 /* }}} */
3841
3842 /* {{{ php_addslashes */
3843
/*
 * Build-time selection of the php_addslashes() implementation.
 *
 *  - ZEND_INTRIN_SSE4_2_NATIVE:   only the SSE4.2 headers are pulled in here;
 *    no runtime dispatch code is emitted in this section.
 *  - ZEND_INTRIN_SSE4_2_RESOLVER: both php_addslashes_sse42() and
 *    php_addslashes_default() are declared and one is chosen at runtime based
 *    on zend_cpu_supports_sse42(), either through an ifunc resolver
 *    (ZEND_INTRIN_SSE4_2_FUNC_PROTO) or through a function pointer that is
 *    filled in by PHP_MINIT_FUNCTION(string_intrin).
 */
#ifdef ZEND_INTRIN_SSE4_2_NATIVE
# include <nmmintrin.h>
# include "Zend/zend_bitset.h"
#elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
# include <nmmintrin.h>
# include "Zend/zend_bitset.h"
# include "Zend/zend_cpuinfo.h"

ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
zend_string *php_addslashes_default(zend_string *str);

# ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
/* php_addslashes is bound to whatever resolve_addslashes() returns. */
PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));

typedef zend_string *(*php_addslashes_func_t)(zend_string *);

/* ifunc resolver: returns the SSE4.2 implementation when the CPU supports it,
 * the default implementation otherwise. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
static php_addslashes_func_t resolve_addslashes(void) {
	if (zend_cpu_supports_sse42()) {
		return php_addslashes_sse42;
	}
	return php_addslashes_default;
}
# else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */

/* Selected implementation; NULL until PHP_MINIT_FUNCTION(string_intrin) has run. */
static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;

/* Public entry point: forwards to the implementation chosen at module init. */
PHPAPI zend_string *php_addslashes(zend_string *str) {
	return php_addslashes_ptr(str);
}

/* {{{ PHP_MINIT_FUNCTION */
/* Picks the php_addslashes() implementation for this CPU; always returns SUCCESS. */
PHP_MINIT_FUNCTION(string_intrin)
{
	if (zend_cpu_supports_sse42()) {
		php_addslashes_ptr = php_addslashes_sse42;
	} else {
		php_addslashes_ptr = php_addslashes_default;
	}
	return SUCCESS;
}
/* }}} */
# endif
#endif
3889
/*
 * SSE4.2 implementation of addslashes: returns a string in which every NUL
 * byte is written as "\0" (backslash, digit zero) and every single quote,
 * double quote and backslash is preceded by a backslash.
 *
 * A NULL argument yields ZSTR_EMPTY_ALLOC(). When no byte needs escaping the
 * input itself is returned via zend_string_copy(); otherwise a new string is
 * allocated. Compiled as php_addslashes() in NATIVE builds and as
 * php_addslashes_sse42() in RESOLVER builds.
 */
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_RESOLVER)
# ifdef ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
# elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
zend_string *php_addslashes_sse42(zend_string *str)
# endif
{
	/* The four bytes that must be escaped: ' " \ and NUL (the remaining
	 * array elements are zero-filled). */
	ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
	__m128i w128, s128;
	/* Per-chunk match mask from _mm_cmpestrm; 0 means "no match pending". */
	uint32_t res = 0;
	/* maximum string length, worst case situation */
	char *target;
	const char *source, *end;
	size_t offset;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

	/* Phase 1: scan without copying, 16 bytes at a time, until the first
	 * chunk containing a byte that needs escaping. */
	if (ZSTR_LEN(str) > 15) {
		w128 = _mm_load_si128((__m128i *)slashchars);
		do {
			s128 = _mm_loadu_si128((__m128i *)source);
			res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
			if (res) {
				goto do_escape;
			}
			source += 16;
		} while ((end - source) > 15);
	}

	/* Scalar scan of the remaining (at most 15) bytes. */
	while (source < end) {
		switch (*source) {
			case '\0':
			case '\'':
			case '\"':
			case '\\':
				goto do_escape;
			default:
				source++;
				break;
		}
	}

	/* Nothing to escape: share the input string. */
	return zend_string_copy(str);

do_escape:
	/* Phase 2: `offset` bytes are known to be clean and are copied verbatim;
	 * the allocation request covers two output bytes for each remaining
	 * input byte plus that clean prefix. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

	if (res) {
		/* Arrived from the vector scan: finish the chunk whose mask is
		 * still held in `res`. */
		int pos = 0;
		do {
			/* n = number of clean bytes before the next flagged byte. */
			int i, n = zend_ulong_ntz(res);
			for (i = 0; i < n; i++) {
				*target++ = source[pos + i];
			}
			pos += n;
			*target++ = '\\';
			if (source[pos] == '\0') {
				*target++ = '0';
			} else {
				*target++ = source[pos];
			}
			pos++;
			/* Drop the bits that have just been consumed. */
			res = res >> (n + 1);
		} while (res);

		/* Copy the clean remainder of the chunk. */
		for (; pos < 16; pos++) {
			*target++ = source[pos];
		}
		source += 16;
	} else if (end - source > 15) {
		/* NOTE(review): reached from the scalar scan with 16+ bytes left;
		 * this looks unreachable in practice and is presumably here so that
		 * w128 is initialised on every path - confirm. */
		w128 = _mm_load_si128((__m128i *)slashchars);
	}

	/* Escape the remaining full 16-byte chunks. */
	for (; end - source > 15; source += 16) {
		int pos = 0;
		s128 = _mm_loadu_si128((__m128i *)source);
		res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
		if (res) {
			do {
				int i, n = zend_ulong_ntz(res);
				for (i = 0; i < n; i++) {
					*target++ = source[pos + i];
				}
				pos += n;
				*target++ = '\\';
				if (source[pos] == '\0') {
					*target++ = '0';
				} else {
					*target++ = source[pos];
				}
				pos++;
				res = res >> (n + 1);
			} while (res);
			for (; pos < 16; pos++) {
				*target++ = source[pos];
			}
		} else {
			/* Clean chunk: store all 16 bytes at once. */
			_mm_storeu_si128((__m128i*)target, s128);
			target += 16;
		}
	}

	/* Scalar escape of the remaining (at most 15) bytes. */
	while (source < end) {
		switch (*source) {
			case '\0':
				*target++ = '\\';
				*target++ = '0';
				break;
			case '\'':
			case '\"':
			case '\\':
				*target++ = '\\';
				ZEND_FALLTHROUGH;
			default:
				*target++ = *source;
				break;
		}
		source++;
	}

	*target = '\0';

	/* Shrink the buffer only when more than 16 bytes would be wasted;
	 * otherwise just record the real length. */
	if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
		new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
	} else {
		ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
	}

	return new_str;
}
/* }}} */
#endif
4031
#if defined(__aarch64__) || defined(_M_ARM64)
/* 16 bytes viewable either as individual lanes or as two 64-bit words, so a
 * vector comparison result can be tested for "any lane set" with one OR. */
typedef union {
	uint8_t mem[16];
	uint64_t dw[2];
} quad_word;

/* Compares each lane of x against NUL, single quote, double quote and
 * backslash; the returned lanes are non-zero where any comparison matched. */
static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
	quad_word lanes;
	uint8x16_t hits = vorrq_u8(
		vorrq_u8(vceqq_u8(x, vdupq_n_u8('\0')), vceqq_u8(x, vdupq_n_u8('\''))),
		vorrq_u8(vceqq_u8(x, vdupq_n_u8('\"')), vceqq_u8(x, vdupq_n_u8('\\'))));

	vst1q_u8(lanes.mem, hits);
	return lanes;
}

/* Writes the escaped form of the 16 bytes at source to target, using res to
 * tell which lanes need a backslash, and returns the advanced target. */
static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
{
	for (int lane = 0; lane < 16; lane++) {
		const char ch = source[lane];

		if (res.mem[lane] != 0) {
			*target++ = '\\';
			/* A NUL byte is spelled as the digit zero after the backslash. */
			*target++ = (ch == '\0') ? '0' : ch;
		} else {
			*target++ = ch;
		}
	}
	return target;
}
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
4068
/*
 * Portable implementation of addslashes (with a NEON fast path on aarch64):
 * returns a string in which every NUL byte is written as "\0" (backslash,
 * digit zero) and every single quote, double quote and backslash is preceded
 * by a backslash.
 *
 * A NULL argument yields ZSTR_EMPTY_ALLOC(). When no byte needs escaping the
 * input itself is returned via zend_string_copy(). Compiled as
 * php_addslashes_default() in RESOLVER builds and as php_addslashes()
 * when no SSE4.2 support is configured at all.
 */
#ifndef ZEND_INTRIN_SSE4_2_NATIVE
# ifdef ZEND_INTRIN_SSE4_2_RESOLVER
zend_string *php_addslashes_default(zend_string *str) /* {{{ */
# else
PHPAPI zend_string *php_addslashes(zend_string *str)
# endif
{
	/* maximum string length, worst case situation */
	char *target;
	const char *source, *end;
	size_t offset;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

	/* Phase 1: scan without copying until the first byte (or, on aarch64,
	 * the first 16-byte chunk) that needs escaping. */
# if defined(__aarch64__) || defined(_M_ARM64)
	/* All-zero means "no vector match pending" once do_escape is reached. */
	quad_word res = {0};
	if (ZSTR_LEN(str) > 15) {
		do {
			res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
			if (res.dw[0] | res.dw[1])
				goto do_escape;
			source += 16;
		} while ((end - source) > 15);
	}
	/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

	while (source < end) {
		switch (*source) {
			case '\0':
			case '\'':
			case '\"':
			case '\\':
				goto do_escape;
			default:
				source++;
				break;
		}
	}

	/* Nothing to escape: share the input string. */
	return zend_string_copy(str);

do_escape:
	/* Phase 2: `offset` bytes are known to be clean and are copied verbatim;
	 * the allocation request covers two output bytes for each remaining
	 * input byte plus that clean prefix. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

# if defined(__aarch64__) || defined(_M_ARM64)
	/* Arrived from the vector scan: escape the chunk that triggered the jump. */
	if (res.dw[0] | res.dw[1]) {
		target = aarch64_add_slashes(res, source, target);
		source += 16;
	}
	/* Remaining full 16-byte chunks: escape, or store as-is when clean. */
	for (; end - source > 15; source += 16) {
		uint8x16_t x = vld1q_u8((uint8_t *)source);
		res = aarch64_contains_slash_chars(x);
		if (res.dw[0] | res.dw[1]) {
			target = aarch64_add_slashes(res, source, target);
		} else {
			vst1q_u8((uint8_t*)target, x);
			target += 16;
		}
	}
	/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

	while (source < end) {
		switch (*source) {
			case '\0':
				*target++ = '\\';
				*target++ = '0';
				break;
			case '\'':
			case '\"':
			case '\\':
				*target++ = '\\';
				ZEND_FALLTHROUGH;
			default:
				*target++ = *source;
				break;
		}
		source++;
	}

	*target = '\0';

	/* Shrink the buffer only when more than 16 bytes would be wasted;
	 * otherwise just record the real length. */
	if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
		new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
	} else {
		ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
	}

	return new_str;
}
#endif
4170 /* }}} */
4171 /* }}} */
4172
4173 /* {{{ php_stripslashes
4174 *
4175 * be careful, this edits the string in-place */
4176 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
4177 {
4178 #if defined(__aarch64__) || defined(_M_ARM64)
4179 while (len > 15) {
4180 uint8x16_t x = vld1q_u8((uint8_t *)str);
4181 quad_word q;
4182 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
4183 if (q.dw[0] | q.dw[1]) {
4184 unsigned int i = 0;
4185 while (i < 16) {
4186 if (q.mem[i] == 0) {
4187 *out++ = str[i];
4188 i++;
4189 continue;
4190 }
4191
4192 i++; /* skip the slash */
4193 if (i < len) {
4194 char s = str[i];
4195 if (s == '0')
4196 *out++ = '\0';
4197 else
4198 *out++ = s; /* preserve the next character */
4199 i++;
4200 }
4201 }
4202 str += i;
4203 len -= i;
4204 } else {
4205 vst1q_u8((uint8_t*)out, x);
4206 out += 16;
4207 str += 16;
4208 len -= 16;
4209 }
4210 }
4211 /* Finish the last 15 bytes or less with the scalar loop. */
4212 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
4213 while (len > 0) {
4214 if (*str == '\\') {
4215 str++; /* skip the slash */
4216 len--;
4217 if (len > 0) {
4218 if (*str == '0') {
4219 *out++='\0';
4220 str++;
4221 } else {
4222 *out++ = *str++; /* preserve the next character */
4223 }
4224 len--;
4225 }
4226 } else {
4227 *out++ = *str++;
4228 len--;
4229 }
4230 }
4231
4232 return out;
4233 }
4234
/*
 * php_stripslashes(): removes backslash escapes from `str` in place and
 * shortens it accordingly. Two builds exist: an SSE2 one that handles the
 * input in 16-byte chunks before delegating the tail to
 * php_stripslashes_impl(), and a plain one that delegates everything.
 */
#ifdef __SSE2__
PHPAPI void php_stripslashes(zend_string *str)
{
	/* s = read cursor, t = write cursor (same buffer), l = bytes left to read. */
	const char *s = ZSTR_VAL(str);
	char *t = ZSTR_VAL(str);
	size_t l = ZSTR_LEN(str);

	if (l > 15) {
		const __m128i slash = _mm_set1_epi8('\\');

		do {
			__m128i in = _mm_loadu_si128((__m128i *)s);
			__m128i any_slash = _mm_cmpeq_epi8(in, slash);
			/* One bit per byte of the chunk; set where the byte is a backslash. */
			uint32_t res = _mm_movemask_epi8(any_slash);

			if (res) {
				/* n = number of bytes before the first backslash. */
				int i, n = zend_ulong_ntz(res);
				/* Process bytes up to (not including) the chunk's last byte;
				 * an escape starting at index 14 also consumes index 15. */
				const char *e = s + 15;
				l -= n;
				for (i = 0; i < n; i++) {
					*t++ = *s++;
				}
				for (; s < e; s++) {
					if (*s == '\\') {
						/* Drop the backslash and keep the escaped byte ('0' -> NUL). */
						s++;
						l--;
						if (*s == '0') {
							*t = '\0';
						} else {
							*t = *s;
						}
					} else {
						*t = *s;
					}
					t++;
					l--;
				}
			} else {
				/* No backslash in this chunk: move all 16 bytes at once. */
				_mm_storeu_si128((__m128i *)t, in);
				s += 16;
				t += 16;
				l -= 16;
			}
		} while (l > 15);
	}

	/* Remaining bytes (fewer than 16) go through the generic routine. */
	t = php_stripslashes_impl(s, t, l);
	/* Only touch length and terminator when something was actually removed. */
	if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
		ZSTR_LEN(str) = t - ZSTR_VAL(str);
		ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
	}
}
#else
PHPAPI void php_stripslashes(zend_string *str)
{
	const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
	/* Only touch length and terminator when something was actually removed. */
	if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
		ZSTR_LEN(str) = t - ZSTR_VAL(str);
		ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
	}
}
#endif
4297 /* }}} */
4298
/* Block types used by hebrev() to classify runs of characters. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/*
 * Byte classifiers, each yielding 1 or 0:
 *   isheb      - byte value in the range 224..250
 *   _isblank   - space or horizontal tab
 *   _isnewline - line feed or carriage return
 * The argument is parenthesized before the unsigned char cast so that a
 * compound expression such as `a + b` is converted as a whole rather than
 * only its first operand. It is evaluated twice: do not pass expressions
 * with side effects.
 */
#define isheb(c) (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
#define _isblank(c) (((((unsigned char) (c)) == ' ' || ((unsigned char) (c)) == '\t')) ? 1 : 0)
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4304
4305 /* {{{ php_str_replace_in_subject */
/*
 * Applies one str_replace()/str_ireplace() operation to a single subject.
 *
 * Exactly one of search_str / search_ht describes what to look for. When the
 * search is an array, its entries are applied one after another to the
 * evolving subject, paired positionally with the entries of replace_ht (an
 * empty replacement is used once replace_ht runs out) or all with replace_str.
 * Empty search strings are skipped.
 *
 * The resulting string is stored in `result`; the return value is the number
 * of replacements accumulated through the helpers' count out-parameter.
 * case_sensitivity == false selects the case-insensitive helpers.
 */
static zend_long php_str_replace_in_subject(
	zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, zval *result, bool case_sensitivity
) {
	zval *search_entry;
	zend_string *tmp_result;
	char *replace_value = NULL;
	size_t replace_len = 0;
	zend_long replace_count = 0;
	/* Lower-cased copy of the current subject, created lazily for the
	 * case-insensitive multi-byte search and dropped whenever the subject changes. */
	zend_string *lc_subject_str = NULL;
	/* Cursor into replace_ht's slot array; only meaningful when replace_ht is set. */
	uint32_t replace_idx;

	/* Nothing can be replaced in an empty subject. */
	if (ZSTR_LEN(subject_str) == 0) {
		ZVAL_EMPTY_STRING(result);
		return 0;
	}

	/* If search is an array */
	if (search_ht) {
		/* Duplicate subject string for repeated replacement */
		zend_string_addref(subject_str);

		if (replace_ht) {
			replace_idx = 0;
		} else {
			/* Set replacement value to the passed one */
			replace_value = ZSTR_VAL(replace_str);
			replace_len = ZSTR_LEN(replace_str);
		}

		/* For each entry in the search array, get the entry */
		ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
			/* Make sure we're dealing with strings. */
			zend_string *tmp_search_str;
			/* Note: this local deliberately shadows the search_str parameter. */
			zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
			zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;

			/* If replace is an array. */
			if (replace_ht) {
				/* Get current entry */
				zval *replace_entry = NULL;
				/* Advance the cursor past IS_UNDEF slots, for either array layout. */
				if (HT_IS_PACKED(replace_ht)) {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arPacked[replace_idx];
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				} else {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arData[replace_idx].val;
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				}
				if (replace_idx < replace_ht->nNumUsed) {
					/* Make sure we're dealing with strings. */
					replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);

					/* Set replacement value to the one we got from array */
					replace_value = ZSTR_VAL(replace_entry_str);
					replace_len = ZSTR_LEN(replace_entry_str);

					replace_idx++;
				} else {
					/* We've run out of replacement strings, so use an empty one. */
					replace_value = "";
					replace_len = 0;
				}
			}

			if (ZSTR_LEN(search_str) == 1) {
				/* Single-byte needle: dedicated helper handles both case modes. */
				zend_long old_replace_count = replace_count;

				tmp_result = php_char_to_str_ex(subject_str,
								ZSTR_VAL(search_str)[0],
								replace_value,
								replace_len,
								case_sensitivity,
								&replace_count);
				/* The subject changed, so a cached lower-case copy is stale. */
				if (lc_subject_str && replace_count != old_replace_count) {
					zend_string_release_ex(lc_subject_str, 0);
					lc_subject_str = NULL;
				}
			} else if (ZSTR_LEN(search_str) > 1) {
				if (case_sensitivity) {
					tmp_result = php_str_to_str_ex(subject_str,
							ZSTR_VAL(search_str), ZSTR_LEN(search_str),
							replace_value, replace_len, &replace_count);
				} else {
					zend_long old_replace_count = replace_count;

					if (!lc_subject_str) {
						lc_subject_str = zend_string_tolower(subject_str);
					}
					tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
							search_str, replace_value, replace_len, &replace_count);
					/* The subject changed, so the lower-case copy is stale. */
					if (replace_count != old_replace_count) {
						zend_string_release_ex(lc_subject_str, 0);
						lc_subject_str = NULL;
					}
				}
			} else {
				/* Empty needle: skip this search entry. */
				zend_tmp_string_release(tmp_search_str);
				zend_tmp_string_release(tmp_replace_entry_str);
				continue;
			}

			zend_tmp_string_release(tmp_search_str);
			zend_tmp_string_release(tmp_replace_entry_str);

			if (subject_str == tmp_result) {
				/* The helper handed back the very same string (presumably with
				 * an extra reference - confirm); undo that so the count stays balanced. */
				zend_string_delref(subject_str);
			} else {
				/* Continue with the new string as the subject. */
				zend_string_release_ex(subject_str, 0);
				subject_str = tmp_result;
				if (ZSTR_LEN(subject_str) == 0) {
					/* Subject became empty: no further entry can match. */
					zend_string_release_ex(subject_str, 0);
					ZVAL_EMPTY_STRING(result);
					if (lc_subject_str) {
						zend_string_release_ex(lc_subject_str, 0);
					}
					return replace_count;
				}
			}
		} ZEND_HASH_FOREACH_END();
		ZVAL_STR(result, subject_str);
		if (lc_subject_str) {
			zend_string_release_ex(lc_subject_str, 0);
		}
	} else {
		/* Search is a single string. */
		ZEND_ASSERT(search_str);
		if (ZSTR_LEN(search_str) == 1) {
			ZVAL_STR(result,
				php_char_to_str_ex(subject_str,
							ZSTR_VAL(search_str)[0],
							ZSTR_VAL(replace_str),
							ZSTR_LEN(replace_str),
							case_sensitivity,
							&replace_count));
		} else if (ZSTR_LEN(search_str) > 1) {
			if (case_sensitivity) {
				ZVAL_STR(result, php_str_to_str_ex(subject_str,
						ZSTR_VAL(search_str), ZSTR_LEN(search_str),
						ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
			} else {
				lc_subject_str = zend_string_tolower(subject_str);
				ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
						search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
				zend_string_release_ex(lc_subject_str, 0);
			}
		} else {
			/* Empty needle: the subject is returned unchanged. */
			ZVAL_STR_COPY(result, subject_str);
		}
	}
	return replace_count;
}
4466 /* }}} */
4467
4468 static void _php_str_replace_common(
4469 zval *return_value,
4470 HashTable *search_ht, zend_string *search_str,
4471 HashTable *replace_ht, zend_string *replace_str,
4472 HashTable *subject_ht, zend_string *subject_str,
4473 zval *zcount,
4474 bool case_sensitivity
4475 ) {
4476 zval *subject_entry;
4477 zval result;
4478 zend_string *string_key;
4479 zend_ulong num_key;
4480 zend_long count = 0;
4481
4482 /* Make sure we're dealing with strings and do the replacement. */
4483 if (search_str && replace_ht) {
4484 zend_argument_type_error(2, "must be of type string when argument #1 ($search) is a string");
4485 RETURN_THROWS();
4486 }
4487
4488 /* if subject is an array */
4489 if (subject_ht) {
4490 array_init(return_value);
4491
4492 /* For each subject entry, convert it to string, then perform replacement
4493 and add the result to the return_value array. */
4494 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
4495 zend_string *tmp_subject_str;
4496 ZVAL_DEREF(subject_entry);
4497 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4498 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4499 zend_tmp_string_release(tmp_subject_str);
4500
4501 /* Add to return array */
4502 if (string_key) {
4503 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4504 } else {
4505 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4506 }
4507 } ZEND_HASH_FOREACH_END();
4508 } else { /* if subject is not an array */
4509 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4510 }
4511 if (zcount) {
4512 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4513 }
4514 }
4515
4516 /* {{{ php_str_replace_common */
4517 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool case_sensitivity)
4518 {
4519 zend_string *search_str;
4520 HashTable *search_ht;
4521 zend_string *replace_str;
4522 HashTable *replace_ht;
4523 zend_string *subject_str;
4524 HashTable *subject_ht;
4525 zval *zcount = NULL;
4526
4527 ZEND_PARSE_PARAMETERS_START(3, 4)
4528 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4529 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4530 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4531 Z_PARAM_OPTIONAL
4532 Z_PARAM_ZVAL(zcount)
4533 ZEND_PARSE_PARAMETERS_END();
4534
4535 _php_str_replace_common(return_value, search_ht, search_str, replace_ht, replace_str, subject_ht, subject_str, zcount, case_sensitivity);
4536 }
4537 /* }}} */
4538
4539 /* {{{ Replaces all occurrences of search in haystack with replace */
4540 PHP_FUNCTION(str_replace)
4541 {
4542 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4543 }
4544 /* }}} */
4545
4546 ZEND_FRAMELESS_FUNCTION(str_replace, 3)
4547 {
4548 zend_string *search_str, *replace_str, *subject_str;
4549 HashTable *search_ht, *replace_ht, *subject_ht;
4550 zval search_tmp, replace_tmp, subject_tmp;
4551
4552 Z_FLF_PARAM_ARRAY_HT_OR_STR(1, search_ht, search_str, search_tmp);
4553 Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
4554 Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
4555
4556 _php_str_replace_common(return_value, search_ht, search_str, replace_ht, replace_str, subject_ht, subject_str, /* zcount */ NULL, /* case_sensitivity */ true);
4557
4558 flf_clean:;
4559 Z_FLF_PARAM_FREE_STR(1, search_tmp);
4560 Z_FLF_PARAM_FREE_STR(2, replace_tmp);
4561 Z_FLF_PARAM_FREE_STR(3, subject_tmp);
4562 }
4563
4564 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4565 PHP_FUNCTION(str_ireplace)
4566 {
4567 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4568 }
4569 /* }}} */
4570
4571 /* {{{ Converts logical Hebrew text to visual text */
4572 PHP_FUNCTION(hebrev)
4573 {
4574 char *str, *heb_str, *target;
4575 const char *tmp;
4576 size_t block_start, block_end, block_type, i;
4577 zend_long max_chars=0, char_count;
4578 size_t begin, end, orig_begin;
4579 size_t str_len;
4580 zend_string *broken_str;
4581
4582 ZEND_PARSE_PARAMETERS_START(1, 2)
4583 Z_PARAM_STRING(str, str_len)
4584 Z_PARAM_OPTIONAL
4585 Z_PARAM_LONG(max_chars)
4586 ZEND_PARSE_PARAMETERS_END();
4587
4588 if (str_len == 0) {
4589 RETURN_EMPTY_STRING();
4590 }
4591
4592 tmp = str;
4593 block_start=block_end=0;
4594
4595 heb_str = (char *) emalloc(str_len+1);
4596 target = heb_str+str_len;
4597 *target = 0;
4598 target--;
4599
4600 if (isheb(*tmp)) {
4601 block_type = _HEB_BLOCK_TYPE_HEB;
4602 } else {
4603 block_type = _HEB_BLOCK_TYPE_ENG;
4604 }
4605
4606 do {
4607 if (block_type == _HEB_BLOCK_TYPE_HEB) {
4608 while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
4609 tmp++;
4610 block_end++;
4611 }
4612 for (i = block_start+1; i<= block_end+1; i++) {
4613 *target = str[i-1];
4614 switch (*target) {
4615 case '(':
4616 *target = ')';
4617 break;
4618 case ')':
4619 *target = '(';
4620 break;
4621 case '[':
4622 *target = ']';
4623 break;
4624 case ']':
4625 *target = '[';
4626 break;
4627 case '{':
4628 *target = '}';
4629 break;
4630 case '}':
4631 *target = '{';
4632 break;
4633 case '<':
4634 *target = '>';
4635 break;
4636 case '>':
4637 *target = '<';
4638 break;
4639 case '\\':
4640 *target = '/';
4641 break;
4642 case '/':
4643 *target = '\\';
4644 break;
4645 default:
4646 break;
4647 }
4648 target--;
4649 }
4650 block_type = _HEB_BLOCK_TYPE_ENG;
4651 } else {
4652 while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
4653 tmp++;
4654 block_end++;
4655 }
4656 while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
4657 tmp--;
4658 block_end--;
4659 }
4660 for (i = block_end+1; i >= block_start+1; i--) {
4661 *target = str[i-1];
4662 target--;
4663 }
4664 block_type = _HEB_BLOCK_TYPE_HEB;
4665 }
4666 block_start=block_end+1;
4667 } while (block_end < str_len-1);
4668
4669
4670 broken_str = zend_string_alloc(str_len, 0);
4671 begin = end = str_len-1;
4672 target = ZSTR_VAL(broken_str);
4673
4674 while (1) {
4675 char_count=0;
4676 while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
4677 char_count++;
4678 begin--;
4679 if (_isnewline(heb_str[begin])) {
4680 while (begin > 0 && _isnewline(heb_str[begin-1])) {
4681 begin--;
4682 char_count++;
4683 }
4684 break;
4685 }
4686 }
4687 if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
4688 size_t new_char_count=char_count, new_begin=begin;
4689
4690 while (new_char_count > 0) {
4691 if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
4692 break;
4693 }
4694 new_begin++;
4695 new_char_count--;
4696 }
4697 if (new_char_count > 0) {
4698 begin=new_begin;
4699 }
4700 }
4701 orig_begin=begin;
4702
4703 if (_isblank(heb_str[begin])) {
4704 heb_str[begin]='\n';
4705 }
4706 while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
4707 begin++;
4708 }
4709 for (i = begin; i <= end; i++) { /* copy content */
4710 *target = heb_str[i];
4711 target++;
4712 }
4713 for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
4714 *target = heb_str[i];
4715 target++;
4716 }
4717 begin=orig_begin;
4718
4719 if (begin == 0) {
4720 *target = 0;
4721 break;
4722 }
4723 begin--;
4724 end=begin;
4725 }
4726 efree(heb_str);
4727
4728 RETURN_NEW_STR(broken_str);
4729 }
4730 /* }}} */
4731
4732 /* {{{ Converts newlines to HTML line breaks */
4733 PHP_FUNCTION(nl2br)
4734 {
4735 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4736 const char *tmp, *end;
4737 zend_string *str;
4738 char *target;
4739 size_t repl_cnt = 0;
4740 bool is_xhtml = 1;
4741 zend_string *result;
4742
4743 ZEND_PARSE_PARAMETERS_START(1, 2)
4744 Z_PARAM_STR(str)
4745 Z_PARAM_OPTIONAL
4746 Z_PARAM_BOOL(is_xhtml)
4747 ZEND_PARSE_PARAMETERS_END();
4748
4749 tmp = ZSTR_VAL(str);
4750 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4751
4752 /* it is really faster to scan twice and allocate mem once instead of scanning once
4753 and constantly reallocing */
4754 while (tmp < end) {
4755 if (*tmp == '\r') {
4756 if (*(tmp+1) == '\n') {
4757 tmp++;
4758 }
4759 repl_cnt++;
4760 } else if (*tmp == '\n') {
4761 if (*(tmp+1) == '\r') {
4762 tmp++;
4763 }
4764 repl_cnt++;
4765 }
4766
4767 tmp++;
4768 }
4769
4770 if (repl_cnt == 0) {
4771 RETURN_STR_COPY(str);
4772 }
4773
4774 {
4775 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4776
4777 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4778 target = ZSTR_VAL(result);
4779 }
4780
4781 tmp = ZSTR_VAL(str);
4782 while (tmp < end) {
4783 switch (*tmp) {
4784 case '\r':
4785 case '\n':
4786 *target++ = '<';
4787 *target++ = 'b';
4788 *target++ = 'r';
4789
4790 if (is_xhtml) {
4791 *target++ = ' ';
4792 *target++ = '/';
4793 }
4794
4795 *target++ = '>';
4796
4797 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4798 *target++ = *tmp++;
4799 }
4800 ZEND_FALLTHROUGH;
4801 default:
4802 *target++ = *tmp;
4803 }
4804
4805 tmp++;
4806 }
4807
4808 *target = '\0';
4809
4810 RETURN_NEW_STR(result);
4811 }
4812 /* }}} */
4813
4814 /* {{{ Strips HTML and PHP tags from a string */
4815 PHP_FUNCTION(strip_tags)
4816 {
4817 zend_string *buf;
4818 zend_string *str;
4819 zend_string *allow_str = NULL;
4820 HashTable *allow_ht = NULL;
4821 const char *allowed_tags=NULL;
4822 size_t allowed_tags_len=0;
4823 smart_str tags_ss = {0};
4824
4825 ZEND_PARSE_PARAMETERS_START(1, 2)
4826 Z_PARAM_STR(str)
4827 Z_PARAM_OPTIONAL
4828 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4829 ZEND_PARSE_PARAMETERS_END();
4830
4831 if (allow_ht) {
4832 zval *tmp;
4833 zend_string *tag;
4834
4835 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4836 tag = zval_get_string(tmp);
4837 smart_str_appendc(&tags_ss, '<');
4838 smart_str_append(&tags_ss, tag);
4839 smart_str_appendc(&tags_ss, '>');
4840 zend_string_release(tag);
4841 } ZEND_HASH_FOREACH_END();
4842 if (tags_ss.s) {
4843 smart_str_0(&tags_ss);
4844 allowed_tags = ZSTR_VAL(tags_ss.s);
4845 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4846 }
4847 } else if (allow_str) {
4848 allowed_tags = ZSTR_VAL(allow_str);
4849 allowed_tags_len = ZSTR_LEN(allow_str);
4850 }
4851
4852 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4853 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4854 smart_str_free(&tags_ss);
4855 RETURN_NEW_STR(buf);
4856 }
4857 /* }}} */
4858
4859 static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
4860 const char *retval;
4861
4862 if (zend_string_equals_literal(loc, "0")) {
4863 loc = NULL;
4864 } else {
4865 if (ZSTR_LEN(loc) >= 255) {
4866 php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
4867 return NULL;
4868 }
4869 }
4870
4871 # ifndef PHP_WIN32
4872 retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
4873 # else
4874 if (loc) {
4875 /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
4876 char *locp = ZSTR_VAL(loc);
4877 if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
4878 && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
4879 && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
4880 && (locp[5] == '\0' || locp[5] == '.')
4881 && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
4882 && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
4883 && locp[5] == '\0')
4884 ) {
4885 retval = NULL;
4886 } else {
4887 retval = setlocale(cat, ZSTR_VAL(loc));
4888 }
4889 } else {
4890 retval = setlocale(cat, NULL);
4891 }
4892 # endif
4893 if (!retval) {
4894 return NULL;
4895 }
4896
4897 if (loc) {
4898 /* Remember if locale was changed */
4899 size_t len = strlen(retval);
4900
4901 BG(locale_changed) = 1;
4902 if (cat == LC_CTYPE || cat == LC_ALL) {
4903 zend_update_current_locale();
4904 if (BG(ctype_string)) {
4905 zend_string_release_ex(BG(ctype_string), 0);
4906 }
4907 if (len == 1 && *retval == 'C') {
4908 /* C locale is represented as NULL. */
4909 BG(ctype_string) = NULL;
4910 return ZSTR_CHAR('C');
4911 } else if (zend_string_equals_cstr(loc, retval, len)) {
4912 BG(ctype_string) = zend_string_copy(loc);
4913 return zend_string_copy(BG(ctype_string));
4914 } else {
4915 BG(ctype_string) = zend_string_init(retval, len, 0);
4916 return zend_string_copy(BG(ctype_string));
4917 }
4918 } else if (zend_string_equals_cstr(loc, retval, len)) {
4919 return zend_string_copy(loc);
4920 }
4921 }
4922 return zend_string_init(retval, strlen(retval), 0);
4923 }
4924
4925 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4926 zend_string *tmp_loc_str;
4927 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4928 if (UNEXPECTED(loc_str == NULL)) {
4929 return NULL;
4930 }
4931 zend_string *result = try_setlocale_str(cat, loc_str);
4932 zend_tmp_string_release(tmp_loc_str);
4933 return result;
4934 }
4935
4936 /* {{{ Set locale information */
4937 PHP_FUNCTION(setlocale)
4938 {
4939 zend_long cat;
4940 zval *args = NULL;
4941 int num_args;
4942
4943 ZEND_PARSE_PARAMETERS_START(2, -1)
4944 Z_PARAM_LONG(cat)
4945 Z_PARAM_VARIADIC('+', args, num_args)
4946 ZEND_PARSE_PARAMETERS_END();
4947
4948 for (uint32_t i = 0; i < num_args; i++) {
4949 if (Z_TYPE(args[i]) == IS_ARRAY) {
4950 zval *elem;
4951 ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
4952 zend_string *result = try_setlocale_zval(cat, elem);
4953 if (EG(exception)) {
4954 RETURN_THROWS();
4955 }
4956 if (result) {
4957 RETURN_STR(result);
4958 }
4959 } ZEND_HASH_FOREACH_END();
4960 } else {
4961 zend_string *result = try_setlocale_zval(cat, &args[i]);
4962 if (EG(exception)) {
4963 RETURN_THROWS();
4964 }
4965 if (result) {
4966 RETURN_STR(result);
4967 }
4968 }
4969 }
4970
4971 RETURN_FALSE;
4972 }
4973 /* }}} */
4974
4975 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
4976 PHP_FUNCTION(parse_str)
4977 {
4978 char *arg;
4979 zval *arrayArg = NULL;
4980 char *res = NULL;
4981 size_t arglen;
4982
4983 ZEND_PARSE_PARAMETERS_START(2, 2)
4984 Z_PARAM_STRING(arg, arglen)
4985 Z_PARAM_ZVAL(arrayArg)
4986 ZEND_PARSE_PARAMETERS_END();
4987
4988 arrayArg = zend_try_array_init(arrayArg);
4989 if (!arrayArg) {
4990 RETURN_THROWS();
4991 }
4992
4993 res = estrndup(arg, arglen);
4994 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
4995 }
4996 /* }}} */
4997
4998 #define PHP_TAG_BUF_SIZE 1023
4999
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags
 *
 * tag: the collected tag text, `len` bytes long. The scan below stops at the
 *      first '>' (or at whitespace following the tag name), so the caller
 *      must pass text that contains a '>'.
 * set: NUL-terminated list of allowed tags, searched with strstr(); the
 *      normalized tag is lowercased, so `set` is expected to be lowercase.
 *
 * Returns true when the normalized form "<name>" occurs in `set`.
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
static bool php_tag_find(char *tag, size_t len, const char *set) {
	char c, *n;
	const char *t;
	int state = 0;
	bool done = false;
	bool found;
	char *norm;

	if (len == 0) {
		return false;
	}

	/* Room for the normalized tag plus the closing '>' / terminator. */
	norm = emalloc(len+1);

	n = norm;
	t = tag;
	c = zend_tolower_ascii(*t);
	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	while (!done) {
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = true;
				break;
			default:
				/* isspace() needs a value representable as unsigned char;
				 * a negative plain char (byte >= 0x80) would be undefined. */
				if (!isspace((unsigned char)c)) {
					if (state == 0) {
						state=1;
					}
					/* Drop a '/' that directly follows '<' or directly precedes '>'. */
					if (c != '/' || (*(t-1) != '<' && *(t+1) != '>')) {
						*(n++) = c;
					}
				} else {
					/* Whitespace after the tag name ends the name. */
					if (state == 1)
						done = true;
				}
				break;
		}
		c = zend_tolower_ascii(*(++t));
	}
	*(n++) = '>';
	*n = '\0';

	found = strstr(set, norm) != NULL;
	efree(norm);
	return found;
}
/* }}} */
5065
5066 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
5067 {
5068 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
5069 }
5070 /* }}} */
5071
5072 /* {{{ php_strip_tags
5073
5074 A simple little state-machine to strip out html and php tags
5075
5076 State 0 is the output state, State 1 means we are inside a
5077 normal html tag and state 2 means we are inside a php tag.
5078
5079 The state variable is passed in to allow a function like fgetss
5080 to maintain state across calls to the function.
5081
5082 lc holds the last significant character read and br is a bracket
5083 counter.
5084
5085 When an allow string is passed in we keep track of the string
5086 in state 1 and when the tag is closed check it against the
5087 allow string to see if we should allow it.
5088
5089 swm: Added ability to strip <?xml tags without assuming it PHP
5090 code.
5091 */
5092 PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
5093 {
5094 char *tbuf, *tp, *rp, c, lc;
5095 const char *buf, *p, *end;
5096 int br, depth=0, in_q = 0;
5097 uint8_t state = 0;
5098 size_t pos;
5099 char *allow_free = NULL;
5100 char is_xml = 0;
5101
5102 buf = estrndup(rbuf, len);
5103 end = buf + len;
5104 lc = '\0';
5105 p = buf;
5106 rp = rbuf;
5107 br = 0;
5108 if (allow) {
5109 allow_free = zend_str_tolower_dup_ex(allow, allow_len);
5110 allow = allow_free ? allow_free : allow;
5111 tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
5112 tp = tbuf;
5113 } else {
5114 tbuf = tp = NULL;
5115 }
5116
5117 state_0:
5118 if (p >= end) {
5119 goto finish;
5120 }
5121 c = *p;
5122 switch (c) {
5123 case '\0':
5124 break;
5125 case '<':
5126 if (in_q) {
5127 break;
5128 }
5129 if (isspace(*(p + 1)) && !allow_tag_spaces) {
5130 *(rp++) = c;
5131 break;
5132 }
5133 lc = '<';
5134 state = 1;
5135 if (allow) {
5136 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
5137 pos = tp - tbuf;
5138 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
5139 tp = tbuf + pos;
5140 }
5141 *(tp++) = '<';
5142 }
5143 p++;
5144 goto state_1;
5145 case '>':
5146 if (depth) {
5147 depth--;
5148 break;
5149 }
5150
5151 if (in_q) {
5152 break;
5153 }
5154
5155 *(rp++) = c;
5156 break;
5157 default:
5158 *(rp++) = c;
5159 break;
5160 }
5161 p++;
5162 goto state_0;
5163
5164 state_1:
5165 if (p >= end) {
5166 goto finish;
5167 }
5168 c = *p;
5169 switch (c) {
5170 case '\0':
5171 break;
5172 case '<':
5173 if (in_q) {
5174 break;
5175 }
5176 if (isspace(*(p + 1)) && !allow_tag_spaces) {
5177 goto reg_char_1;
5178 }
5179 depth++;
5180 break;
5181 case '>':
5182 if (depth) {
5183 depth--;
5184 break;
5185 }
5186 if (in_q) {
5187 break;
5188 }
5189
5190 lc = '>';
5191 if (is_xml && p >= buf + 1 && *(p -1) == '-') {
5192 break;
5193 }
5194 in_q = state = is_xml = 0;
5195 if (allow) {
5196 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
5197 pos = tp - tbuf;
5198 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
5199 tp = tbuf + pos;
5200 }
5201 *(tp++) = '>';
5202 *tp='\0';
5203 if (php_tag_find(tbuf, tp-tbuf, allow)) {
5204 rp = zend_mempcpy(rp, tbuf, tp - tbuf);
5205 }
5206 tp = tbuf;
5207 }
5208 p++;
5209 goto state_0;
5210 case '"':
5211 case '\'':
5212 if (p != buf && (!in_q || *p == in_q)) {
5213 if (in_q) {
5214 in_q = 0;
5215 } else {
5216 in_q = *p;
5217 }
5218 }
5219 goto reg_char_1;
5220 case '!':
5221 /* JavaScript & Other HTML scripting languages */
5222 if (p >= buf + 1 && *(p-1) == '<') {
5223 state = 3;
5224 lc = c;
5225 p++;
5226 goto state_3;
5227 } else {
5228 goto reg_char_1;
5229 }
5230 break;
5231 case '?':
5232 if (p >= buf + 1 && *(p-1) == '<') {
5233 br=0;
5234 state = 2;
5235 p++;
5236 goto state_2;
5237 } else {
5238 goto reg_char_1;
5239 }
5240 break;
5241 default:
5242 reg_char_1:
5243 if (allow) {
5244 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
5245 pos = tp - tbuf;
5246 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
5247 tp = tbuf + pos;
5248 }
5249 *(tp++) = c;
5250 }
5251 break;
5252 }
5253 p++;
5254 goto state_1;
5255
5256 state_2:
5257 if (p >= end) {
5258 goto finish;
5259 }
5260 c = *p;
5261 switch (c) {
5262 case '(':
5263 if (lc != '"' && lc != '\'') {
5264 lc = '(';
5265 br++;
5266 }
5267 break;
5268 case ')':
5269 if (lc != '"' && lc != '\'') {
5270 lc = ')';
5271 br--;
5272 }
5273 break;
5274 case '>':
5275 if (depth) {
5276 depth--;
5277 break;
5278 }
5279 if (in_q) {
5280 break;
5281 }
5282
5283 if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
5284 in_q = state = 0;
5285 tp = tbuf;
5286 p++;
5287 goto state_0;
5288 }
5289 break;
5290 case '"':
5291 case '\'':
5292 if (p >= buf + 1 && *(p-1) != '\\') {
5293 if (lc == c) {
5294 lc = '\0';
5295 } else if (lc != '\\') {
5296 lc = c;
5297 }
5298 if (p != buf && (!in_q || *p == in_q)) {
5299 if (in_q) {
5300 in_q = 0;
5301 } else {
5302 in_q = *p;
5303 }
5304 }
5305 }
5306 break;
5307 case 'l':
5308 case 'L':
5309 /* swm: If we encounter '<?xml' then we shouldn't be in
5310 * state == 2 (PHP). Switch back to HTML.
5311 */
5312 if (state == 2 && p > buf+4
5313 && (*(p-1) == 'm' || *(p-1) == 'M')
5314 && (*(p-2) == 'x' || *(p-2) == 'X')
5315 && *(p-3) == '?'
5316 && *(p-4) == '<') {
5317 state = 1; is_xml=1;
5318 p++;
5319 goto state_1;
5320 }
5321 break;
5322 default:
5323 break;
5324 }
5325 p++;
5326 goto state_2;
5327
5328 state_3:
5329 if (p >= end) {
5330 goto finish;
5331 }
5332 c = *p;
5333 switch (c) {
5334 case '>':
5335 if (depth) {
5336 depth--;
5337 break;
5338 }
5339 if (in_q) {
5340 break;
5341 }
5342 in_q = state = 0;
5343 tp = tbuf;
5344 p++;
5345 goto state_0;
5346 case '"':
5347 case '\'':
5348 if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
5349 if (in_q) {
5350 in_q = 0;
5351 } else {
5352 in_q = *p;
5353 }
5354 }
5355 break;
5356 case '-':
5357 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
5358 state = 4;
5359 p++;
5360 goto state_4;
5361 }
5362 break;
5363 case 'E':
5364 case 'e':
5365 /* !DOCTYPE exception */
5366 if (p > buf+6
5367 && (*(p-1) == 'p' || *(p-1) == 'P')
5368 && (*(p-2) == 'y' || *(p-2) == 'Y')
5369 && (*(p-3) == 't' || *(p-3) == 'T')
5370 && (*(p-4) == 'c' || *(p-4) == 'C')
5371 && (*(p-5) == 'o' || *(p-5) == 'O')
5372 && (*(p-6) == 'd' || *(p-6) == 'D')) {
5373 state = 1;
5374 p++;
5375 goto state_1;
5376 }
5377 break;
5378 default:
5379 break;
5380 }
5381 p++;
5382 goto state_3;
5383
5384 state_4:
5385 while (p < end) {
5386 c = *p;
5387 if (c == '>' && !in_q) {
5388 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
5389 in_q = state = 0;
5390 tp = tbuf;
5391 p++;
5392 goto state_0;
5393 }
5394 }
5395 p++;
5396 }
5397
5398 finish:
5399 if (rp < rbuf + len) {
5400 *rp = '\0';
5401 }
5402 efree((void *)buf);
5403 if (tbuf) {
5404 efree(tbuf);
5405 }
5406 if (allow_free) {
5407 efree(allow_free);
5408 }
5409
5410 return (size_t)(rp - rbuf);
5411 }
5412 /* }}} */
5413
5414 /* {{{ Parse a CSV string into an array */
5415 PHP_FUNCTION(str_getcsv)
5416 {
5417 zend_string *str;
5418 char delimiter = ',', enclosure = '"';
5419 char *delimiter_str = NULL, *enclosure_str = NULL;
5420 size_t delimiter_str_len = 0, enclosure_str_len = 0;
5421 zend_string *escape_str = NULL;
5422
5423 ZEND_PARSE_PARAMETERS_START(1, 4)
5424 Z_PARAM_STR(str)
5425 Z_PARAM_OPTIONAL
5426 Z_PARAM_STRING(delimiter_str, delimiter_str_len)
5427 Z_PARAM_STRING(enclosure_str, enclosure_str_len)
5428 Z_PARAM_STR(escape_str)
5429 ZEND_PARSE_PARAMETERS_END();
5430
5431 if (delimiter_str != NULL) {
5432 /* Make sure that there is at least one character in string */
5433 if (delimiter_str_len != 1) {
5434 zend_argument_value_error(2, "must be a single character");
5435 RETURN_THROWS();
5436 }
5437 /* use first character from string */
5438 delimiter = delimiter_str[0];
5439 }
5440 if (enclosure_str != NULL) {
5441 if (enclosure_str_len != 1) {
5442 zend_argument_value_error(3, "must be a single character");
5443 RETURN_THROWS();
5444 }
5445 /* use first character from string */
5446 enclosure = enclosure_str[0];
5447 }
5448
5449 int escape_char = php_csv_handle_escape_argument(escape_str, 4);
5450 if (escape_char == PHP_CSV_ESCAPE_ERROR) {
5451 RETURN_THROWS();
5452 }
5453
5454 HashTable *values = php_fgetcsv(NULL, delimiter, enclosure, escape_char, ZSTR_LEN(str), ZSTR_VAL(str));
5455 if (values == NULL) {
5456 values = php_bc_fgetcsv_empty_line();
5457 }
5458 RETURN_ARR(values);
5459 }
5460 /* }}} */
5461
5462 /* {{{ Returns the input string repeat mult times */
5463 PHP_FUNCTION(str_repeat)
5464 {
5465 zend_string *input_str; /* Input string */
5466 zend_long mult; /* Multiplier */
5467 zend_string *result; /* Resulting string */
5468 size_t result_len; /* Length of the resulting string */
5469
5470 ZEND_PARSE_PARAMETERS_START(2, 2)
5471 Z_PARAM_STR(input_str)
5472 Z_PARAM_LONG(mult)
5473 ZEND_PARSE_PARAMETERS_END();
5474
5475 if (mult < 0) {
5476 zend_argument_value_error(2, "must be greater than or equal to 0");
5477 RETURN_THROWS();
5478 }
5479
5480 /* Don't waste our time if it's empty */
5481 /* ... or if the multiplier is zero */
5482 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5483 RETURN_EMPTY_STRING();
5484
5485 /* Initialize the result string */
5486 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5487 result_len = ZSTR_LEN(input_str) * mult;
5488 ZSTR_COPY_CONCAT_PROPERTIES(result, input_str);
5489
5490 /* Heavy optimization for situations where input string is 1 byte long */
5491 if (ZSTR_LEN(input_str) == 1) {
5492 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5493 } else {
5494 const char *s, *ee;
5495 char *e;
5496 ptrdiff_t l=0;
5497 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5498 s = ZSTR_VAL(result);
5499 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5500 ee = ZSTR_VAL(result) + result_len;
5501
5502 while (e<ee) {
5503 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5504 memmove(e, s, l);
5505 e += l;
5506 }
5507 }
5508
5509 ZSTR_VAL(result)[result_len] = '\0';
5510
5511 RETURN_NEW_STR(result);
5512 }
5513 /* }}} */
5514
5515 /* {{{ Returns info about what characters are used in input */
5516 PHP_FUNCTION(count_chars)
5517 {
5518 zend_string *input;
5519 int chars[256];
5520 zend_long mymode=0;
5521 const unsigned char *buf;
5522 int inx;
5523 char retstr[256];
5524 size_t retlen=0;
5525 size_t tmp = 0;
5526
5527 ZEND_PARSE_PARAMETERS_START(1, 2)
5528 Z_PARAM_STR(input)
5529 Z_PARAM_OPTIONAL
5530 Z_PARAM_LONG(mymode)
5531 ZEND_PARSE_PARAMETERS_END();
5532
5533 if (mymode < 0 || mymode > 4) {
5534 zend_argument_value_error(2, "must be between 0 and 4 (inclusive)");
5535 RETURN_THROWS();
5536 }
5537
5538 buf = (const unsigned char *) ZSTR_VAL(input);
5539 memset((void*) chars, 0, sizeof(chars));
5540
5541 while (tmp < ZSTR_LEN(input)) {
5542 chars[*buf]++;
5543 buf++;
5544 tmp++;
5545 }
5546
5547 if (mymode < 3) {
5548 array_init(return_value);
5549 }
5550
5551 for (inx = 0; inx < 256; inx++) {
5552 switch (mymode) {
5553 case 0:
5554 add_index_long(return_value, inx, chars[inx]);
5555 break;
5556 case 1:
5557 if (chars[inx] != 0) {
5558 add_index_long(return_value, inx, chars[inx]);
5559 }
5560 break;
5561 case 2:
5562 if (chars[inx] == 0) {
5563 add_index_long(return_value, inx, chars[inx]);
5564 }
5565 break;
5566 case 3:
5567 if (chars[inx] != 0) {
5568 retstr[retlen++] = inx;
5569 }
5570 break;
5571 case 4:
5572 if (chars[inx] == 0) {
5573 retstr[retlen++] = inx;
5574 }
5575 break;
5576 }
5577 }
5578
5579 if (mymode == 3 || mymode == 4) {
5580 RETURN_STRINGL(retstr, retlen);
5581 }
5582 }
5583 /* }}} */
5584
5585 /* {{{ php_strnatcmp */
5586 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, bool is_case_insensitive)
5587 {
5588 zend_string *s1, *s2;
5589
5590 ZEND_PARSE_PARAMETERS_START(2, 2)
5591 Z_PARAM_STR(s1)
5592 Z_PARAM_STR(s2)
5593 ZEND_PARSE_PARAMETERS_END();
5594
5595 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5596 ZSTR_VAL(s2), ZSTR_LEN(s2),
5597 is_case_insensitive));
5598 }
5599 /* }}} */
5600
5601 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5602 PHP_FUNCTION(strnatcmp)
5603 {
5604 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5605 }
5606 /* }}} */
5607
5608 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5609 PHP_FUNCTION(strnatcasecmp)
5610 {
5611 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5612 }
5613 /* }}} */
5614
5615 /* {{{ Returns numeric formatting information based on the current locale */
5616 PHP_FUNCTION(localeconv)
5617 {
5618 zval grouping, mon_grouping;
5619 size_t len, i;
5620
5621 ZEND_PARSE_PARAMETERS_NONE();
5622
5623 array_init(return_value);
5624 array_init(&grouping);
5625 array_init(&mon_grouping);
5626
5627 {
5628 struct lconv currlocdata;
5629
5630 localeconv_r( &currlocdata );
5631
5632 /* Grab the grouping data out of the array */
5633 len = strlen(currlocdata.grouping);
5634
5635 for (i = 0; i < len; i++) {
5636 add_index_long(&grouping, i, currlocdata.grouping[i]);
5637 }
5638
5639 /* Grab the monetary grouping data out of the array */
5640 len = strlen(currlocdata.mon_grouping);
5641
5642 for (i = 0; i < len; i++) {
5643 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5644 }
5645
5646 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5647 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5648 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5649 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5650 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5651 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5652 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5653 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5654 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5655 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5656 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5657 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5658 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5659 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5660 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5661 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5662 }
5663
5664 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5665 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5666 }
5667 /* }}} */
5668
5669 /* {{{ Returns the number of times a substring occurs in the string */
5670 PHP_FUNCTION(substr_count)
5671 {
5672 char *haystack, *needle;
5673 zend_long offset = 0, length = 0;
5674 bool length_is_null = 1;
5675 zend_long count;
5676 size_t haystack_len, needle_len;
5677 const char *p, *endp;
5678
5679 ZEND_PARSE_PARAMETERS_START(2, 4)
5680 Z_PARAM_STRING(haystack, haystack_len)
5681 Z_PARAM_STRING(needle, needle_len)
5682 Z_PARAM_OPTIONAL
5683 Z_PARAM_LONG(offset)
5684 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5685 ZEND_PARSE_PARAMETERS_END();
5686
5687 if (needle_len == 0) {
5688 zend_argument_must_not_be_empty_error(2);
5689 RETURN_THROWS();
5690 }
5691
5692 p = haystack;
5693
5694 if (offset) {
5695 if (offset < 0) {
5696 offset += (zend_long)haystack_len;
5697 }
5698 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5699 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5700 RETURN_THROWS();
5701 }
5702 p += offset;
5703 haystack_len -= offset;
5704 }
5705
5706 if (!length_is_null) {
5707 if (length < 0) {
5708 length += haystack_len;
5709 }
5710 if (length < 0 || ((size_t)length > haystack_len)) {
5711 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5712 RETURN_THROWS();
5713 }
5714 } else {
5715 length = haystack_len;
5716 }
5717
5718 if (needle_len == 1) {
5719 count = count_chars(p, length, needle[0]);
5720 } else {
5721 count = 0;
5722 endp = p + length;
5723 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5724 p += needle_len;
5725 count++;
5726 }
5727 }
5728
5729 RETURN_LONG(count);
5730 }
5731 /* }}} */
5732
5733 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5734 PHP_FUNCTION(str_pad)
5735 {
5736 /* Input arguments */
5737 zend_string *input; /* Input string */
5738 zend_long pad_length; /* Length to pad to */
5739
5740 /* Helper variables */
5741 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5742 char *pad_str = " "; /* Pointer to padding string */
5743 size_t pad_str_len = 1;
5744 zend_long pad_type_val = PHP_STR_PAD_RIGHT; /* The padding type value */
5745 size_t i, left_pad=0, right_pad=0;
5746 zend_string *result = NULL; /* Resulting string */
5747
5748 ZEND_PARSE_PARAMETERS_START(2, 4)
5749 Z_PARAM_STR(input)
5750 Z_PARAM_LONG(pad_length)
5751 Z_PARAM_OPTIONAL
5752 Z_PARAM_STRING(pad_str, pad_str_len)
5753 Z_PARAM_LONG(pad_type_val)
5754 ZEND_PARSE_PARAMETERS_END();
5755
5756 /* If resulting string turns out to be shorter than input string,
5757 we simply copy the input and return. */
5758 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5759 RETURN_STR_COPY(input);
5760 }
5761
5762 if (pad_str_len == 0) {
5763 zend_argument_must_not_be_empty_error(3);
5764 RETURN_THROWS();
5765 }
5766
5767 if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
5768 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5769 RETURN_THROWS();
5770 }
5771
5772 num_pad_chars = pad_length - ZSTR_LEN(input);
5773 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5774 ZSTR_LEN(result) = 0;
5775
5776 /* We need to figure out the left/right padding lengths. */
5777 switch (pad_type_val) {
5778 case PHP_STR_PAD_RIGHT:
5779 left_pad = 0;
5780 right_pad = num_pad_chars;
5781 break;
5782
5783 case PHP_STR_PAD_LEFT:
5784 left_pad = num_pad_chars;
5785 right_pad = 0;
5786 break;
5787
5788 case PHP_STR_PAD_BOTH:
5789 left_pad = num_pad_chars / 2;
5790 right_pad = num_pad_chars - left_pad;
5791 break;
5792 }
5793
5794 /* First we pad on the left. */
5795 for (i = 0; i < left_pad; i++)
5796 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5797
5798 /* Then we copy the input string. */
5799 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5800 ZSTR_LEN(result) += ZSTR_LEN(input);
5801
5802 /* Finally, we pad on the right. */
5803 for (i = 0; i < right_pad; i++)
5804 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5805
5806 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5807
5808 RETURN_NEW_STR(result);
5809 }
5810 /* }}} */
5811
5812 /* {{{ Implements an ANSI C compatible sscanf */
5813 PHP_FUNCTION(sscanf)
5814 {
5815 zval *args = NULL;
5816 char *str, *format;
5817 size_t str_len, format_len;
5818 int result, num_args = 0;
5819
5820 ZEND_PARSE_PARAMETERS_START(2, -1)
5821 Z_PARAM_STRING(str, str_len)
5822 Z_PARAM_STRING(format, format_len)
5823 Z_PARAM_VARIADIC('*', args, num_args)
5824 ZEND_PARSE_PARAMETERS_END();
5825
5826 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5827
5828 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5829 WRONG_PARAM_COUNT;
5830 }
5831 }
5832 /* }}} */
5833
5834 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5835 static zend_string *php_str_rot13(zend_string *str)
5836 {
5837 zend_string *ret;
5838 const char *p, *e;
5839 char *target;
5840
5841 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5842 return ZSTR_EMPTY_ALLOC();
5843 }
5844
5845 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5846
5847 p = ZSTR_VAL(str);
5848 e = p + ZSTR_LEN(str);
5849 target = ZSTR_VAL(ret);
5850
5851 #ifdef __SSE2__
5852 if (e - p > 15) {
5853 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5854 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5855 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5856 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5857 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5858 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5859 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5860 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5861 const __m128i add = _mm_set1_epi8(13);
5862 const __m128i sub = _mm_set1_epi8(-13);
5863
5864 do {
5865 __m128i in, gt, lt, cmp, delta;
5866
5867 delta = _mm_setzero_si128();
5868 in = _mm_loadu_si128((__m128i *)p);
5869
5870 gt = _mm_cmpgt_epi8(in, a_minus_1);
5871 lt = _mm_cmplt_epi8(in, m_plus_1);
5872 cmp = _mm_and_si128(lt, gt);
5873 if (_mm_movemask_epi8(cmp)) {
5874 cmp = _mm_and_si128(cmp, add);
5875 delta = _mm_or_si128(delta, cmp);
5876 }
5877
5878 gt = _mm_cmpgt_epi8(in, n_minus_1);
5879 lt = _mm_cmplt_epi8(in, z_plus_1);
5880 cmp = _mm_and_si128(lt, gt);
5881 if (_mm_movemask_epi8(cmp)) {
5882 cmp = _mm_and_si128(cmp, sub);
5883 delta = _mm_or_si128(delta, cmp);
5884 }
5885
5886 gt = _mm_cmpgt_epi8(in, A_minus_1);
5887 lt = _mm_cmplt_epi8(in, M_plus_1);
5888 cmp = _mm_and_si128(lt, gt);
5889 if (_mm_movemask_epi8(cmp)) {
5890 cmp = _mm_and_si128(cmp, add);
5891 delta = _mm_or_si128(delta, cmp);
5892 }
5893
5894 gt = _mm_cmpgt_epi8(in, N_minus_1);
5895 lt = _mm_cmplt_epi8(in, Z_plus_1);
5896 cmp = _mm_and_si128(lt, gt);
5897 if (_mm_movemask_epi8(cmp)) {
5898 cmp = _mm_and_si128(cmp, sub);
5899 delta = _mm_or_si128(delta, cmp);
5900 }
5901
5902 in = _mm_add_epi8(in, delta);
5903 _mm_storeu_si128((__m128i *)target, in);
5904
5905 p += 16;
5906 target += 16;
5907 } while (e - p > 15);
5908 }
5909 #endif
5910
5911 while (p < e) {
5912 if (*p >= 'a' && *p <= 'z') {
5913 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5914 } else if (*p >= 'A' && *p <= 'Z') {
5915 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5916 } else {
5917 *target++ = *p++;
5918 }
5919 }
5920
5921 *target = '\0';
5922
5923 return ret;
5924 }
5925 /* }}} */
5926
5927 /* {{{ Perform the rot13 transform on a string */
5928 PHP_FUNCTION(str_rot13)
5929 {
5930 zend_string *arg;
5931
5932 ZEND_PARSE_PARAMETERS_START(1, 1)
5933 Z_PARAM_STR(arg)
5934 ZEND_PARSE_PARAMETERS_END();
5935
5936 RETURN_STR(php_str_rot13(arg));
5937 }
5938 /* }}} */
5939
5940 /* {{{ php_binary_string_shuffle */
5941 PHPAPI bool php_binary_string_shuffle(php_random_algo_with_state engine, char *str, zend_long len) /* {{{ */
5942 {
5943 const php_random_algo *algo = engine.algo;
5944 void *state = engine.state;
5945
5946 int64_t n_elems, rnd_idx, n_left;
5947 char temp;
5948
5949 /* The implementation is stolen from array_data_shuffle */
5950 /* Thus the characteristics of the randomization are the same */
5951 n_elems = len;
5952
5953 if (n_elems <= 1) {
5954 return true;
5955 }
5956
5957 n_left = n_elems;
5958
5959 while (--n_left) {
5960 rnd_idx = algo->range(state, 0, n_left);
5961 if (EG(exception)) {
5962 return false;
5963 }
5964 if (rnd_idx != n_left) {
5965 temp = str[n_left];
5966 str[n_left] = str[rnd_idx];
5967 str[rnd_idx] = temp;
5968 }
5969 }
5970
5971 return true;
5972 }
5973 /* }}} */
5974
5975 /* {{{ Shuffles string. One permutation of all possible is created */
5976 PHP_FUNCTION(str_shuffle)
5977 {
5978 zend_string *arg;
5979
5980 ZEND_PARSE_PARAMETERS_START(1, 1)
5981 Z_PARAM_STR(arg)
5982 ZEND_PARSE_PARAMETERS_END();
5983
5984 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
5985 if (Z_STRLEN_P(return_value) > 1) {
5986 php_binary_string_shuffle(
5987 php_random_default_engine(),
5988 Z_STRVAL_P(return_value),
5989 Z_STRLEN_P(return_value)
5990 );
5991 }
5992 }
5993 /* }}} */
5994
5995 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
5996 then the function will return an array containing all the words
5997 found inside the string. If format of 2 is specified, then the function
5998 will return an associated array where the position of the word is the key
5999 and the word itself is the value.
6000 For the purpose of this function, 'word' is defined as a locale dependent
6001 string containing alphabetic characters, which also may contain, but not start
6002 with "'" and "-" characters.
6003 */
6004 PHP_FUNCTION(str_word_count)
6005 {
6006 zend_string *str;
6007 char *char_list = NULL, ch[256];
6008 const char *p, *e, *s;
6009 size_t char_list_len = 0, word_count = 0;
6010 zend_long type = 0;
6011
6012 ZEND_PARSE_PARAMETERS_START(1, 3)
6013 Z_PARAM_STR(str)
6014 Z_PARAM_OPTIONAL
6015 Z_PARAM_LONG(type)
6016 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
6017 ZEND_PARSE_PARAMETERS_END();
6018
6019 switch(type) {
6020 case 1:
6021 case 2:
6022 array_init(return_value);
6023 if (!ZSTR_LEN(str)) {
6024 return;
6025 }
6026 break;
6027 case 0:
6028 if (!ZSTR_LEN(str)) {
6029 RETURN_LONG(0);
6030 }
6031 /* nothing to be done */
6032 break;
6033 default:
6034 zend_argument_value_error(2, "must be a valid format value");
6035 RETURN_THROWS();
6036 }
6037
6038 if (char_list) {
6039 php_charmask((const unsigned char *) char_list, char_list_len, ch);
6040 }
6041
6042 p = ZSTR_VAL(str);
6043 e = ZSTR_VAL(str) + ZSTR_LEN(str);
6044
6045 /* first character cannot be ' or -, unless explicitly allowed by the user */
6046 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
6047 p++;
6048 }
6049 /* last character cannot be -, unless explicitly allowed by the user */
6050 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
6051 e--;
6052 }
6053
6054 while (p < e) {
6055 s = p;
6056 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
6057 p++;
6058 }
6059 if (p > s) {
6060 switch (type)
6061 {
6062 case 1:
6063 add_next_index_stringl(return_value, s, p - s);
6064 break;
6065 case 2:
6066 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
6067 break;
6068 default:
6069 word_count++;
6070 break;
6071 }
6072 }
6073 p++;
6074 }
6075
6076 if (!type) {
6077 RETURN_LONG(word_count);
6078 }
6079 }
6080
6081 /* }}} */
6082
6083 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
6084 PHP_FUNCTION(str_split)
6085 {
6086 zend_string *str;
6087 zend_long split_length = 1;
6088 const char *p;
6089 size_t n_reg_segments;
6090
6091 ZEND_PARSE_PARAMETERS_START(1, 2)
6092 Z_PARAM_STR(str)
6093 Z_PARAM_OPTIONAL
6094 Z_PARAM_LONG(split_length)
6095 ZEND_PARSE_PARAMETERS_END();
6096
6097 if (split_length <= 0) {
6098 zend_argument_value_error(2, "must be greater than 0");
6099 RETURN_THROWS();
6100 }
6101
6102 if ((size_t)split_length >= ZSTR_LEN(str)) {
6103 if (0 == ZSTR_LEN(str)) {
6104 RETURN_EMPTY_ARRAY();
6105 }
6106
6107 array_init_size(return_value, 1);
6108 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
6109 return;
6110 }
6111
6112 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
6113
6114 n_reg_segments = ZSTR_LEN(str) / split_length;
6115 p = ZSTR_VAL(str);
6116
6117 while (n_reg_segments-- > 0) {
6118 add_next_index_stringl(return_value, p, split_length);
6119 p += split_length;
6120 }
6121
6122 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
6123 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
6124 }
6125 }
6126 /* }}} */
6127
6128 /* {{{ Search a string for any of a set of characters */
6129 PHP_FUNCTION(strpbrk)
6130 {
6131 zend_string *haystack, *char_list;
6132
6133 ZEND_PARSE_PARAMETERS_START(2, 2)
6134 Z_PARAM_STR(haystack)
6135 Z_PARAM_STR(char_list)
6136 ZEND_PARSE_PARAMETERS_END();
6137
6138 if (!ZSTR_LEN(char_list)) {
6139 zend_argument_value_error(2, "must be a non-empty string");
6140 RETURN_THROWS();
6141 }
6142
6143 size_t shift = php_strcspn(
6144 ZSTR_VAL(haystack),
6145 ZSTR_VAL(char_list),
6146 ZSTR_VAL(haystack) + ZSTR_LEN(haystack),
6147 ZSTR_VAL(char_list) + ZSTR_LEN(char_list)
6148 );
6149 if (shift < ZSTR_LEN(haystack)) {
6150 RETURN_STRINGL(ZSTR_VAL(haystack) + shift, ZSTR_LEN(haystack) - shift);
6151 }
6152
6153 RETURN_FALSE;
6154 }
6155 /* }}} */
6156
6157 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
6158 PHP_FUNCTION(substr_compare)
6159 {
6160 zend_string *s1, *s2;
6161 zend_long offset, len=0;
6162 bool len_is_default=1;
6163 bool cs=0;
6164 size_t cmp_len;
6165
6166 ZEND_PARSE_PARAMETERS_START(3, 5)
6167 Z_PARAM_STR(s1)
6168 Z_PARAM_STR(s2)
6169 Z_PARAM_LONG(offset)
6170 Z_PARAM_OPTIONAL
6171 Z_PARAM_LONG_OR_NULL(len, len_is_default)
6172 Z_PARAM_BOOL(cs)
6173 ZEND_PARSE_PARAMETERS_END();
6174
6175 if (!len_is_default && len <= 0) {
6176 if (len == 0) {
6177 RETURN_LONG(0L);
6178 } else {
6179 zend_argument_value_error(4, "must be greater than or equal to 0");
6180 RETURN_THROWS();
6181 }
6182 }
6183
6184 if (offset < 0) {
6185 offset = ZSTR_LEN(s1) + offset;
6186 offset = (offset < 0) ? 0 : offset;
6187 }
6188
6189 if ((size_t)offset > ZSTR_LEN(s1)) {
6190 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
6191 RETURN_THROWS();
6192 }
6193
6194 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
6195
6196 if (!cs) {
6197 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
6198 } else {
6199 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
6200 }
6201 }
6202 /* }}} */
6203
6204 /* {{{ */
6205 static zend_string *php_utf8_encode(const char *s, size_t len)
6206 {
6207 size_t pos = len;
6208 zend_string *str;
6209 unsigned char c;
6210
6211 str = zend_string_safe_alloc(len, 2, 0, 0);
6212 ZSTR_LEN(str) = 0;
6213 while (pos > 0) {
6214 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6215 * so we don't need to do any mapping here. */
6216 c = (unsigned char)(*s);
6217 if (c < 0x80) {
6218 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
6219 /* We only account for the single-byte and two-byte cases because
6220 * we're only dealing with the first 256 Unicode codepoints. */
6221 } else {
6222 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
6223 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
6224 }
6225 pos--;
6226 s++;
6227 }
6228 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6229 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6230 return str;
6231 }
6232 /* }}} */
6233
6234 /* {{{ */
6235 static zend_string *php_utf8_decode(const char *s, size_t len)
6236 {
6237 size_t pos = 0;
6238 unsigned int c;
6239 zend_string *str;
6240
6241 str = zend_string_alloc(len, 0);
6242 ZSTR_LEN(str) = 0;
6243 while (pos < len) {
6244 zend_result status = FAILURE;
6245 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
6246
6247 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6248 * so we don't need to do any mapping here beyond replacing non-Latin-1
6249 * characters. */
6250 if (status == FAILURE || c > 0xFFU) {
6251 c = '?';
6252 }
6253
6254 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
6255 }
6256 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6257 if (ZSTR_LEN(str) < len) {
6258 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6259 }
6260
6261 return str;
6262 }
6263 /* }}} */
6264
6265 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
6266 PHP_FUNCTION(utf8_encode)
6267 {
6268 char *arg;
6269 size_t arg_len;
6270
6271 ZEND_PARSE_PARAMETERS_START(1, 1)
6272 Z_PARAM_STRING(arg, arg_len)
6273 ZEND_PARSE_PARAMETERS_END();
6274
6275 RETURN_STR(php_utf8_encode(arg, arg_len));
6276 }
6277 /* }}} */
6278
6279 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
6280 PHP_FUNCTION(utf8_decode)
6281 {
6282 char *arg;
6283 size_t arg_len;
6284
6285 ZEND_PARSE_PARAMETERS_START(1, 1)
6286 Z_PARAM_STRING(arg, arg_len)
6287 ZEND_PARSE_PARAMETERS_END();
6288
6289 RETURN_STR(php_utf8_decode(arg, arg_len));
6290 }
6291 /* }}} */
6292