1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_string.h"
22 #include "php_variables.h"
23 #include <locale.h>
24 #ifdef HAVE_LANGINFO_H
25 # include <langinfo.h>
26 #endif
27
28 #ifdef HAVE_LIBINTL
29 # include <libintl.h> /* For LC_MESSAGES */
30 #endif
31
32 #include "scanf.h"
33 #include "zend_API.h"
34 #include "zend_execute.h"
35 #include "php_globals.h"
36 #include "basic_functions.h"
37 #include "zend_smart_str.h"
38 #include <Zend/zend_exceptions.h>
39 #ifdef ZTS
40 #include "TSRM.h"
41 #endif
42
43 /* For str_getcsv() support */
44 #include "ext/standard/file.h"
45 /* For php_next_utf8_char() */
46 #include "ext/standard/html.h"
47 #include "ext/random/php_random.h"
48
49 #ifdef __SSE2__
50 #include <emmintrin.h>
51 #include "Zend/zend_bitset.h"
52 #endif
53
54 /* this is read-only, so it's ok */
55 ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
56
57 /* localeconv mutex */
58 #ifdef ZTS
59 static MUTEX_T locale_mutex = NULL;
60 #endif
61
62 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)63 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
64 {
65 zend_string *result;
66 size_t i, j;
67
68 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
69
70 for (i = j = 0; i < oldlen; i++) {
71 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
72 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
73 }
74 ZSTR_VAL(result)[j] = '\0';
75
76 return result;
77 }
78 /* }}} */
79
80 /* {{{ php_hex2bin */
/* Decodes hexadecimal text at `old` into raw bytes.
 * Exactly oldlen / 2 character pairs are consumed (two characters per output
 * byte), so a trailing odd character is never read here.
 * Returns a new NUL-terminated zend_string, or NULL as soon as a character
 * that is not 0-9, a-f or A-F is met; the partially filled result is freed
 * before returning NULL. */
static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
{
	size_t target_length = oldlen >> 1;
	zend_string *str = zend_string_alloc(target_length, 0);
	unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
	size_t i, j;

	for (i = j = 0; i < target_length; i++) {
		unsigned char c = old[j++];
		/* Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F' */
		unsigned char l = c & ~0x20;
		/* Branch-free range test: the sign bit of the XOR is set only when
		 * l - 'A' is non-negative and l - 'F' - 1 is negative, i.e. when
		 * 'A' <= l <= 'F'. The shift moves that bit down, giving 0 or 1. */
		int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
		unsigned char d;

		/* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
		if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
			/* High nibble. For a digit l is 0x10..0x19, so l - 0x10 is its
			 * value; a letter needs a further 0x27 removed
			 * ('A' - 0x10 - 0x27 == 10). */
			d = (l - 0x10 - 0x27 * is_letter) << 4;
		} else {
			zend_string_efree(str);
			return NULL;
		}
		/* Same decoding for the second character of the pair (low nibble) */
		c = old[j++];
		l = c & ~0x20;
		is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
		if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
			d |= l - 0x10 - 0x27 * is_letter;
		} else {
			zend_string_efree(str);
			return NULL;
		}
		ret[i] = d;
	}
	/* i == target_length here: terminate the decoded buffer */
	ret[i] = '\0';

	return str;
}
116 /* }}} */
117
118 /* {{{ localeconv_r
119 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
/* Copies the current locale's struct lconv into the caller-supplied `*out`
 * and returns `out`.
 * In ZTS builds the copy is made while holding locale_mutex.
 * Note: the copy is a plain struct assignment, so any pointer members of
 * `*out` still refer to storage owned by the C library. */
PHPAPI struct lconv *localeconv_r(struct lconv *out)
{

#ifdef ZTS
	tsrm_mutex_lock( locale_mutex );
#endif

/*  cur->locinfo is struct __crt_locale_info which implementation is
	hidden in vc14. TODO revisit this and check if a workaround available
	and needed. */
#if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
	{
		/* Even with the enabled per thread locale, localeconv
			won't check any locale change in the master thread. */
		_locale_t cur = _get_current_locale();
		*out = *cur->locinfo->lconv;
		_free_locale(cur);
	}
#else
	/* localeconv doesn't return an error condition */
	*out = *localeconv();
#endif

#ifdef ZTS
	tsrm_mutex_unlock( locale_mutex );
#endif

	return out;
}
149 /* }}} */
150
151 #ifdef ZTS
152 /* {{{ PHP_MINIT_FUNCTION */
/* Module startup hook (ZTS builds only): allocates the mutex that
 * localeconv_r() locks around its copy. Always reports SUCCESS. */
PHP_MINIT_FUNCTION(localeconv)
{
	locale_mutex = tsrm_mutex_alloc();
	return SUCCESS;
}
158 /* }}} */
159
160 /* {{{ PHP_MSHUTDOWN_FUNCTION */
/* Module shutdown hook (ZTS builds only): frees the localeconv mutex and
 * resets the pointer to NULL. Always reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(localeconv)
{
	tsrm_mutex_free( locale_mutex );
	locale_mutex = NULL;
	return SUCCESS;
}
167 /* }}} */
168 #endif
169
170 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)171 PHP_FUNCTION(bin2hex)
172 {
173 zend_string *result;
174 zend_string *data;
175
176 ZEND_PARSE_PARAMETERS_START(1, 1)
177 Z_PARAM_STR(data)
178 ZEND_PARSE_PARAMETERS_END();
179
180 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
181
182 RETURN_STR(result);
183 }
184 /* }}} */
185
186 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)187 PHP_FUNCTION(hex2bin)
188 {
189 zend_string *result, *data;
190
191 ZEND_PARSE_PARAMETERS_START(1, 1)
192 Z_PARAM_STR(data)
193 ZEND_PARSE_PARAMETERS_END();
194
195 if (ZSTR_LEN(data) % 2 != 0) {
196 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
197 RETURN_FALSE;
198 }
199
200 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
201
202 if (!result) {
203 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
204 RETURN_FALSE;
205 }
206
207 RETVAL_STR(result);
208 }
209 /* }}} */
210
/* Shared implementation of strspn() and strcspn().
 * Parses (string, mask[, offset[, length]]), clamps offset and length to the
 * subject string (negative values count from the end), and then calls
 * php_strspn() or php_strcspn() according to `behavior`.
 * Returns 0 without scanning when the clamped length is 0. */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
{
	zend_string *s11, *s22;
	zend_long start = 0, len = 0;
	bool len_is_null = 1;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(s11)
		Z_PARAM_STR(s22)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(start)
		Z_PARAM_LONG_OR_NULL(len, len_is_null)
	ZEND_PARSE_PARAMETERS_END();

	size_t available = ZSTR_LEN(s11);

	/* Clamp the offset into [0, strlen(subject)] */
	if (start >= 0) {
		if ((size_t) start > available) {
			start = available;
		}
	} else {
		start += available;
		if (start < 0) {
			start = 0;
		}
	}

	/* From here on `available` is what remains after the offset */
	available -= start;

	if (len_is_null) {
		len = available;
	} else if (len >= 0) {
		if ((size_t) len > available) {
			len = available;
		}
	} else {
		len += available;
		if (len < 0) {
			len = 0;
		}
	}

	if (len == 0) {
		RETURN_LONG(0);
	}

	char *subject_begin = ZSTR_VAL(s11) + start;
	char *subject_end = subject_begin + len;
	char *mask_begin = ZSTR_VAL(s22);
	char *mask_end = mask_begin + ZSTR_LEN(s22);

	if (behavior == PHP_STR_STRSPN) {
		RETURN_LONG(php_strspn(subject_begin, mask_begin, subject_end, mask_end));
	}

	ZEND_ASSERT(behavior == PHP_STR_STRCSPN);
	RETURN_LONG(php_strcspn(subject_begin, mask_begin, subject_end, mask_end));
}
266 /* }}} */
267
268 /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
/* Userland strspn(): forwards all arguments to php_spn_common_handler()
 * with the PHP_STR_STRSPN behaviour selected. */
PHP_FUNCTION(strspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRSPN);
}
273 /* }}} */
274
/* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provided, works like strcspn(substr($s,$start,$len),$bad_chars) */
/* Userland strcspn(): forwards all arguments to php_spn_common_handler()
 * with the PHP_STR_STRCSPN behaviour selected. */
PHP_FUNCTION(strcspn)
{
	php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRCSPN);
}
280 /* }}} */
281
282 #ifdef HAVE_NL_LANGINFO
283 /* {{{ Query language and locale information */
/* Userland nl_langinfo(): returns the locale string for the integer `item`.
 * The item is first checked against the langinfo constants that this
 * platform defines (each group is compiled in only if its macro exists).
 * An item outside that list raises an E_WARNING and yields false.
 * Otherwise the result of the C nl_langinfo() call is returned as a string,
 * or false if that call returns NULL. */
PHP_FUNCTION(nl_langinfo)
{
	zend_long item;
	char *value;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_LONG(item)
	ZEND_PARSE_PARAMETERS_END();

	/* Whitelist: all accepted cases fall through to the single `break` */
	switch(item) { /* {{{ */
#ifdef ABDAY_1
		case ABDAY_1:
		case ABDAY_2:
		case ABDAY_3:
		case ABDAY_4:
		case ABDAY_5:
		case ABDAY_6:
		case ABDAY_7:
#endif
#ifdef DAY_1
		case DAY_1:
		case DAY_2:
		case DAY_3:
		case DAY_4:
		case DAY_5:
		case DAY_6:
		case DAY_7:
#endif
#ifdef ABMON_1
		case ABMON_1:
		case ABMON_2:
		case ABMON_3:
		case ABMON_4:
		case ABMON_5:
		case ABMON_6:
		case ABMON_7:
		case ABMON_8:
		case ABMON_9:
		case ABMON_10:
		case ABMON_11:
		case ABMON_12:
#endif
#ifdef MON_1
		case MON_1:
		case MON_2:
		case MON_3:
		case MON_4:
		case MON_5:
		case MON_6:
		case MON_7:
		case MON_8:
		case MON_9:
		case MON_10:
		case MON_11:
		case MON_12:
#endif
#ifdef AM_STR
		case AM_STR:
#endif
#ifdef PM_STR
		case PM_STR:
#endif
#ifdef D_T_FMT
		case D_T_FMT:
#endif
#ifdef D_FMT
		case D_FMT:
#endif
#ifdef T_FMT
		case T_FMT:
#endif
#ifdef T_FMT_AMPM
		case T_FMT_AMPM:
#endif
#ifdef ERA
		case ERA:
#endif
#ifdef ERA_YEAR
		case ERA_YEAR:
#endif
#ifdef ERA_D_T_FMT
		case ERA_D_T_FMT:
#endif
#ifdef ERA_D_FMT
		case ERA_D_FMT:
#endif
#ifdef ERA_T_FMT
		case ERA_T_FMT:
#endif
#ifdef ALT_DIGITS
		case ALT_DIGITS:
#endif
#ifdef INT_CURR_SYMBOL
		case INT_CURR_SYMBOL:
#endif
#ifdef CURRENCY_SYMBOL
		case CURRENCY_SYMBOL:
#endif
#ifdef CRNCYSTR
		case CRNCYSTR:
#endif
#ifdef MON_DECIMAL_POINT
		case MON_DECIMAL_POINT:
#endif
#ifdef MON_THOUSANDS_SEP
		case MON_THOUSANDS_SEP:
#endif
#ifdef MON_GROUPING
		case MON_GROUPING:
#endif
#ifdef POSITIVE_SIGN
		case POSITIVE_SIGN:
#endif
#ifdef NEGATIVE_SIGN
		case NEGATIVE_SIGN:
#endif
#ifdef INT_FRAC_DIGITS
		case INT_FRAC_DIGITS:
#endif
#ifdef FRAC_DIGITS
		case FRAC_DIGITS:
#endif
#ifdef P_CS_PRECEDES
		case P_CS_PRECEDES:
#endif
#ifdef P_SEP_BY_SPACE
		case P_SEP_BY_SPACE:
#endif
#ifdef N_CS_PRECEDES
		case N_CS_PRECEDES:
#endif
#ifdef N_SEP_BY_SPACE
		case N_SEP_BY_SPACE:
#endif
#ifdef P_SIGN_POSN
		case P_SIGN_POSN:
#endif
#ifdef N_SIGN_POSN
		case N_SIGN_POSN:
#endif
#ifdef DECIMAL_POINT
		case DECIMAL_POINT:
#elif defined(RADIXCHAR)
		case RADIXCHAR:
#endif
#ifdef THOUSANDS_SEP
		case THOUSANDS_SEP:
#elif defined(THOUSEP)
		case THOUSEP:
#endif
#ifdef GROUPING
		case GROUPING:
#endif
#ifdef YESEXPR
		case YESEXPR:
#endif
#ifdef NOEXPR
		case NOEXPR:
#endif
#ifdef YESSTR
		case YESSTR:
#endif
#ifdef NOSTR
		case NOSTR:
#endif
#ifdef CODESET
		case CODESET:
#endif
			break;
		default:
			/* Not one of the constants known at compile time */
			php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
			RETURN_FALSE;
	}
	/* }}} */

	value = nl_langinfo(item);
	if (value == NULL) {
		RETURN_FALSE;
	} else {
		RETURN_STRING(value);
	}
}
466 #endif
467 /* }}} */
468
469 /* {{{ Compares two strings using the current locale */
PHP_FUNCTION(strcoll)470 PHP_FUNCTION(strcoll)
471 {
472 zend_string *s1, *s2;
473
474 ZEND_PARSE_PARAMETERS_START(2, 2)
475 Z_PARAM_STR(s1)
476 Z_PARAM_STR(s2)
477 ZEND_PARSE_PARAMETERS_END();
478
479 RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
480 (const char *) ZSTR_VAL(s2)));
481 }
482 /* }}} */
483
484 /* {{{ php_charmask
485 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
486 * it needs to be incrementing.
487 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
488 */
php_charmask(const unsigned char * input,size_t len,char * mask)489 static inline zend_result php_charmask(const unsigned char *input, size_t len, char *mask)
490 {
491 const unsigned char *end;
492 unsigned char c;
493 zend_result result = SUCCESS;
494
495 memset(mask, 0, 256);
496 for (end = input+len; input < end; input++) {
497 c=*input;
498 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
499 && input[3] >= c) {
500 memset(mask+c, 1, input[3] - c + 1);
501 input+=3;
502 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
503 /* Error, try to be as helpful as possible:
504 (a range ending/starting with '.' won't be captured here) */
505 if (end-len >= input) { /* there was no 'left' char */
506 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
507 result = FAILURE;
508 continue;
509 }
510 if (input+2 >= end) { /* there is no 'right' char */
511 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
512 result = FAILURE;
513 continue;
514 }
515 if (input[-1] > input[2]) { /* wrong order */
516 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
517 result = FAILURE;
518 continue;
519 }
520 /* FIXME: better error (a..b..c is the only left possibility?) */
521 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
522 result = FAILURE;
523 continue;
524 } else {
525 mask[c]=1;
526 }
527 }
528 return result;
529 }
530 /* }}} */
531
532 /* {{{ php_trim_int()
533 * mode 1 : trim left
534 * mode 2 : trim right
535 * mode 3 : trim left and right
536 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
537 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)538 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
539 {
540 const char *start = ZSTR_VAL(str);
541 const char *end = start + ZSTR_LEN(str);
542 char mask[256];
543
544 if (what) {
545 if (what_len == 1) {
546 char p = *what;
547 if (mode & 1) {
548 while (start != end) {
549 if (*start == p) {
550 start++;
551 } else {
552 break;
553 }
554 }
555 }
556 if (mode & 2) {
557 while (start != end) {
558 if (*(end-1) == p) {
559 end--;
560 } else {
561 break;
562 }
563 }
564 }
565 } else {
566 php_charmask((const unsigned char *) what, what_len, mask);
567
568 if (mode & 1) {
569 while (start != end) {
570 if (mask[(unsigned char)*start]) {
571 start++;
572 } else {
573 break;
574 }
575 }
576 }
577 if (mode & 2) {
578 while (start != end) {
579 if (mask[(unsigned char)*(end-1)]) {
580 end--;
581 } else {
582 break;
583 }
584 }
585 }
586 }
587 } else {
588 if (mode & 1) {
589 while (start != end) {
590 unsigned char c = (unsigned char)*start;
591
592 if (c <= ' ' &&
593 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
594 start++;
595 } else {
596 break;
597 }
598 }
599 }
600 if (mode & 2) {
601 while (start != end) {
602 unsigned char c = (unsigned char)*(end-1);
603
604 if (c <= ' ' &&
605 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
606 end--;
607 } else {
608 break;
609 }
610 }
611 }
612 }
613
614 if (ZSTR_LEN(str) == end - start) {
615 return zend_string_copy(str);
616 } else if (end - start == 0) {
617 return ZSTR_EMPTY_ALLOC();
618 } else {
619 return zend_string_init(start, end - start, 0);
620 }
621 }
622 /* }}} */
623
/* {{{ php_trim()
625 * mode 1 : trim left
626 * mode 2 : trim right
627 * mode 3 : trim left and right
628 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
629 */
/* Exported (PHPAPI) trimming entry point: passes all four arguments
 * unchanged to php_trim_int() and returns its result. */
PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
{
	return php_trim_int(str, what, what_len, mode);
}
634 /* }}} */
635
636 /* {{{ php_do_trim
637 * Base for trim(), rtrim() and ltrim() functions.
638 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)639 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
640 {
641 zend_string *str;
642 zend_string *what = NULL;
643
644 ZEND_PARSE_PARAMETERS_START(1, 2)
645 Z_PARAM_STR(str)
646 Z_PARAM_OPTIONAL
647 Z_PARAM_STR(what)
648 ZEND_PARSE_PARAMETERS_END();
649
650 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
651 }
652 /* }}} */
653
654 /* {{{ Strips whitespace from the beginning and end of a string */
/* Userland trim(): php_do_trim() with mode 3 (both the left and right
 * mode bits set). */
PHP_FUNCTION(trim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
}
659 /* }}} */
660
661 /* {{{ Removes trailing whitespace */
/* Userland rtrim(): php_do_trim() with mode 2 (right side only). */
PHP_FUNCTION(rtrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
666 /* }}} */
667
668 /* {{{ Strips whitespace from the beginning of a string */
/* Userland ltrim(): php_do_trim() with mode 1 (left side only). */
PHP_FUNCTION(ltrim)
{
	php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
673 /* }}} */
674
675 /* {{{ Wraps buffer to selected number of characters using string break char */
/* Userland wordwrap(): wraps `text` at `linelength` characters (default 75)
 * using `breakchar` (default "\n") as the line break; `docut` (default
 * false) forces breaks inside words that are longer than the width.
 * Returns an empty string for empty input. Throws a ValueError when the
 * break string is empty, or when cutting is requested with a width of 0.
 * Two code paths: a same-length in-place substitution when the break is a
 * single character and no cutting is needed, and a general path that builds
 * the output in a separately sized buffer. */
PHP_FUNCTION(wordwrap)
{
	zend_string *text;
	char *breakchar = "\n";
	size_t newtextlen, chk, breakchar_len = 1;
	size_t alloced;
	/* current: scan index; laststart: index where the current output line
	 * began; lastspace: index of the most recent space seen */
	zend_long current = 0, laststart = 0, lastspace = 0;
	zend_long linelength = 75;
	bool docut = 0;
	zend_string *newtext;

	ZEND_PARSE_PARAMETERS_START(1, 4)
		Z_PARAM_STR(text)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(linelength)
		Z_PARAM_STRING(breakchar, breakchar_len)
		Z_PARAM_BOOL(docut)
	ZEND_PARSE_PARAMETERS_END();

	if (ZSTR_LEN(text) == 0) {
		RETURN_EMPTY_STRING();
	}

	if (breakchar_len == 0) {
		zend_argument_value_error(3, "cannot be empty");
		RETURN_THROWS();
	}

	if (linelength == 0 && docut) {
		zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
		RETURN_THROWS();
	}

	/* Special case for a single-character break as it needs no
	   additional storage space */
	if (breakchar_len == 1 && !docut) {
		/* Work on a copy: spaces are overwritten with the break character,
		 * so the output has the same length as the input */
		newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			if (ZSTR_VAL(text)[current] == breakchar[0]) {
				/* Existing break: a new line starts right after it */
				laststart = lastspace = current + 1;
			} else if (ZSTR_VAL(text)[current] == ' ') {
				if (current - laststart >= linelength) {
					ZSTR_VAL(newtext)[current] = breakchar[0];
					laststart = current + 1;
				}
				lastspace = current;
			} else if (current - laststart >= linelength && laststart != lastspace) {
				/* Line too long and a space is available: break there */
				ZSTR_VAL(newtext)[lastspace] = breakchar[0];
				laststart = lastspace + 1;
			}
		}

		RETURN_NEW_STR(newtext);
	} else {
		/* Multiple character line break or forced cut */
		/* chk is the number of breaks the current allocation has room for;
		 * it is decremented for each break copied or inserted below */
		if (linelength > 0) {
			chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
			newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
			alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
		} else {
			chk = ZSTR_LEN(text);
			alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
			newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
		}

		/* now keep track of the actual new text length */
		newtextlen = 0;

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			if (chk == 0) {
				/* Break budget used up: grow the buffer based on the text
				 * that is still left to scan and refill the budget */
				alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
				newtext = zend_string_extend(newtext, alloced, 0);
				chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
			}
			/* when we hit an existing break, copy to new buffer, and
			 * fix up laststart and lastspace */
			if (ZSTR_VAL(text)[current] == breakchar[0]
				&& current + breakchar_len < ZSTR_LEN(text)
				&& !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
				newtextlen += current - laststart + breakchar_len;
				/* Skip the rest of the break; the loop's current++ then
				 * lands on the first character after it */
				current += breakchar_len - 1;
				laststart = lastspace = current + 1;
				chk--;
			}
			/* if it is a space, check if it is at the line boundary,
			 * copy and insert a break, or just keep track of it */
			else if (ZSTR_VAL(text)[current] == ' ') {
				if (current - laststart >= linelength) {
					memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
					newtextlen += current - laststart;
					memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
					newtextlen += breakchar_len;
					laststart = current + 1;
					chk--;
				}
				lastspace = current;
			}
			/* if we are cutting, and we've accumulated enough
			 * characters, and we haven't seen a space for this line,
			 * copy and insert a break. */
			else if (current - laststart >= linelength
					&& docut && laststart >= lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
				newtextlen += current - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				laststart = lastspace = current;
				chk--;
			}
			/* if the current word puts us over the linelength, copy
			 * back up until the last space, insert a break, and move
			 * up the laststart */
			else if (current - laststart >= linelength
					&& laststart < lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
				newtextlen += lastspace - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				laststart = lastspace = lastspace + 1;
				chk--;
			}
		}

		/* copy over any stragglers */
		if (laststart != current) {
			memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
			newtextlen += current - laststart;
		}

		ZSTR_VAL(newtext)[newtextlen] = '\0';
		/* free unused memory */
		newtext = zend_string_truncate(newtext, newtextlen, 0);

		RETURN_NEW_STR(newtext);
	}
}
816 /* }}} */
817
818 /* {{{ php_explode */
/* Splits `str` on `delim` and appends the pieces to the array already held
 * in `return_value`.
 * If the delimiter does not occur, the whole string is appended as the only
 * element. Otherwise pieces are appended one by one; the loop stops early
 * once the pre-decremented `limit` is no longer above 1, and the remaining
 * text (which may still contain delimiters) is appended as the final element.
 * NOTE(review): PHP_FUNCTION(explode) only calls this with limit > 1 and a
 * non-empty delimiter; other callers are not visible here. */
PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
{
	const char *p1 = ZSTR_VAL(str);
	const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
	const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
	zval tmp;

	if (p2 == NULL) {
		/* No delimiter at all: reuse the input string itself */
		ZVAL_STR_COPY(&tmp, str);
		zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
	} else {
		zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
		ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
			do {
				/* p1 = start of the current piece, p2 = next delimiter */
				ZEND_HASH_FILL_GROW();
				ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
				ZEND_HASH_FILL_NEXT();
				p1 = p2 + ZSTR_LEN(delim);
				p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
			} while (p2 != NULL && --limit > 1);

			/* Trailing remainder (empty when the string ends in delim) */
			if (p1 <= endp) {
				ZEND_HASH_FILL_GROW();
				ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
				ZEND_HASH_FILL_NEXT();
			}
		} ZEND_HASH_FILL_END();
	}
}
848 /* }}} */
849
850 /* {{{ php_explode_negative_limit */
/* Variant of php_explode() for a negative `limit`: splits `str` on `delim`
 * and appends all pieces except the last -limit ones to the array held in
 * `return_value`. Appends nothing when the delimiter does not occur or when
 * -limit is at least the number of pieces. */
PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
{
#define EXPLODE_ALLOC_STEP 64
	const char *p1 = ZSTR_VAL(str);
	const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
	const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
	zval tmp;

	if (p2 == NULL) {
		/*
		do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
		by doing nothing we return empty array
		*/
	} else {
		size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
		zend_long i, to_return;
		/* positions[k] is the start of the k-th piece; the list is grown in
		 * EXPLODE_ALLOC_STEP increments */
		const char **positions = emalloc(allocated * sizeof(char *));

		positions[found++] = p1;
		do {
			if (found >= allocated) {
				allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
				positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
			}
			positions[found++] = p1 = p2 + ZSTR_LEN(delim);
			p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
		} while (p2 != NULL);

		/* found pieces in total, of which the last -limit are dropped */
		to_return = limit + found;
		/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
		for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
			/* A piece ends one delimiter length before the next one starts */
			ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
		efree((void *)positions);
	}
#undef EXPLODE_ALLOC_STEP
}
889 /* }}} */
890
891 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)892 PHP_FUNCTION(explode)
893 {
894 zend_string *str, *delim;
895 zend_long limit = ZEND_LONG_MAX; /* No limit */
896 zval tmp;
897
898 ZEND_PARSE_PARAMETERS_START(2, 3)
899 Z_PARAM_STR(delim)
900 Z_PARAM_STR(str)
901 Z_PARAM_OPTIONAL
902 Z_PARAM_LONG(limit)
903 ZEND_PARSE_PARAMETERS_END();
904
905 if (ZSTR_LEN(delim) == 0) {
906 zend_argument_value_error(1, "cannot be empty");
907 RETURN_THROWS();
908 }
909
910 array_init(return_value);
911
912 if (ZSTR_LEN(str) == 0) {
913 if (limit >= 0) {
914 ZVAL_EMPTY_STRING(&tmp);
915 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
916 }
917 return;
918 }
919
920 if (limit > 1) {
921 php_explode(delim, str, return_value, limit);
922 } else if (limit < 0) {
923 php_explode_negative_limit(delim, str, return_value, limit);
924 } else {
925 ZVAL_STR_COPY(&tmp, str);
926 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
927 }
928 }
929 /* }}} */
930
931 /* {{{ php_implode */
/* Joins the values of `pieces` with `glue` and stores the resulting string
 * in `return_value`.
 * An empty table yields the empty string; a single element yields that
 * element's string conversion. Otherwise the work is done in two passes:
 * pass 1 records every element (as a string, or as a raw integer) and sums
 * the output length; pass 2 allocates the result once and fills it from the
 * end towards the beginning. */
PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
{
	zval         *tmp;
	uint32_t      numelems;
	zend_string  *str;
	char         *cptr;
	size_t        len = 0;
	/* Per-element record. str != NULL: string element, with lval acting as
	 * a "must release" flag (0 = borrowed from the array, 1 = temporary
	 * conversion). str == NULL: integer element whose value is in lval. */
	struct {
		zend_string *str;
		zend_long    lval;
	} *strings, *ptr;
	ALLOCA_FLAG(use_heap)

	numelems = zend_hash_num_elements(pieces);

	if (numelems == 0) {
		RETURN_EMPTY_STRING();
	} else if (numelems == 1) {
		/* loop to search the first not undefined element... */
		ZEND_HASH_FOREACH_VAL(pieces, tmp) {
			RETURN_STR(zval_get_string(tmp));
		} ZEND_HASH_FOREACH_END();
	}

	ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);

	/* Start from the glue's flags and AND in each string piece's flags */
	uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(glue);

	ZEND_HASH_FOREACH_VAL(pieces, tmp) {
		if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
			ptr->str = Z_STR_P(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 0;
			flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
			ptr++;
		} else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
			zend_long val = Z_LVAL_P(tmp);

			ptr->str = NULL;
			ptr->lval = val;
			ptr++;
			/* Printed width: one character for the sign of a negative
			 * value or for the lone digit of 0, plus one per decimal digit */
			if (val <= 0) {
				len++;
			}
			while (val) {
				val /= 10;
				len++;
			}
		} else {
			/* Any other type: convert now and remember to release it */
			ptr->str = zval_get_string_func(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 1;
			flags &= ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(ptr->str);
			ptr++;
		}
	} ZEND_HASH_FOREACH_END();

	/* numelems cannot be 0, we checked above */
	str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
	GC_ADD_FLAGS(str, flags);
	/* cptr starts at the terminator and moves backwards as pieces are
	 * written */
	cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
	*cptr = 0;

	while (1) {
		ptr--;
		if (EXPECTED(ptr->str)) {
			cptr -= ZSTR_LEN(ptr->str);
			memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
			if (ptr->lval) {
				zend_string_release_ex(ptr->str, 0);
			}
		} else {
			/* The byte at cptr is saved and restored around the call;
			 * presumably zend_print_long_to_buf() stores a terminator
			 * there — TODO confirm against its definition */
			char *oldPtr = cptr;
			char oldVal = *cptr;
			cptr = zend_print_long_to_buf(cptr, ptr->lval);
			*oldPtr = oldVal;
		}

		/* The first element has been written: no glue goes before it */
		if (ptr == strings) {
			break;
		}

		cptr -= ZSTR_LEN(glue);
		memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
	}

	free_alloca(strings, use_heap);
	RETURN_NEW_STR(str);
}
1021 /* }}} */
1022
1023 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1024 PHP_FUNCTION(implode)
1025 {
1026 zend_string *arg1_str = NULL;
1027 HashTable *arg1_array = NULL;
1028 zend_array *pieces = NULL;
1029
1030 ZEND_PARSE_PARAMETERS_START(1, 2)
1031 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1032 Z_PARAM_OPTIONAL
1033 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1034 ZEND_PARSE_PARAMETERS_END();
1035
1036 if (pieces == NULL) {
1037 if (arg1_array == NULL) {
1038 zend_type_error("%s(): Argument #1 ($array) must be of type array, string given", get_active_function_name());
1039 RETURN_THROWS();
1040 }
1041
1042 arg1_str = ZSTR_EMPTY_ALLOC();
1043 pieces = arg1_array;
1044 } else {
1045 if (arg1_str == NULL) {
1046 zend_argument_type_error(1, "must be of type string, array given");
1047 RETURN_THROWS();
1048 }
1049 }
1050
1051 php_implode(arg1_str, pieces, return_value);
1052 }
1053 /* }}} */
1054
1055 #define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]
1056
1057 /* {{{ Tokenize a string */
/* Userland strtok().
 * With two arguments (string, delimiters) a new tokenization is started: the
 * string is stored in the BG(strtok_*) globals. With one argument that
 * argument is the delimiter list and scanning resumes in the stored string.
 * Returns the next token, or false when the stored string is exhausted. A
 * call without a stored string raises an E_WARNING and returns false. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* Single-argument call: the only argument is the delimiter list */
		tok = str;
	} else {
		/* Two-argument call: replace any previously stored string */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		php_error_docref(NULL, E_WARNING, "Both arguments must be provided when starting tokenization");
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Flag every delimiter byte in the shared lookup table */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* The token lies between the skipped delimiters and p; the next
		 * call resumes one byte past p */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		BG(strtok_last) = p + 1;
	} else {
return_false:
		/* Nothing left to return: drop the stored string */
		RETVAL_FALSE;
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster than memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
1137 /* }}} */
1138
1139 /* {{{ php_strtoupper */
/* Passes the buffer `s` of length `len` to zend_str_toupper() and returns
 * the same pointer `s`. */
PHPAPI char *php_strtoupper(char *s, size_t len)
{
	zend_str_toupper(s, len);
	return s;
}
1145 /* }}} */
1146
1147 /* {{{ php_string_toupper */
php_string_toupper(zend_string * s)1148 PHPAPI zend_string *php_string_toupper(zend_string *s)
1149 {
1150 return zend_string_toupper(s);
1151 }
1152 /* }}} */
1153
1154 /* {{{ Makes a string uppercase */
PHP_FUNCTION(strtoupper)1155 PHP_FUNCTION(strtoupper)
1156 {
1157 zend_string *arg;
1158
1159 ZEND_PARSE_PARAMETERS_START(1, 1)
1160 Z_PARAM_STR(arg)
1161 ZEND_PARSE_PARAMETERS_END();
1162
1163 RETURN_STR(zend_string_toupper(arg));
1164 }
1165 /* }}} */
1166
1167 /* {{{ php_strtolower */
php_strtolower(char * s,size_t len)1168 PHPAPI char *php_strtolower(char *s, size_t len)
1169 {
1170 zend_str_tolower(s, len);
1171 return s;
1172 }
1173 /* }}} */
1174
1175 /* {{{ php_string_tolower */
php_string_tolower(zend_string * s)1176 PHPAPI zend_string *php_string_tolower(zend_string *s)
1177 {
1178 return zend_string_tolower(s);
1179 }
1180 /* }}} */
1181
1182 /* {{{ Makes a string lowercase */
PHP_FUNCTION(strtolower)1183 PHP_FUNCTION(strtolower)
1184 {
1185 zend_string *str;
1186
1187 ZEND_PARSE_PARAMETERS_START(1, 1)
1188 Z_PARAM_STR(str)
1189 ZEND_PARSE_PARAMETERS_END();
1190
1191 RETURN_STR(zend_string_tolower(str));
1192 }
1193 /* }}} */
1194
PHP_FUNCTION(str_increment)1195 PHP_FUNCTION(str_increment)
1196 {
1197 zend_string *str;
1198
1199 ZEND_PARSE_PARAMETERS_START(1, 1)
1200 Z_PARAM_STR(str)
1201 ZEND_PARSE_PARAMETERS_END();
1202
1203 if (ZSTR_LEN(str) == 0) {
1204 zend_argument_value_error(1, "cannot be empty");
1205 RETURN_THROWS();
1206 }
1207 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1208 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1209 RETURN_THROWS();
1210 }
1211
1212 zend_string *incremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1213 size_t position = ZSTR_LEN(str)-1;
1214 bool carry = false;
1215
1216 do {
1217 char c = ZSTR_VAL(incremented)[position];
1218 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1219 if (EXPECTED( c != 'z' && c != 'Z' && c != '9' )) {
1220 carry = false;
1221 ZSTR_VAL(incremented)[position]++;
1222 } else { /* if 'z', 'Z', or '9' */
1223 carry = true;
1224 if (c == '9') {
1225 ZSTR_VAL(incremented)[position] = '0';
1226 } else {
1227 ZSTR_VAL(incremented)[position] -= 25;
1228 }
1229 }
1230 } while (carry && position-- > 0);
1231
1232 if (UNEXPECTED(carry)) {
1233 zend_string *tmp = zend_string_alloc(ZSTR_LEN(incremented)+1, 0);
1234 memcpy(ZSTR_VAL(tmp) + 1, ZSTR_VAL(incremented), ZSTR_LEN(incremented));
1235 ZSTR_VAL(tmp)[ZSTR_LEN(incremented)+1] = '\0';
1236 switch (ZSTR_VAL(incremented)[0]) {
1237 case '0':
1238 ZSTR_VAL(tmp)[0] = '1';
1239 break;
1240 default:
1241 ZSTR_VAL(tmp)[0] = ZSTR_VAL(incremented)[0];
1242 break;
1243 }
1244 zend_string_release_ex(incremented, /* persistent */ false);
1245 RETURN_STR(tmp);
1246 }
1247 RETURN_STR(incremented);
1248 }
1249
1250
PHP_FUNCTION(str_decrement)1251 PHP_FUNCTION(str_decrement)
1252 {
1253 zend_string *str;
1254
1255 ZEND_PARSE_PARAMETERS_START(1, 1)
1256 Z_PARAM_STR(str)
1257 ZEND_PARSE_PARAMETERS_END();
1258
1259 if (ZSTR_LEN(str) == 0) {
1260 zend_argument_value_error(1, "cannot be empty");
1261 RETURN_THROWS();
1262 }
1263 if (!zend_string_only_has_ascii_alphanumeric(str)) {
1264 zend_argument_value_error(1, "must be composed only of alphanumeric ASCII characters");
1265 RETURN_THROWS();
1266 }
1267 if (ZSTR_LEN(str) >= 1 && ZSTR_VAL(str)[0] == '0') {
1268 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1269 RETURN_THROWS();
1270 }
1271
1272 zend_string *decremented = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), /* persistent */ false);
1273 size_t position = ZSTR_LEN(str)-1;
1274 bool carry = false;
1275
1276 do {
1277 char c = ZSTR_VAL(decremented)[position];
1278 /* We know c is in ['a', 'z'], ['A', 'Z'], or ['0', '9'] range from zend_string_only_has_ascii_alphanumeric() */
1279 if (EXPECTED( c != 'a' && c != 'A' && c != '0' )) {
1280 carry = false;
1281 ZSTR_VAL(decremented)[position]--;
1282 } else { /* if 'a', 'A', or '0' */
1283 carry = true;
1284 if (c == '0') {
1285 ZSTR_VAL(decremented)[position] = '9';
1286 } else {
1287 ZSTR_VAL(decremented)[position] += 25;
1288 }
1289 }
1290 } while (carry && position-- > 0);
1291
1292 if (UNEXPECTED(carry || (ZSTR_VAL(decremented)[0] == '0' && ZSTR_LEN(decremented) > 1))) {
1293 if (ZSTR_LEN(decremented) == 1) {
1294 zend_string_release_ex(decremented, /* persistent */ false);
1295 zend_argument_value_error(1, "\"%s\" is out of decrement range", ZSTR_VAL(str));
1296 RETURN_THROWS();
1297 }
1298 zend_string *tmp = zend_string_alloc(ZSTR_LEN(decremented) - 1, 0);
1299 memcpy(ZSTR_VAL(tmp), ZSTR_VAL(decremented) + 1, ZSTR_LEN(decremented) - 1);
1300 ZSTR_VAL(tmp)[ZSTR_LEN(decremented) - 1] = '\0';
1301 zend_string_release_ex(decremented, /* persistent */ false);
1302 RETURN_STR(tmp);
1303 }
1304 RETURN_STR(decremented);
1305 }
1306
#if defined(PHP_WIN32)
/* Decides whether the byte just before pos is the first byte of a path
 * component within [start, pos).
 *
 * That byte must not be a slash or backslash, and it must either be the very
 * first byte of the string, follow a slash/backslash, or follow a ':' for
 * which the same test (applied two bytes earlier) holds. */
static bool _is_basename_start(const char *start, const char *pos)
{
	if (pos - start < 1) {
		return false;
	}
	if (pos[-1] == '/' || pos[-1] == '\\') {
		return false;
	}
	if (pos - start == 1) {
		return true;
	}
	if (pos[-2] == '/' || pos[-2] == '\\') {
		return true;
	}

	return pos[-2] == ':' && _is_basename_start(start, pos - 2);
}
#endif
1325
/* {{{ php_basename
 * Extracts the trailing name component of the path (s, len) and returns it
 * as a new zend_string created with zend_string_init() (or
 * ZSTR_EMPTY_ALLOC() when the ASCII branch finds nothing but separators).
 *
 * '/' is a separator everywhere; under PHP_WIN32 '\\' is one too, and a ':'
 * that directly follows a one-byte component start is treated as a
 * drive-letter boundary.
 *
 * If suffix is non-NULL, strictly shorter than the extracted name and equal
 * to its tail, the suffix is cut off the result. */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	const char *basename_start;
	const char *basename_end;

	if (CG(ascii_compatible_locale)) {
		/* Byte-wise scan from the end of the string; basename_end starts on
		 * the last byte and is moved one past the name further down. */
		basename_end = s + len - 1;

		/* Strip trailing slashes */
		while (basename_end >= s
#ifdef PHP_WIN32
			&& (*basename_end == '/'
				|| *basename_end == '\\'
				|| (*basename_end == ':'
					&& _is_basename_start(s, basename_end)))) {
#else
			&& *basename_end == '/') {
#endif
			basename_end--;
		}
		if (basename_end < s) {
			/* Empty input, or nothing but separators. */
			return ZSTR_EMPTY_ALLOC();
		}

		/* Extract filename */
		basename_start = basename_end;
		basename_end++;
		/* Walk left until the byte before basename_start is a separator
		 * (or, on Windows, a drive-letter colon). */
		while (basename_start > s
#ifdef PHP_WIN32
			&& *(basename_start-1) != '/'
			&& *(basename_start-1) != '\\') {

			if (*(basename_start-1) == ':' &&
				_is_basename_start(s, basename_start - 1)) {
				break;
			}
#else
			&& *(basename_start-1) != '/') {
#endif
			basename_start--;
		}
	} else {
		/* Forward scan that steps by php_mblen() so that multi-byte
		 * characters are never mistaken for separators.
		 *
		 * State 0 is directly after a directory separator (or at the start of the string).
		 * State 1 is everything else. */
		int state = 0;

		basename_start = s;
		basename_end = s;
		while (len > 0) {
			/* A NUL byte is stepped over as a single ordinary byte. */
			int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

			switch (inc_len) {
				case 0:
					goto quit_loop;
				case 1:
#ifdef PHP_WIN32
					if (*s == '/' || *s == '\\') {
#else
					if (*s == '/') {
#endif
						/* First separator after a component: remember where
						 * that component ended. */
						if (state == 1) {
							state = 0;
							basename_end = s;
						}
#ifdef PHP_WIN32
					/* Catch relative paths in c:file.txt style. They're not to be
					   confused with NTFS streams. This part also ensures that no
					   drive letter traversing happens. */
					} else if ((*s == ':' && (s - basename_start == 1))) {
						if (state == 0) {
							basename_start = s;
							state = 1;
						} else {
							basename_end = s;
							state = 0;
						}
#endif
					} else {
						/* Ordinary byte: it opens a new component if we were
						 * just after a separator. */
						if (state == 0) {
							basename_start = s;
							state = 1;
						}
					}
					break;
				default:
					if (inc_len < 0) {
						/* If character is invalid, treat it like other non-significant characters. */
						inc_len = 1;
						php_mb_reset();
					}
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
					break;
			}
			s += inc_len;
			len -= inc_len;
		}

quit_loop:
		/* The input ended inside a component, so that component runs to s. */
		if (state == 1) {
			basename_end = s;
		}
	}

	/* Only strip a suffix that is strictly shorter than the name itself. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
/* }}} */
1441
1442 /* {{{ Returns the filename component of the path */
1443 PHP_FUNCTION(basename)
1444 {
1445 char *string, *suffix = NULL;
1446 size_t string_len, suffix_len = 0;
1447
1448 ZEND_PARSE_PARAMETERS_START(1, 2)
1449 Z_PARAM_STRING(string, string_len)
1450 Z_PARAM_OPTIONAL
1451 Z_PARAM_STRING(suffix, suffix_len)
1452 ZEND_PARSE_PARAMETERS_END();
1453
1454 RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
1455 }
1456 /* }}} */
1457
1458 /* {{{ php_dirname
1459 Returns directory name component of path */
1460 PHPAPI size_t php_dirname(char *path, size_t len)
1461 {
1462 return zend_dirname(path, len);
1463 }
1464 /* }}} */
1465
1466 /* {{{ Returns the directory name component of the path */
1467 PHP_FUNCTION(dirname)
1468 {
1469 char *str;
1470 size_t str_len;
1471 zend_string *ret;
1472 zend_long levels = 1;
1473
1474 ZEND_PARSE_PARAMETERS_START(1, 2)
1475 Z_PARAM_STRING(str, str_len)
1476 Z_PARAM_OPTIONAL
1477 Z_PARAM_LONG(levels)
1478 ZEND_PARSE_PARAMETERS_END();
1479
1480 ret = zend_string_init(str, str_len, 0);
1481
1482 if (levels == 1) {
1483 /* Default case */
1484 #ifdef PHP_WIN32
1485 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
1486 #else
1487 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
1488 #endif
1489 } else if (levels < 1) {
1490 zend_argument_value_error(2, "must be greater than or equal to 1");
1491 zend_string_efree(ret);
1492 RETURN_THROWS();
1493 } else {
1494 /* Some levels up */
1495 do {
1496 #ifdef PHP_WIN32
1497 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1498 #else
1499 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1500 #endif
1501 } while (ZSTR_LEN(ret) < str_len && --levels);
1502 }
1503
1504 RETURN_NEW_STR(ret);
1505 }
1506 /* }}} */
1507
1508 /* {{{ Returns information about a certain string */
1509 PHP_FUNCTION(pathinfo)
1510 {
1511 zval tmp;
1512 char *path, *dirname;
1513 size_t path_len;
1514 bool have_basename;
1515 zend_long opt = PHP_PATHINFO_ALL;
1516 zend_string *ret = NULL;
1517
1518 ZEND_PARSE_PARAMETERS_START(1, 2)
1519 Z_PARAM_STRING(path, path_len)
1520 Z_PARAM_OPTIONAL
1521 Z_PARAM_LONG(opt)
1522 ZEND_PARSE_PARAMETERS_END();
1523
1524 have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
1525
1526 array_init(&tmp);
1527
1528 if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
1529 dirname = estrndup(path, path_len);
1530 php_dirname(dirname, path_len);
1531 if (*dirname) {
1532 add_assoc_string(&tmp, "dirname", dirname);
1533 }
1534 efree(dirname);
1535 }
1536
1537 if (have_basename) {
1538 ret = php_basename(path, path_len, NULL, 0);
1539 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1540 }
1541
1542 if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
1543 const char *p;
1544 ptrdiff_t idx;
1545
1546 if (!have_basename) {
1547 ret = php_basename(path, path_len, NULL, 0);
1548 }
1549
1550 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1551
1552 if (p) {
1553 idx = p - ZSTR_VAL(ret);
1554 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1555 }
1556 }
1557
1558 if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
1559 const char *p;
1560 ptrdiff_t idx;
1561
1562 /* Have we already looked up the basename? */
1563 if (!have_basename && !ret) {
1564 ret = php_basename(path, path_len, NULL, 0);
1565 }
1566
1567 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1568
1569 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1570 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1571 }
1572
1573 if (ret) {
1574 zend_string_release_ex(ret, 0);
1575 }
1576
1577 if (opt == PHP_PATHINFO_ALL) {
1578 RETURN_COPY_VALUE(&tmp);
1579 } else {
1580 zval *element;
1581 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1582 RETVAL_COPY_DEREF(element);
1583 } else {
1584 RETVAL_EMPTY_STRING();
1585 }
1586 zval_ptr_dtor(&tmp);
1587 }
1588 }
1589 /* }}} */
1590
1591 /* {{{ php_stristr
1592 case insensitive strstr */
1593 PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
1594 {
1595 return (char*)php_memnistr(s, t, t_len, s + s_len);
1596 }
1597 /* }}} */
1598
/* {{{ php_strspn
 * Returns the length of the leading run of [s1, s1_end) whose bytes all
 * occur in the accept set [s2, s2_end).
 *
 * Both ranges are half-open and may be empty; an empty subject or an empty
 * accept set yields 0. Only bytes inside the two ranges are read, so neither
 * buffer needs a terminator. */
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const size_t accept_len = (size_t)(s2_end - s2);
	const char *p = s1;

	if (accept_len == 0) {
		/* Nothing can match an empty accept set. */
		return 0;
	}

	while (p != s1_end && memchr(s2, (unsigned char)*p, accept_len) != NULL) {
		p++;
	}

	return (size_t)(p - s1);
}
/* }}} */
1615
/* {{{ php_strcspn
 * Returns the length of the leading run of [s1, s1_end) that contains no
 * byte from the reject set [s2, s2_end).
 *
 * Both ranges are half-open and may be empty. With an empty reject set the
 * whole subject length is returned, even when the subject contains NUL
 * bytes. Only bytes inside the two ranges are read, so neither buffer needs
 * a terminator. */
PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const size_t reject_len = (size_t)(s2_end - s2);
	const char *p = s1;

	if (reject_len == 0) {
		/* No byte can be rejected, so the whole subject qualifies. */
		return (size_t)(s1_end - s1);
	}

	while (p != s1_end && memchr(s2, (unsigned char)*p, reject_len) == NULL) {
		p++;
	}

	return (size_t)(p - s1);
}
/* }}} */
1634
1635 /* {{{ Finds first occurrence of a string within another, case insensitive */
1636 PHP_FUNCTION(stristr)
1637 {
1638 zend_string *haystack, *needle;
1639 const char *found = NULL;
1640 size_t found_offset;
1641 bool part = 0;
1642
1643 ZEND_PARSE_PARAMETERS_START(2, 3)
1644 Z_PARAM_STR(haystack)
1645 Z_PARAM_STR(needle)
1646 Z_PARAM_OPTIONAL
1647 Z_PARAM_BOOL(part)
1648 ZEND_PARSE_PARAMETERS_END();
1649
1650 found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
1651
1652 if (UNEXPECTED(!found)) {
1653 RETURN_FALSE;
1654 }
1655 found_offset = found - ZSTR_VAL(haystack);
1656 if (part) {
1657 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1658 }
1659 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1660 }
1661 /* }}} */
1662
1663 /* {{{ Finds first occurrence of a string within another */
1664 PHP_FUNCTION(strstr)
1665 {
1666 zend_string *haystack, *needle;
1667 const char *found = NULL;
1668 zend_long found_offset;
1669 bool part = 0;
1670
1671 ZEND_PARSE_PARAMETERS_START(2, 3)
1672 Z_PARAM_STR(haystack)
1673 Z_PARAM_STR(needle)
1674 Z_PARAM_OPTIONAL
1675 Z_PARAM_BOOL(part)
1676 ZEND_PARSE_PARAMETERS_END();
1677
1678 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1679
1680 if (UNEXPECTED(!found)) {
1681 RETURN_FALSE;
1682 }
1683 found_offset = found - ZSTR_VAL(haystack);
1684 if (part) {
1685 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1686 }
1687 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1688 }
1689 /* }}} */
1690
1691 /* {{{ Checks if a string contains another */
1692 PHP_FUNCTION(str_contains)
1693 {
1694 zend_string *haystack, *needle;
1695
1696 ZEND_PARSE_PARAMETERS_START(2, 2)
1697 Z_PARAM_STR(haystack)
1698 Z_PARAM_STR(needle)
1699 ZEND_PARSE_PARAMETERS_END();
1700
1701 RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1702 }
1703 /* }}} */
1704
1705 /* {{{ Checks if haystack starts with needle */
1706 PHP_FUNCTION(str_starts_with)
1707 {
1708 zend_string *haystack, *needle;
1709
1710 ZEND_PARSE_PARAMETERS_START(2, 2)
1711 Z_PARAM_STR(haystack)
1712 Z_PARAM_STR(needle)
1713 ZEND_PARSE_PARAMETERS_END();
1714
1715 RETURN_BOOL(zend_string_starts_with(haystack, needle));
1716 }
1717 /* }}} */
1718
1719 /* {{{ Checks if haystack ends with needle */
1720 PHP_FUNCTION(str_ends_with)
1721 {
1722 zend_string *haystack, *needle;
1723
1724 ZEND_PARSE_PARAMETERS_START(2, 2)
1725 Z_PARAM_STR(haystack)
1726 Z_PARAM_STR(needle)
1727 ZEND_PARSE_PARAMETERS_END();
1728
1729 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1730 RETURN_FALSE;
1731 }
1732
1733 RETURN_BOOL(memcmp(
1734 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1735 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1736 }
1737 /* }}} */
1738
1739 /* {{{ Finds position of first occurrence of a string within another */
1740 PHP_FUNCTION(strpos)
1741 {
1742 zend_string *haystack, *needle;
1743 const char *found = NULL;
1744 zend_long offset = 0;
1745
1746 ZEND_PARSE_PARAMETERS_START(2, 3)
1747 Z_PARAM_STR(haystack)
1748 Z_PARAM_STR(needle)
1749 Z_PARAM_OPTIONAL
1750 Z_PARAM_LONG(offset)
1751 ZEND_PARSE_PARAMETERS_END();
1752
1753 if (offset < 0) {
1754 offset += (zend_long)ZSTR_LEN(haystack);
1755 }
1756 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1757 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1758 RETURN_THROWS();
1759 }
1760
1761 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1762 ZSTR_VAL(needle), ZSTR_LEN(needle),
1763 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1764
1765 if (UNEXPECTED(!found)) {
1766 RETURN_FALSE;
1767 }
1768 RETURN_LONG(found - ZSTR_VAL(haystack));
1769 }
1770 /* }}} */
1771
1772 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
1773 PHP_FUNCTION(stripos)
1774 {
1775 const char *found = NULL;
1776 zend_string *haystack, *needle;
1777 zend_long offset = 0;
1778
1779 ZEND_PARSE_PARAMETERS_START(2, 3)
1780 Z_PARAM_STR(haystack)
1781 Z_PARAM_STR(needle)
1782 Z_PARAM_OPTIONAL
1783 Z_PARAM_LONG(offset)
1784 ZEND_PARSE_PARAMETERS_END();
1785
1786 if (offset < 0) {
1787 offset += (zend_long)ZSTR_LEN(haystack);
1788 }
1789 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1790 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1791 RETURN_THROWS();
1792 }
1793
1794 found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
1795 ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1796
1797 if (UNEXPECTED(!found)) {
1798 RETURN_FALSE;
1799 }
1800 RETURN_LONG(found - ZSTR_VAL(haystack));
1801 }
1802 /* }}} */
1803
1804 /* {{{ Finds position of last occurrence of a string within another string */
1805 PHP_FUNCTION(strrpos)
1806 {
1807 zend_string *needle;
1808 zend_string *haystack;
1809 zend_long offset = 0;
1810 const char *p, *e, *found;
1811
1812 ZEND_PARSE_PARAMETERS_START(2, 3)
1813 Z_PARAM_STR(haystack)
1814 Z_PARAM_STR(needle)
1815 Z_PARAM_OPTIONAL
1816 Z_PARAM_LONG(offset)
1817 ZEND_PARSE_PARAMETERS_END();
1818
1819 if (offset >= 0) {
1820 if ((size_t)offset > ZSTR_LEN(haystack)) {
1821 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1822 RETURN_THROWS();
1823 }
1824 p = ZSTR_VAL(haystack) + (size_t)offset;
1825 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1826 } else {
1827 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1828 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1829 RETURN_THROWS();
1830 }
1831
1832 p = ZSTR_VAL(haystack);
1833 if ((size_t)-offset < ZSTR_LEN(needle)) {
1834 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1835 } else {
1836 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
1837 }
1838 }
1839
1840 found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e);
1841
1842 if (UNEXPECTED(!found)) {
1843 RETURN_FALSE;
1844 }
1845 RETURN_LONG(found - ZSTR_VAL(haystack));
1846 }
1847 /* }}} */
1848
1849 /* {{{ Finds position of last occurrence of a string within another string */
1850 PHP_FUNCTION(strripos)
1851 {
1852 zend_string *needle;
1853 zend_string *haystack;
1854 zend_long offset = 0;
1855 const char *p, *e, *found;
1856 zend_string *needle_dup, *haystack_dup;
1857
1858 ZEND_PARSE_PARAMETERS_START(2, 3)
1859 Z_PARAM_STR(haystack)
1860 Z_PARAM_STR(needle)
1861 Z_PARAM_OPTIONAL
1862 Z_PARAM_LONG(offset)
1863 ZEND_PARSE_PARAMETERS_END();
1864
1865 if (ZSTR_LEN(needle) == 1) {
1866 /* Single character search can shortcut memcmps
1867 Can also avoid tolower emallocs */
1868 char lowered;
1869 if (offset >= 0) {
1870 if ((size_t)offset > ZSTR_LEN(haystack)) {
1871 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1872 RETURN_THROWS();
1873 }
1874 p = ZSTR_VAL(haystack) + (size_t)offset;
1875 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
1876 } else {
1877 p = ZSTR_VAL(haystack);
1878 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1879 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1880 RETURN_THROWS();
1881 }
1882 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
1883 }
1884 lowered = zend_tolower_ascii(*ZSTR_VAL(needle));
1885 while (e >= p) {
1886 if (zend_tolower_ascii(*e) == lowered) {
1887 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
1888 }
1889 e--;
1890 }
1891 RETURN_FALSE;
1892 }
1893
1894 haystack_dup = zend_string_tolower(haystack);
1895 if (offset >= 0) {
1896 if ((size_t)offset > ZSTR_LEN(haystack)) {
1897 zend_string_release_ex(haystack_dup, 0);
1898 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1899 RETURN_THROWS();
1900 }
1901 p = ZSTR_VAL(haystack_dup) + offset;
1902 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
1903 } else {
1904 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1905 zend_string_release_ex(haystack_dup, 0);
1906 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1907 RETURN_THROWS();
1908 }
1909
1910 p = ZSTR_VAL(haystack_dup);
1911 if ((size_t)-offset < ZSTR_LEN(needle)) {
1912 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
1913 } else {
1914 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
1915 }
1916 }
1917
1918 needle_dup = zend_string_tolower(needle);
1919 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
1920 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
1921 zend_string_release_ex(needle_dup, 0);
1922 zend_string_release_ex(haystack_dup, 0);
1923 } else {
1924 zend_string_release_ex(needle_dup, 0);
1925 zend_string_release_ex(haystack_dup, 0);
1926 RETURN_FALSE;
1927 }
1928 }
1929 /* }}} */
1930
1931 /* {{{ Finds the last occurrence of a character in a string within another */
1932 PHP_FUNCTION(strrchr)
1933 {
1934 zend_string *haystack, *needle;
1935 const char *found = NULL;
1936 zend_long found_offset;
1937 bool part = 0;
1938
1939 ZEND_PARSE_PARAMETERS_START(2, 3)
1940 Z_PARAM_STR(haystack)
1941 Z_PARAM_STR(needle)
1942 Z_PARAM_OPTIONAL
1943 Z_PARAM_BOOL(part)
1944 ZEND_PARSE_PARAMETERS_END();
1945
1946 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
1947 if (UNEXPECTED(!found)) {
1948 RETURN_FALSE;
1949 }
1950 found_offset = found - ZSTR_VAL(haystack);
1951 if (part) {
1952 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1953 }
1954 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1955 }
1956 /* }}} */
1957
1958 /* {{{ php_chunk_split */
1959 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
1960 {
1961 char *q;
1962 const char *p;
1963 size_t chunks;
1964 size_t restlen;
1965 zend_string *dest;
1966
1967 chunks = srclen / chunklen;
1968 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
1969 if (restlen) {
1970 /* We want chunks to be rounded up rather than rounded down.
1971 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
1972 chunks++;
1973 }
1974
1975 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
1976
1977 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
1978 memcpy(q, p, chunklen);
1979 q += chunklen;
1980 memcpy(q, end, endlen);
1981 q += endlen;
1982 p += chunklen;
1983 }
1984
1985 if (restlen) {
1986 memcpy(q, p, restlen);
1987 q += restlen;
1988 memcpy(q, end, endlen);
1989 q += endlen;
1990 }
1991
1992 *q = '\0';
1993 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
1994
1995 return dest;
1996 }
1997 /* }}} */
1998
1999 /* {{{ Returns split line */
2000 PHP_FUNCTION(chunk_split)
2001 {
2002 zend_string *str;
2003 char *end = "\r\n";
2004 size_t endlen = 2;
2005 zend_long chunklen = 76;
2006 zend_string *result;
2007
2008 ZEND_PARSE_PARAMETERS_START(1, 3)
2009 Z_PARAM_STR(str)
2010 Z_PARAM_OPTIONAL
2011 Z_PARAM_LONG(chunklen)
2012 Z_PARAM_STRING(end, endlen)
2013 ZEND_PARSE_PARAMETERS_END();
2014
2015 if (chunklen <= 0) {
2016 zend_argument_value_error(2, "must be greater than 0");
2017 RETURN_THROWS();
2018 }
2019
2020 if ((size_t)chunklen > ZSTR_LEN(str)) {
2021 /* to maintain BC, we must return original string + ending */
2022 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2023 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2024 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2025 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2026 RETURN_NEW_STR(result);
2027 }
2028
2029 if (!ZSTR_LEN(str)) {
2030 RETURN_EMPTY_STRING();
2031 }
2032
2033 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2034
2035 RETURN_STR(result);
2036 }
2037 /* }}} */
2038
2039 /* {{{ Returns part of a string */
2040 PHP_FUNCTION(substr)
2041 {
2042 zend_string *str;
2043 zend_long l = 0, f;
2044 bool len_is_null = 1;
2045
2046 ZEND_PARSE_PARAMETERS_START(2, 3)
2047 Z_PARAM_STR(str)
2048 Z_PARAM_LONG(f)
2049 Z_PARAM_OPTIONAL
2050 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2051 ZEND_PARSE_PARAMETERS_END();
2052
2053 if (f < 0) {
2054 /* if "from" position is negative, count start position from the end
2055 * of the string
2056 */
2057 if (-(size_t)f > ZSTR_LEN(str)) {
2058 f = 0;
2059 } else {
2060 f = (zend_long)ZSTR_LEN(str) + f;
2061 }
2062 } else if ((size_t)f > ZSTR_LEN(str)) {
2063 RETURN_EMPTY_STRING();
2064 }
2065
2066 if (!len_is_null) {
2067 if (l < 0) {
2068 /* if "length" position is negative, set it to the length
2069 * needed to stop that many chars from the end of the string
2070 */
2071 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2072 l = 0;
2073 } else {
2074 l = (zend_long)ZSTR_LEN(str) - f + l;
2075 }
2076 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2077 l = (zend_long)ZSTR_LEN(str) - f;
2078 }
2079 } else {
2080 l = (zend_long)ZSTR_LEN(str) - f;
2081 }
2082
2083 if (l == ZSTR_LEN(str)) {
2084 RETURN_STR_COPY(str);
2085 } else {
2086 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2087 }
2088 }
2089 /* }}} */
2090
2091 /* {{{ Replaces part of a string with another string */
2092 PHP_FUNCTION(substr_replace)
2093 {
2094 zend_string *str, *repl_str;
2095 HashTable *str_ht, *repl_ht;
2096 HashTable *from_ht;
2097 zend_long from_long;
2098 HashTable *len_ht = NULL;
2099 zend_long len_long;
2100 bool len_is_null = 1;
2101 zend_long l = 0;
2102 zend_long f;
2103 zend_string *result;
2104 HashPosition from_idx, repl_idx, len_idx;
2105 zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
2106
2107 ZEND_PARSE_PARAMETERS_START(3, 4)
2108 Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
2109 Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
2110 Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
2111 Z_PARAM_OPTIONAL
2112 Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
2113 ZEND_PARSE_PARAMETERS_END();
2114
2115 if (len_is_null) {
2116 if (str) {
2117 l = ZSTR_LEN(str);
2118 }
2119 } else if (!len_ht) {
2120 l = len_long;
2121 }
2122
2123 if (str) {
2124 if (from_ht) {
2125 zend_argument_type_error(3, "cannot be an array when working on a single string");
2126 RETURN_THROWS();
2127 }
2128 if (len_ht) {
2129 zend_argument_type_error(4, "cannot be an array when working on a single string");
2130 RETURN_THROWS();
2131 }
2132
2133 f = from_long;
2134
2135 /* if "from" position is negative, count start position from the end
2136 * of the string
2137 */
2138 if (f < 0) {
2139 f = (zend_long)ZSTR_LEN(str) + f;
2140 if (f < 0) {
2141 f = 0;
2142 }
2143 } else if ((size_t)f > ZSTR_LEN(str)) {
2144 f = ZSTR_LEN(str);
2145 }
2146 /* if "length" position is negative, set it to the length
2147 * needed to stop that many chars from the end of the string
2148 */
2149 if (l < 0) {
2150 l = ((zend_long)ZSTR_LEN(str) - f) + l;
2151 if (l < 0) {
2152 l = 0;
2153 }
2154 }
2155
2156 if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
2157 l = ZSTR_LEN(str);
2158 }
2159
2160 if ((f + l) > (zend_long)ZSTR_LEN(str)) {
2161 l = ZSTR_LEN(str) - f;
2162 }
2163
2164 zend_string *tmp_repl_str = NULL;
2165 if (repl_ht) {
2166 repl_idx = 0;
2167 if (HT_IS_PACKED(repl_ht)) {
2168 while (repl_idx < repl_ht->nNumUsed) {
2169 tmp_repl = &repl_ht->arPacked[repl_idx];
2170 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2171 break;
2172 }
2173 repl_idx++;
2174 }
2175 } else {
2176 while (repl_idx < repl_ht->nNumUsed) {
2177 tmp_repl = &repl_ht->arData[repl_idx].val;
2178 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2179 break;
2180 }
2181 repl_idx++;
2182 }
2183 }
2184 if (repl_idx < repl_ht->nNumUsed) {
2185 repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2186 } else {
2187 repl_str = STR_EMPTY_ALLOC();
2188 }
2189 }
2190
2191 result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
2192
2193 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
2194 if (ZSTR_LEN(repl_str)) {
2195 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2196 }
2197 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
2198 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2199 zend_tmp_string_release(tmp_repl_str);
2200 RETURN_NEW_STR(result);
2201 } else { /* str is array of strings */
2202 zend_string *str_index = NULL;
2203 size_t result_len;
2204 zend_ulong num_index;
2205
2206 /* TODO
2207 if (!len_is_null && from_ht) {
2208 if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
2209 php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
2210 RETURN_STR_COPY(str);
2211 }
2212 }
2213 */
2214
2215 array_init(return_value);
2216
2217 from_idx = len_idx = repl_idx = 0;
2218
2219 ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
2220 zend_string *tmp_orig_str;
2221 zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
2222
2223 if (from_ht) {
2224 if (HT_IS_PACKED(from_ht)) {
2225 while (from_idx < from_ht->nNumUsed) {
2226 tmp_from = &from_ht->arPacked[from_idx];
2227 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2228 break;
2229 }
2230 from_idx++;
2231 }
2232 } else {
2233 while (from_idx < from_ht->nNumUsed) {
2234 tmp_from = &from_ht->arData[from_idx].val;
2235 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2236 break;
2237 }
2238 from_idx++;
2239 }
2240 }
2241 if (from_idx < from_ht->nNumUsed) {
2242 f = zval_get_long(tmp_from);
2243
2244 if (f < 0) {
2245 f = (zend_long)ZSTR_LEN(orig_str) + f;
2246 if (f < 0) {
2247 f = 0;
2248 }
2249 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2250 f = ZSTR_LEN(orig_str);
2251 }
2252 from_idx++;
2253 } else {
2254 f = 0;
2255 }
2256 } else {
2257 f = from_long;
2258 if (f < 0) {
2259 f = (zend_long)ZSTR_LEN(orig_str) + f;
2260 if (f < 0) {
2261 f = 0;
2262 }
2263 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2264 f = ZSTR_LEN(orig_str);
2265 }
2266 }
2267
2268 if (len_ht) {
2269 if (HT_IS_PACKED(len_ht)) {
2270 while (len_idx < len_ht->nNumUsed) {
2271 tmp_len = &len_ht->arPacked[len_idx];
2272 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2273 break;
2274 }
2275 len_idx++;
2276 }
2277 } else {
2278 while (len_idx < len_ht->nNumUsed) {
2279 tmp_len = &len_ht->arData[len_idx].val;
2280 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2281 break;
2282 }
2283 len_idx++;
2284 }
2285 }
2286 if (len_idx < len_ht->nNumUsed) {
2287 l = zval_get_long(tmp_len);
2288 len_idx++;
2289 } else {
2290 l = ZSTR_LEN(orig_str);
2291 }
2292 } else if (!len_is_null) {
2293 l = len_long;
2294 } else {
2295 l = ZSTR_LEN(orig_str);
2296 }
2297
2298 if (l < 0) {
2299 l = (ZSTR_LEN(orig_str) - f) + l;
2300 if (l < 0) {
2301 l = 0;
2302 }
2303 }
2304
2305 ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
2306 ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
2307 if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
2308 l = ZSTR_LEN(orig_str) - f;
2309 }
2310
2311 result_len = ZSTR_LEN(orig_str) - l;
2312
2313 if (repl_ht) {
2314 if (HT_IS_PACKED(repl_ht)) {
2315 while (repl_idx < repl_ht->nNumUsed) {
2316 tmp_repl = &repl_ht->arPacked[repl_idx];
2317 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2318 break;
2319 }
2320 repl_idx++;
2321 }
2322 } else {
2323 while (repl_idx < repl_ht->nNumUsed) {
2324 tmp_repl = &repl_ht->arData[repl_idx].val;
2325 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2326 break;
2327 }
2328 repl_idx++;
2329 }
2330 }
2331 if (repl_idx < repl_ht->nNumUsed) {
2332 zend_string *tmp_repl_str;
2333 zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2334
2335 result_len += ZSTR_LEN(repl_str);
2336 repl_idx++;
2337 result = zend_string_safe_alloc(1, result_len, 0, 0);
2338
2339 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2340 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2341 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2342 zend_tmp_string_release(tmp_repl_str);
2343 } else {
2344 result = zend_string_safe_alloc(1, result_len, 0, 0);
2345
2346 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2347 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2348 }
2349 } else {
2350 result_len += ZSTR_LEN(repl_str);
2351
2352 result = zend_string_safe_alloc(1, result_len, 0, 0);
2353
2354 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2355 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2356 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2357 }
2358
2359 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2360
2361 if (str_index) {
2362 zval tmp;
2363
2364 ZVAL_NEW_STR(&tmp, result);
2365 zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
2366 } else {
2367 add_index_str(return_value, num_index, result);
2368 }
2369
2370 zend_tmp_string_release(tmp_orig_str);
2371 } ZEND_HASH_FOREACH_END();
2372 } /* if */
2373 }
2374 /* }}} */
2375
2376 /* {{{ Quotes meta characters */
2377 PHP_FUNCTION(quotemeta)
2378 {
2379 zend_string *old;
2380 const char *old_end, *p;
2381 char *q;
2382 char c;
2383 zend_string *str;
2384
2385 ZEND_PARSE_PARAMETERS_START(1, 1)
2386 Z_PARAM_STR(old)
2387 ZEND_PARSE_PARAMETERS_END();
2388
2389 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2390
2391 if (ZSTR_LEN(old) == 0) {
2392 RETURN_EMPTY_STRING();
2393 }
2394
2395 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2396
2397 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2398 c = *p;
2399 switch (c) {
2400 case '.':
2401 case '\\':
2402 case '+':
2403 case '*':
2404 case '?':
2405 case '[':
2406 case '^':
2407 case ']':
2408 case '$':
2409 case '(':
2410 case ')':
2411 *q++ = '\\';
2412 ZEND_FALLTHROUGH;
2413 default:
2414 *q++ = c;
2415 }
2416 }
2417
2418 *q = '\0';
2419
2420 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2421 }
2422 /* }}} */
2423
2424 /* {{{ Returns ASCII value of character
2425 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2426 PHP_FUNCTION(ord)
2427 {
2428 zend_string *str;
2429
2430 ZEND_PARSE_PARAMETERS_START(1, 1)
2431 Z_PARAM_STR(str)
2432 ZEND_PARSE_PARAMETERS_END();
2433
2434 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2435 }
2436 /* }}} */
2437
2438 /* {{{ Converts ASCII code to a character
2439 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2440 PHP_FUNCTION(chr)
2441 {
2442 zend_long c;
2443
2444 ZEND_PARSE_PARAMETERS_START(1, 1)
2445 Z_PARAM_LONG(c)
2446 ZEND_PARSE_PARAMETERS_END();
2447
2448 c &= 0xff;
2449 RETURN_CHAR(c);
2450 }
2451 /* }}} */
2452
2453 /* {{{ php_ucfirst
2454 Uppercase the first character of the word in a native string */
2455 static zend_string* php_ucfirst(zend_string *str)
2456 {
2457 const unsigned char ch = ZSTR_VAL(str)[0];
2458 unsigned char r = zend_toupper_ascii(ch);
2459 if (r == ch) {
2460 return zend_string_copy(str);
2461 } else {
2462 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2463 ZSTR_VAL(s)[0] = r;
2464 return s;
2465 }
2466 }
2467 /* }}} */
2468
2469 /* {{{ Makes a string's first character uppercase */
2470 PHP_FUNCTION(ucfirst)
2471 {
2472 zend_string *str;
2473
2474 ZEND_PARSE_PARAMETERS_START(1, 1)
2475 Z_PARAM_STR(str)
2476 ZEND_PARSE_PARAMETERS_END();
2477
2478 if (!ZSTR_LEN(str)) {
2479 RETURN_EMPTY_STRING();
2480 }
2481
2482 RETURN_STR(php_ucfirst(str));
2483 }
2484 /* }}} */
2485
2486 /* {{{
2487 Lowercase the first character of the word in a native string */
2488 static zend_string* php_lcfirst(zend_string *str)
2489 {
2490 unsigned char r = zend_tolower_ascii(ZSTR_VAL(str)[0]);
2491 if (r == ZSTR_VAL(str)[0]) {
2492 return zend_string_copy(str);
2493 } else {
2494 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2495 ZSTR_VAL(s)[0] = r;
2496 return s;
2497 }
2498 }
2499 /* }}} */
2500
2501 /* {{{ Make a string's first character lowercase */
2502 PHP_FUNCTION(lcfirst)
2503 {
2504 zend_string *str;
2505
2506 ZEND_PARSE_PARAMETERS_START(1, 1)
2507 Z_PARAM_STR(str)
2508 ZEND_PARSE_PARAMETERS_END();
2509
2510 if (!ZSTR_LEN(str)) {
2511 RETURN_EMPTY_STRING();
2512 }
2513
2514 RETURN_STR(php_lcfirst(str));
2515 }
2516 /* }}} */
2517
2518 /* {{{ Uppercase the first character of every word in a string */
2519 PHP_FUNCTION(ucwords)
2520 {
2521 zend_string *str;
2522 char *delims = " \t\r\n\f\v";
2523 char *r;
2524 const char *r_end;
2525 size_t delims_len = 6;
2526 char mask[256];
2527
2528 ZEND_PARSE_PARAMETERS_START(1, 2)
2529 Z_PARAM_STR(str)
2530 Z_PARAM_OPTIONAL
2531 Z_PARAM_STRING(delims, delims_len)
2532 ZEND_PARSE_PARAMETERS_END();
2533
2534 if (!ZSTR_LEN(str)) {
2535 RETURN_EMPTY_STRING();
2536 }
2537
2538 php_charmask((const unsigned char *) delims, delims_len, mask);
2539
2540 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2541 r = Z_STRVAL_P(return_value);
2542
2543 *r = zend_toupper_ascii((unsigned char) *r);
2544 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2545 if (mask[(unsigned char)*r++]) {
2546 *r = zend_toupper_ascii((unsigned char) *r);
2547 }
2548 }
2549 }
2550 /* }}} */
2551
2552 /* {{{ php_strtr */
2553 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2554 {
2555 size_t i;
2556
2557 if (UNEXPECTED(trlen < 1)) {
2558 return str;
2559 } else if (trlen == 1) {
2560 char ch_from = *str_from;
2561 char ch_to = *str_to;
2562
2563 for (i = 0; i < len; i++) {
2564 if (str[i] == ch_from) {
2565 str[i] = ch_to;
2566 }
2567 }
2568 } else {
2569 unsigned char xlat[256];
2570
2571 memset(xlat, 0, sizeof(xlat));
2572
2573 for (i = 0; i < trlen; i++) {
2574 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2575 }
2576
2577 for (i = 0; i < len; i++) {
2578 str[i] += xlat[(size_t)(unsigned char) str[i]];
2579 }
2580 }
2581
2582 return str;
2583 }
2584 /* }}} */
2585
2586 /* {{{ php_strtr_ex */
2587 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2588 {
2589 zend_string *new_str = NULL;
2590 size_t i;
2591
2592 if (UNEXPECTED(trlen < 1)) {
2593 return zend_string_copy(str);
2594 } else if (trlen == 1) {
2595 char ch_from = *str_from;
2596 char ch_to = *str_to;
2597 char *output;
2598 char *input = ZSTR_VAL(str);
2599 size_t len = ZSTR_LEN(str);
2600
2601 #ifdef __SSE2__
2602 if (ZSTR_LEN(str) >= sizeof(__m128i)) {
2603 __m128i search = _mm_set1_epi8(ch_from);
2604 __m128i delta = _mm_set1_epi8(ch_to - ch_from);
2605
2606 do {
2607 __m128i src = _mm_loadu_si128((__m128i*)(input));
2608 __m128i mask = _mm_cmpeq_epi8(src, search);
2609 if (_mm_movemask_epi8(mask)) {
2610 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2611 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2612 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2613 _mm_storeu_si128((__m128i *)(output),
2614 _mm_add_epi8(src,
2615 _mm_and_si128(mask, delta)));
2616 input += sizeof(__m128i);
2617 output += sizeof(__m128i);
2618 len -= sizeof(__m128i);
2619 for (; len >= sizeof(__m128i); input += sizeof(__m128i), output += sizeof(__m128i), len -= sizeof(__m128i)) {
2620 src = _mm_loadu_si128((__m128i*)(input));
2621 mask = _mm_cmpeq_epi8(src, search);
2622 _mm_storeu_si128((__m128i *)(output),
2623 _mm_add_epi8(src,
2624 _mm_and_si128(mask, delta)));
2625 }
2626 for (; len > 0; input++, output++, len--) {
2627 *output = (*input == ch_from) ? ch_to : *input;
2628 }
2629 *output = 0;
2630 return new_str;
2631 }
2632 input += sizeof(__m128i);
2633 len -= sizeof(__m128i);
2634 } while (len >= sizeof(__m128i));
2635 }
2636 #endif
2637 for (; len > 0; input++, len--) {
2638 if (*input == ch_from) {
2639 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2640 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2641 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2642 *output = ch_to;
2643 input++;
2644 output++;
2645 len--;
2646 for (; len > 0; input++, output++, len--) {
2647 *output = (*input == ch_from) ? ch_to : *input;
2648 }
2649 *output = 0;
2650 return new_str;
2651 }
2652 }
2653 } else {
2654 unsigned char xlat[256];
2655
2656 memset(xlat, 0, sizeof(xlat));;
2657
2658 for (i = 0; i < trlen; i++) {
2659 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2660 }
2661
2662 for (i = 0; i < ZSTR_LEN(str); i++) {
2663 if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2664 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2665 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2666 do {
2667 ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2668 i++;
2669 } while (i < ZSTR_LEN(str));
2670 ZSTR_VAL(new_str)[i] = 0;
2671 return new_str;
2672 }
2673 }
2674 }
2675
2676 return zend_string_copy(str);
2677 }
2678 /* }}} */
2679
2680 /* {{{ php_strtr_array */
2681 static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
2682 {
2683 const char *str = ZSTR_VAL(input);
2684 size_t slen = ZSTR_LEN(input);
2685 zend_ulong num_key;
2686 zend_string *str_key;
2687 size_t len, pos, old_pos;
2688 bool has_num_keys = false;
2689 size_t minlen = 128*1024;
2690 size_t maxlen = 0;
2691 HashTable str_hash;
2692 zval *entry;
2693 const char *key;
2694 smart_str result = {0};
2695 zend_ulong bitset[256/sizeof(zend_ulong)];
2696 zend_ulong *num_bitset;
2697
2698 /* we will collect all possible key lengths */
2699 num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
2700 memset(bitset, 0, sizeof(bitset));
2701
2702 /* check if original array has numeric keys */
2703 ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
2704 if (UNEXPECTED(!str_key)) {
2705 has_num_keys = true;
2706 } else {
2707 len = ZSTR_LEN(str_key);
2708 if (UNEXPECTED(len == 0)) {
2709 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
2710 continue;
2711 } else if (UNEXPECTED(len > slen)) {
2712 /* skip long patterns */
2713 continue;
2714 }
2715 if (len > maxlen) {
2716 maxlen = len;
2717 }
2718 if (len < minlen) {
2719 minlen = len;
2720 }
2721 /* remember possible key length */
2722 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2723 bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
2724 }
2725 } ZEND_HASH_FOREACH_END();
2726
2727 if (UNEXPECTED(has_num_keys)) {
2728 zend_string *key_used;
2729 /* we have to rebuild HashTable with numeric keys */
2730 zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
2731 ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
2732 if (UNEXPECTED(!str_key)) {
2733 key_used = zend_long_to_str(num_key);
2734 len = ZSTR_LEN(key_used);
2735 if (UNEXPECTED(len > slen)) {
2736 /* skip long patterns */
2737 zend_string_release(key_used);
2738 continue;
2739 }
2740 if (len > maxlen) {
2741 maxlen = len;
2742 }
2743 if (len < minlen) {
2744 minlen = len;
2745 }
2746 /* remember possible key length */
2747 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2748 bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
2749 } else {
2750 key_used = str_key;
2751 len = ZSTR_LEN(key_used);
2752 if (UNEXPECTED(len > slen)) {
2753 /* skip long patterns */
2754 continue;
2755 }
2756 }
2757 zend_hash_add(&str_hash, key_used, entry);
2758 if (UNEXPECTED(!str_key)) {
2759 zend_string_release_ex(key_used, 0);
2760 }
2761 } ZEND_HASH_FOREACH_END();
2762 pats = &str_hash;
2763 }
2764
2765 if (UNEXPECTED(minlen > maxlen)) {
2766 /* return the original string */
2767 if (pats == &str_hash) {
2768 zend_hash_destroy(&str_hash);
2769 }
2770 efree(num_bitset);
2771 RETURN_STR_COPY(input);
2772 }
2773
2774 old_pos = pos = 0;
2775 while (pos <= slen - minlen) {
2776 key = str + pos;
2777 if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
2778 len = maxlen;
2779 if (len > slen - pos) {
2780 len = slen - pos;
2781 }
2782 while (len >= minlen) {
2783 if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
2784 entry = zend_hash_str_find(pats, key, len);
2785 if (entry != NULL) {
2786 zend_string *tmp;
2787 zend_string *s = zval_get_tmp_string(entry, &tmp);
2788 smart_str_appendl(&result, str + old_pos, pos - old_pos);
2789 smart_str_append(&result, s);
2790 old_pos = pos + len;
2791 pos = old_pos - 1;
2792 zend_tmp_string_release(tmp);
2793 break;
2794 }
2795 }
2796 len--;
2797 }
2798 }
2799 pos++;
2800 }
2801
2802 if (result.s) {
2803 smart_str_appendl(&result, str + old_pos, slen - old_pos);
2804 RETVAL_STR(smart_str_extract(&result));
2805 } else {
2806 smart_str_free(&result);
2807 RETVAL_STR_COPY(input);
2808 }
2809
2810 if (pats == &str_hash) {
2811 zend_hash_destroy(&str_hash);
2812 }
2813 efree(num_bitset);
2814 }
2815 /* }}} */
2816
2817 /* {{{ count_chars */
2818 static zend_always_inline zend_long count_chars(const char *p, zend_long length, char ch)
2819 {
2820 zend_long count = 0;
2821 const char *endp;
2822
2823 #ifdef __SSE2__
2824 if (length >= sizeof(__m128i)) {
2825 __m128i search = _mm_set1_epi8(ch);
2826
2827 do {
2828 __m128i src = _mm_loadu_si128((__m128i*)(p));
2829 uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(src, search));
2830 // TODO: It would be great to use POPCNT, but it's available only with SSE4.1
2831 #if 1
2832 while (mask != 0) {
2833 count++;
2834 mask = mask & (mask - 1);
2835 }
2836 #else
2837 if (mask) {
2838 mask = mask - ((mask >> 1) & 0x5555);
2839 mask = (mask & 0x3333) + ((mask >> 2) & 0x3333);
2840 mask = (mask + (mask >> 4)) & 0x0F0F;
2841 mask = (mask + (mask >> 8)) & 0x00ff;
2842 count += mask;
2843 }
2844 #endif
2845 p += sizeof(__m128i);
2846 length -= sizeof(__m128i);
2847 } while (length >= sizeof(__m128i));
2848 }
2849 endp = p + length;
2850 while (p != endp) {
2851 count += (*p == ch);
2852 p++;
2853 }
2854 #else
2855 endp = p + length;
2856 while ((p = memchr(p, ch, endp-p))) {
2857 count++;
2858 p++;
2859 }
2860 #endif
2861 return count;
2862 }
2863 /* }}} */
2864
2865 /* {{{ php_char_to_str_ex */
2866 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, bool case_sensitivity, zend_long *replace_count)
2867 {
2868 zend_string *result;
2869 size_t char_count;
2870 int lc_from = 0;
2871 const char *source, *source_end;
2872 char *target;
2873
2874 if (case_sensitivity) {
2875 char_count = count_chars(ZSTR_VAL(str), ZSTR_LEN(str), from);
2876 } else {
2877 char_count = 0;
2878 lc_from = zend_tolower_ascii(from);
2879 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
2880 for (source = ZSTR_VAL(str); source < source_end; source++) {
2881 if (zend_tolower_ascii(*source) == lc_from) {
2882 char_count++;
2883 }
2884 }
2885 }
2886
2887 if (char_count == 0) {
2888 return zend_string_copy(str);
2889 }
2890
2891 if (replace_count) {
2892 *replace_count += char_count;
2893 }
2894
2895 if (to_len > 0) {
2896 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
2897 } else {
2898 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
2899 }
2900 target = ZSTR_VAL(result);
2901
2902 if (case_sensitivity) {
2903 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
2904
2905 while ((p = memchr(p, from, (e - p)))) {
2906 memcpy(target, s, (p - s));
2907 target += p - s;
2908 memcpy(target, to, to_len);
2909 target += to_len;
2910 p++;
2911 s = p;
2912 if (--char_count == 0) break;
2913 }
2914 if (s < e) {
2915 memcpy(target, s, (e - s));
2916 target += e - s;
2917 }
2918 } else {
2919 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
2920 for (source = ZSTR_VAL(str); source < source_end; source++) {
2921 if (zend_tolower_ascii(*source) == lc_from) {
2922 memcpy(target, to, to_len);
2923 target += to_len;
2924 } else {
2925 *target = *source;
2926 target++;
2927 }
2928 }
2929 }
2930 *target = 0;
2931 return result;
2932 }
2933 /* }}} */
2934
2935 /* {{{ php_str_to_str_ex */
2936 static zend_string *php_str_to_str_ex(zend_string *haystack,
2937 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
2938 {
2939
2940 if (needle_len < ZSTR_LEN(haystack)) {
2941 zend_string *new_str;
2942 const char *end;
2943 const char *p, *r;
2944 char *e;
2945
2946 if (needle_len == str_len) {
2947 new_str = NULL;
2948 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2949 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
2950 if (!new_str) {
2951 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
2952 }
2953 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
2954 (*replace_count)++;
2955 }
2956 if (!new_str) {
2957 goto nothing_todo;
2958 }
2959 return new_str;
2960 } else {
2961 size_t count = 0;
2962 const char *o = ZSTR_VAL(haystack);
2963 const char *n = needle;
2964 const char *endp = o + ZSTR_LEN(haystack);
2965
2966 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
2967 o += needle_len;
2968 count++;
2969 }
2970 if (count == 0) {
2971 /* Needle doesn't occur, shortcircuit the actual replacement. */
2972 goto nothing_todo;
2973 }
2974 if (str_len > needle_len) {
2975 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
2976 } else {
2977 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
2978 }
2979
2980 e = ZSTR_VAL(new_str);
2981 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2982 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
2983 memcpy(e, p, r - p);
2984 e += r - p;
2985 memcpy(e, str, str_len);
2986 e += str_len;
2987 (*replace_count)++;
2988 }
2989
2990 if (p < end) {
2991 memcpy(e, p, end - p);
2992 e += end - p;
2993 }
2994
2995 *e = '\0';
2996 return new_str;
2997 }
2998 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
2999 nothing_todo:
3000 return zend_string_copy(haystack);
3001 } else {
3002 (*replace_count)++;
3003 return zend_string_init_fast(str, str_len);
3004 }
3005 }
3006 /* }}} */
3007
3008 /* {{{ php_str_to_str_i_ex */
3009 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3010 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3011 {
3012 zend_string *new_str = NULL;
3013 zend_string *lc_needle;
3014
3015 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3016 const char *end;
3017 const char *p, *r;
3018 char *e;
3019
3020 if (ZSTR_LEN(needle) == str_len) {
3021 lc_needle = zend_string_tolower(needle);
3022 end = lc_haystack + ZSTR_LEN(haystack);
3023 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3024 if (!new_str) {
3025 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3026 }
3027 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3028 (*replace_count)++;
3029 }
3030 zend_string_release_ex(lc_needle, 0);
3031
3032 if (!new_str) {
3033 goto nothing_todo;
3034 }
3035 return new_str;
3036 } else {
3037 size_t count = 0;
3038 const char *o = lc_haystack;
3039 const char *n;
3040 const char *endp = o + ZSTR_LEN(haystack);
3041
3042 lc_needle = zend_string_tolower(needle);
3043 n = ZSTR_VAL(lc_needle);
3044
3045 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3046 o += ZSTR_LEN(lc_needle);
3047 count++;
3048 }
3049 if (count == 0) {
3050 /* Needle doesn't occur, shortcircuit the actual replacement. */
3051 zend_string_release_ex(lc_needle, 0);
3052 goto nothing_todo;
3053 }
3054
3055 if (str_len > ZSTR_LEN(lc_needle)) {
3056 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3057 } else {
3058 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3059 }
3060
3061 e = ZSTR_VAL(new_str);
3062 end = lc_haystack + ZSTR_LEN(haystack);
3063
3064 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3065 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3066 e += r - p;
3067 memcpy(e, str, str_len);
3068 e += str_len;
3069 (*replace_count)++;
3070 }
3071
3072 if (p < end) {
3073 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3074 e += end - p;
3075 }
3076 *e = '\0';
3077
3078 zend_string_release_ex(lc_needle, 0);
3079
3080 return new_str;
3081 }
3082 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3083 nothing_todo:
3084 return zend_string_copy(haystack);
3085 } else {
3086 lc_needle = zend_string_tolower(needle);
3087
3088 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3089 zend_string_release_ex(lc_needle, 0);
3090 goto nothing_todo;
3091 }
3092 zend_string_release_ex(lc_needle, 0);
3093
3094 new_str = zend_string_init(str, str_len, 0);
3095
3096 (*replace_count)++;
3097 return new_str;
3098 }
3099 }
3100 /* }}} */
3101
3102 /* {{{ php_str_to_str */
3103 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3104 {
3105 zend_string *new_str;
3106
3107 if (needle_len < length) {
3108 const char *end;
3109 const char *s, *p;
3110 char *e, *r;
3111
3112 if (needle_len == str_len) {
3113 new_str = zend_string_init(haystack, length, 0);
3114 end = ZSTR_VAL(new_str) + length;
3115 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3116 memcpy(r, str, str_len);
3117 }
3118 return new_str;
3119 } else {
3120 if (str_len < needle_len) {
3121 new_str = zend_string_alloc(length, 0);
3122 } else {
3123 size_t count = 0;
3124 const char *o = haystack;
3125 const char *n = needle;
3126 const char *endp = o + length;
3127
3128 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3129 o += needle_len;
3130 count++;
3131 }
3132 if (count == 0) {
3133 /* Needle doesn't occur, shortcircuit the actual replacement. */
3134 new_str = zend_string_init(haystack, length, 0);
3135 return new_str;
3136 } else {
3137 if (str_len > needle_len) {
3138 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3139 } else {
3140 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3141 }
3142 }
3143 }
3144
3145 s = e = ZSTR_VAL(new_str);
3146 end = haystack + length;
3147 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3148 memcpy(e, p, r - p);
3149 e += r - p;
3150 memcpy(e, str, str_len);
3151 e += str_len;
3152 }
3153
3154 if (p < end) {
3155 memcpy(e, p, end - p);
3156 e += end - p;
3157 }
3158
3159 *e = '\0';
3160 new_str = zend_string_truncate(new_str, e - s, 0);
3161 return new_str;
3162 }
3163 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3164 new_str = zend_string_init(haystack, length, 0);
3165 return new_str;
3166 } else {
3167 new_str = zend_string_init(str, str_len, 0);
3168
3169 return new_str;
3170 }
3171 }
3172 /* }}} */
3173
3174 /* {{{ Translates characters in str using given translation tables */
3175 PHP_FUNCTION(strtr)
3176 {
3177 zend_string *str, *from_str = NULL;
3178 HashTable *from_ht = NULL;
3179 char *to = NULL;
3180 size_t to_len = 0;
3181
3182 ZEND_PARSE_PARAMETERS_START(2, 3)
3183 Z_PARAM_STR(str)
3184 Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
3185 Z_PARAM_OPTIONAL
3186 Z_PARAM_STRING_OR_NULL(to, to_len)
3187 ZEND_PARSE_PARAMETERS_END();
3188
3189 if (!to && from_ht == NULL) {
3190 zend_argument_type_error(2, "must be of type array, string given");
3191 RETURN_THROWS();
3192 } else if (to && from_str == NULL) {
3193 zend_argument_type_error(2, "must be of type string, array given");
3194 RETURN_THROWS();
3195 }
3196
3197 /* shortcut for empty string */
3198 if (ZSTR_LEN(str) == 0) {
3199 RETURN_EMPTY_STRING();
3200 }
3201
3202 if (!to) {
3203 if (zend_hash_num_elements(from_ht) < 1) {
3204 RETURN_STR_COPY(str);
3205 } else if (zend_hash_num_elements(from_ht) == 1) {
3206 zend_long num_key;
3207 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3208 zval *entry;
3209
3210 ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
3211 tmp_str = NULL;
3212 if (UNEXPECTED(!str_key)) {
3213 str_key = tmp_str = zend_long_to_str(num_key);
3214 }
3215 replace = zval_get_tmp_string(entry, &tmp_replace);
3216 if (ZSTR_LEN(str_key) < 1) {
3217 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3218 RETVAL_STR_COPY(str);
3219 } else if (ZSTR_LEN(str_key) == 1) {
3220 RETVAL_STR(php_char_to_str_ex(str,
3221 ZSTR_VAL(str_key)[0],
3222 ZSTR_VAL(replace),
3223 ZSTR_LEN(replace),
3224 /* case_sensitive */ true,
3225 NULL));
3226 } else {
3227 zend_long dummy = 0;
3228 RETVAL_STR(php_str_to_str_ex(str,
3229 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3230 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3231 }
3232 zend_tmp_string_release(tmp_str);
3233 zend_tmp_string_release(tmp_replace);
3234 return;
3235 } ZEND_HASH_FOREACH_END();
3236 } else {
3237 php_strtr_array(return_value, str, from_ht);
3238 }
3239 } else {
3240 RETURN_STR(php_strtr_ex(str,
3241 ZSTR_VAL(from_str),
3242 to,
3243 MIN(ZSTR_LEN(from_str), to_len)));
3244 }
3245 }
3246 /* }}} */
3247
3248 /* {{{ Reverse a string */
3249 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3250 #include <tmmintrin.h>
3251 #elif defined(__aarch64__) || defined(_M_ARM64)
3252 #include <arm_neon.h>
3253 #endif
3254 PHP_FUNCTION(strrev)
3255 {
3256 zend_string *str;
3257 const char *s, *e;
3258 char *p;
3259 zend_string *n;
3260
3261 ZEND_PARSE_PARAMETERS_START(1, 1)
3262 Z_PARAM_STR(str)
3263 ZEND_PARSE_PARAMETERS_END();
3264
3265 n = zend_string_alloc(ZSTR_LEN(str), 0);
3266 p = ZSTR_VAL(n);
3267
3268 s = ZSTR_VAL(str);
3269 e = s + ZSTR_LEN(str);
3270 --e;
3271 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3272 if (e - s > 15) {
3273 const __m128i map = _mm_set_epi8(
3274 0, 1, 2, 3,
3275 4, 5, 6, 7,
3276 8, 9, 10, 11,
3277 12, 13, 14, 15);
3278 do {
3279 const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
3280 _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
3281 p += 16;
3282 e -= 16;
3283 } while (e - s > 15);
3284 }
3285 #elif defined(__aarch64__)
3286 if (e - s > 15) {
3287 do {
3288 const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
3289 /* Synthesize rev128 with a rev64 + ext. */
3290 const uint8x16_t rev = vrev64q_u8(str);
3291 const uint8x16_t ext = (uint8x16_t)
3292 vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
3293 vst1q_u8((uint8_t *)p, ext);
3294 p += 16;
3295 e -= 16;
3296 } while (e - s > 15);
3297 }
3298 #elif defined(_M_ARM64)
3299 if (e - s > 15) {
3300 do {
3301 const __n128 str = vld1q_u8((uint8_t *)(e - 15));
3302 /* Synthesize rev128 with a rev64 + ext. */
3303 /* strange force cast limit on windows: you cannot convert anything */
3304 const __n128 rev = vrev64q_u8(str);
3305 const __n128 ext = vextq_u64(rev, rev, 1);
3306 vst1q_u8((uint8_t *)p, ext);
3307 p += 16;
3308 e -= 16;
3309 } while (e - s > 15);
3310 }
3311 #endif
3312 while (e >= s) {
3313 *p++ = *e--;
3314 }
3315
3316 *p = '\0';
3317
3318 RETVAL_NEW_STR(n);
3319 }
3320 /* }}} */
3321
/* {{{ php_similar_str
   Finds the longest common substring of txt1[0..len1) and txt2[0..len2).
   On return *max is its length (0 if the inputs share no byte) and *count is
   the number of times a longer candidate replaced the current best during
   the scan. *pos1 and *pos2 receive the start offsets of the winning
   substring; they are left untouched when nothing was found. Among
   substrings of equal length the first one encountered wins. */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t best_len = 0;
	size_t improvements = 0;
	size_t best_i = 0, best_j = 0;
	size_t i, j;

	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* Length of the common run starting at txt1[i] / txt2[j]. */
			size_t run = 0;

			while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
				run++;
			}

			/* Strictly longer only, so earlier candidates win ties. */
			if (run > best_len) {
				best_len = run;
				improvements++;
				best_i = i;
				best_j = j;
			}
		}
	}

	*max = best_len;
	*count = improvements;
	if (improvements != 0) {
		*pos1 = best_i;
		*pos2 = best_j;
	}
}
3344 /* }}} */
3345
/* {{{ php_similar_char
   Returns the number of matching bytes between the two buffers: the length
   of their longest common substring plus, recursively, the result for the
   parts to the left and to the right of that substring. */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t pos1 = 0, pos2 = 0, max, count;
	size_t total;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
	if (max == 0) {
		/* No common byte at all. */
		return 0;
	}

	total = max;

	/* Left of the match: both prefixes must be non-empty, and the recursion
	 * is skipped when the best match was the first candidate found. */
	if (pos1 && pos2 && count > 1) {
		total += php_similar_char(txt1, pos1,
				txt2, pos2);
	}

	/* Right of the match: both suffixes must be non-empty. */
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		total += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
				txt2 + pos2 + max, len2 - pos2 - max);
	}

	return total;
}
3366 /* }}} */
3367
3368 /* {{{ Calculates the similarity between two strings */
3369 PHP_FUNCTION(similar_text)
3370 {
3371 zend_string *t1, *t2;
3372 zval *percent = NULL;
3373 bool compute_percentage = ZEND_NUM_ARGS() >= 3;
3374 size_t sim;
3375
3376 ZEND_PARSE_PARAMETERS_START(2, 3)
3377 Z_PARAM_STR(t1)
3378 Z_PARAM_STR(t2)
3379 Z_PARAM_OPTIONAL
3380 Z_PARAM_ZVAL(percent)
3381 ZEND_PARSE_PARAMETERS_END();
3382
3383 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3384 if (compute_percentage) {
3385 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3386 }
3387
3388 RETURN_LONG(0);
3389 }
3390
3391 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3392
3393 if (compute_percentage) {
3394 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3395 }
3396
3397 RETURN_LONG(sim);
3398 }
3399 /* }}} */
3400
3401 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3402 PHP_FUNCTION(addcslashes)
3403 {
3404 zend_string *str, *what;
3405
3406 ZEND_PARSE_PARAMETERS_START(2, 2)
3407 Z_PARAM_STR(str)
3408 Z_PARAM_STR(what)
3409 ZEND_PARSE_PARAMETERS_END();
3410
3411 if (ZSTR_LEN(str) == 0) {
3412 RETURN_EMPTY_STRING();
3413 }
3414
3415 if (ZSTR_LEN(what) == 0) {
3416 RETURN_STR_COPY(str);
3417 }
3418
3419 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3420 }
3421 /* }}} */
3422
3423 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3424 PHP_FUNCTION(addslashes)
3425 {
3426 zend_string *str;
3427
3428 ZEND_PARSE_PARAMETERS_START(1, 1)
3429 Z_PARAM_STR(str)
3430 ZEND_PARSE_PARAMETERS_END();
3431
3432 if (ZSTR_LEN(str) == 0) {
3433 RETURN_EMPTY_STRING();
3434 }
3435
3436 RETURN_STR(php_addslashes(str));
3437 }
3438 /* }}} */
3439
3440 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3441 PHP_FUNCTION(stripcslashes)
3442 {
3443 zend_string *str;
3444
3445 ZEND_PARSE_PARAMETERS_START(1, 1)
3446 Z_PARAM_STR(str)
3447 ZEND_PARSE_PARAMETERS_END();
3448
3449 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3450 php_stripcslashes(Z_STR_P(return_value));
3451 }
3452 /* }}} */
3453
3454 /* {{{ Strips backslashes from a string */
3455 PHP_FUNCTION(stripslashes)
3456 {
3457 zend_string *str;
3458
3459 ZEND_PARSE_PARAMETERS_START(1, 1)
3460 Z_PARAM_STR(str)
3461 ZEND_PARSE_PARAMETERS_END();
3462
3463 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3464 php_stripslashes(Z_STR_P(return_value));
3465 }
3466 /* }}} */
3467
3468 /* {{{ php_stripcslashes */
3469 PHPAPI void php_stripcslashes(zend_string *str)
3470 {
3471 const char *source, *end;
3472 char *target;
3473 size_t nlen = ZSTR_LEN(str), i;
3474 char numtmp[4];
3475
3476 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3477 if (*source == '\\' && source + 1 < end) {
3478 source++;
3479 switch (*source) {
3480 case 'n': *target++='\n'; nlen--; break;
3481 case 'r': *target++='\r'; nlen--; break;
3482 case 'a': *target++='\a'; nlen--; break;
3483 case 't': *target++='\t'; nlen--; break;
3484 case 'v': *target++='\v'; nlen--; break;
3485 case 'b': *target++='\b'; nlen--; break;
3486 case 'f': *target++='\f'; nlen--; break;
3487 case '\\': *target++='\\'; nlen--; break;
3488 case 'x':
3489 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3490 numtmp[0] = *++source;
3491 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3492 numtmp[1] = *++source;
3493 numtmp[2] = '\0';
3494 nlen-=3;
3495 } else {
3496 numtmp[1] = '\0';
3497 nlen-=2;
3498 }
3499 *target++=(char)strtol(numtmp, NULL, 16);
3500 break;
3501 }
3502 ZEND_FALLTHROUGH;
3503 default:
3504 i=0;
3505 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3506 numtmp[i++] = *source++;
3507 }
3508 if (i) {
3509 numtmp[i]='\0';
3510 *target++=(char)strtol(numtmp, NULL, 8);
3511 nlen-=i;
3512 source--;
3513 } else {
3514 *target++=*source;
3515 nlen--;
3516 }
3517 }
3518 } else {
3519 *target++=*source;
3520 }
3521 }
3522
3523 if (nlen != 0) {
3524 *target='\0';
3525 }
3526
3527 ZSTR_LEN(str) = nlen;
3528 }
3529 /* }}} */
3530
3531 /* {{{ php_addcslashes_str */
3532 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3533 {
3534 char flags[256];
3535 char *target;
3536 const char *source, *end;
3537 char c;
3538 size_t newlen;
3539 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3540
3541 php_charmask((const unsigned char *) what, wlength, flags);
3542
3543 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3544 c = *source;
3545 if (flags[(unsigned char)c]) {
3546 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3547 *target++ = '\\';
3548 switch (c) {
3549 case '\n': *target++ = 'n'; break;
3550 case '\t': *target++ = 't'; break;
3551 case '\r': *target++ = 'r'; break;
3552 case '\a': *target++ = 'a'; break;
3553 case '\v': *target++ = 'v'; break;
3554 case '\b': *target++ = 'b'; break;
3555 case '\f': *target++ = 'f'; break;
3556 default: target += sprintf(target, "%03o", (unsigned char) c);
3557 }
3558 continue;
3559 }
3560 *target++ = '\\';
3561 }
3562 *target++ = c;
3563 }
3564 *target = 0;
3565 newlen = target - ZSTR_VAL(new_str);
3566 if (newlen < len * 4) {
3567 new_str = zend_string_truncate(new_str, newlen, 0);
3568 }
3569 return new_str;
3570 }
3571 /* }}} */
3572
3573 /* {{{ php_addcslashes */
3574 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3575 {
3576 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3577 }
3578 /* }}} */
3579
3580 /* {{{ php_addslashes */
3581
3582 #ifdef ZEND_INTRIN_SSE4_2_NATIVE
3583 # include <nmmintrin.h>
3584 # include "Zend/zend_bitset.h"
3585 #elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
3586 # include <nmmintrin.h>
3587 # include "Zend/zend_bitset.h"
3588 # include "Zend/zend_cpuinfo.h"
3589
3590 ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
3591 zend_string *php_addslashes_default(zend_string *str);
3592
3593 # ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
3594 PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
3595
3596 typedef zend_string *(*php_addslashes_func_t)(zend_string *);
3597
3598 ZEND_NO_SANITIZE_ADDRESS
3599 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3600 static php_addslashes_func_t resolve_addslashes(void) {
3601 if (zend_cpu_supports_sse42()) {
3602 return php_addslashes_sse42;
3603 }
3604 return php_addslashes_default;
3605 }
3606 # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
3607
3608 static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
3609
3610 PHPAPI zend_string *php_addslashes(zend_string *str) {
3611 return php_addslashes_ptr(str);
3612 }
3613
3614 /* {{{ PHP_MINIT_FUNCTION */
3615 PHP_MINIT_FUNCTION(string_intrin)
3616 {
3617 if (zend_cpu_supports_sse42()) {
3618 php_addslashes_ptr = php_addslashes_sse42;
3619 } else {
3620 php_addslashes_ptr = php_addslashes_default;
3621 }
3622 return SUCCESS;
3623 }
3624 /* }}} */
3625 # endif
3626 #endif
3627
3628 #if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_RESOLVER)
/* SSE4.2 implementation of addslashes.
 *
 * Returns a string in which ', " and \ are prefixed with a backslash and NUL
 * bytes are written as the two characters \0. A NULL `str` yields
 * ZSTR_EMPTY_ALLOC(); when no byte needs escaping the input itself is returned
 * through zend_string_copy() and no new buffer is allocated.
 *
 * Works in two phases: a scan for the first byte that needs escaping, then
 * (label do_escape) a copy-and-escape pass starting at that position. */
# ifdef ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
# elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
zend_string *php_addslashes_sse42(zend_string *str)
# endif
{
	/* The four bytes to look for: ', ", \ and NUL (only the first 4 are compared). */
	ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
	__m128i w128, s128;
	/* Bit mask for the current 16-byte chunk: bit i set means byte i needs escaping. */
	uint32_t res = 0;
	/* maximum string length, worst case situation */
	char *target;
	const char *source, *end;
	size_t offset;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

	/* Phase 1a: vector scan, 16 bytes at a time, while a full chunk remains. */
	if (ZSTR_LEN(str) > 15) {
		w128 = _mm_load_si128((__m128i *)slashchars);
		do {
			s128 = _mm_loadu_si128((__m128i *)source);
			res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
			if (res) {
				/* Leave with `res` non-zero and `source` at the start of the matching chunk. */
				goto do_escape;
			}
			source += 16;
		} while ((end - source) > 15);
	}

	/* Phase 1b: scalar scan of the remaining (at most 15) bytes; `res` is 0 here. */
	while (source < end) {
		switch (*source) {
			case '\0':
			case '\'':
			case '\"':
			case '\\':
				goto do_escape;
			default:
				source++;
				break;
		}
	}

	/* Nothing to escape: share the original string. */
	return zend_string_copy(str);

do_escape:
	/* Bytes before `offset` are known to be clean and are copied verbatim;
	 * every remaining byte may expand to two, hence 2 * (len - offset) + offset. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

	if (res) {
		/* We arrived from the vector scan: finish the chunk whose mask is in `res`. */
		int pos = 0;
		do {
			/* n = number of clean bytes before the next flagged one
			 * (zend_ulong_ntz presumably counts trailing zero bits). */
			int i, n = zend_ulong_ntz(res);
			for (i = 0; i < n; i++) {
				*target++ = source[pos + i];
			}
			pos += n;
			*target++ = '\\';
			if (source[pos] == '\0') {
				*target++ = '0';
			} else {
				*target++ = source[pos];
			}
			pos++;
			/* Drop the bits that have been consumed, including the flagged one. */
			res = res >> (n + 1);
		} while (res);

		/* Copy the clean tail of the chunk. */
		for (; pos < 16; pos++) {
			*target++ = source[pos];
		}
		source += 16;
	} else if (end - source > 15) {
		/* We arrived from the scalar scan on a short input prefix; w128 may not
		 * have been loaded yet but is needed by the vector loop below. */
		w128 = _mm_load_si128((__m128i *)slashchars);
	}

	/* Phase 2a: copy-and-escape full 16-byte chunks. */
	for (; end - source > 15; source += 16) {
		int pos = 0;
		s128 = _mm_loadu_si128((__m128i *)source);
		res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
		if (res) {
			do {
				int i, n = zend_ulong_ntz(res);
				for (i = 0; i < n; i++) {
					*target++ = source[pos + i];
				}
				pos += n;
				*target++ = '\\';
				if (source[pos] == '\0') {
					*target++ = '0';
				} else {
					*target++ = source[pos];
				}
				pos++;
				res = res >> (n + 1);
			} while (res);
			for (; pos < 16; pos++) {
				*target++ = source[pos];
			}
		} else {
			/* Clean chunk: store all 16 bytes at once. */
			_mm_storeu_si128((__m128i*)target, s128);
			target += 16;
		}
	}

	/* Phase 2b: scalar copy-and-escape of the remaining bytes. */
	while (source < end) {
		switch (*source) {
			case '\0':
				*target++ = '\\';
				*target++ = '0';
				break;
			case '\'':
			case '\"':
			case '\\':
				*target++ = '\\';
				ZEND_FALLTHROUGH;
			default:
				*target++ = *source;
				break;
		}
		source++;
	}

	*target = '\0';

	/* Reallocate only when more than 16 bytes would be wasted; otherwise just
	 * record the real length and keep the slightly oversized buffer. */
	if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
		new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
	} else {
		ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
	}

	return new_str;
}
3767 /* }}} */
3768 #endif
3769
3770 #if defined(__aarch64__) || defined(_M_ARM64)
/* 16 bytes viewable either byte-wise (mem) or as two 64-bit words (dw); used
 * to store a NEON comparison result so it can be tested for "any byte set"
 * with two word reads and inspected per byte afterwards. */
typedef union {
	uint8_t mem[16];
	uint64_t dw[2];
} quad_word;
3775
3776 static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
3777 uint8x16_t s0 = vceqq_u8(x, vdupq_n_u8('\0'));
3778 uint8x16_t s1 = vceqq_u8(x, vdupq_n_u8('\''));
3779 uint8x16_t s2 = vceqq_u8(x, vdupq_n_u8('\"'));
3780 uint8x16_t s3 = vceqq_u8(x, vdupq_n_u8('\\'));
3781 uint8x16_t s01 = vorrq_u8(s0, s1);
3782 uint8x16_t s23 = vorrq_u8(s2, s3);
3783 uint8x16_t s0123 = vorrq_u8(s01, s23);
3784 quad_word qw;
3785 vst1q_u8(qw.mem, s0123);
3786 return qw;
3787 }
3788
3789 static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
3790 {
3791 for (int i = 0; i < 16; i++) {
3792 char s = source[i];
3793 if (res.mem[i] == 0)
3794 *target++ = s;
3795 else {
3796 *target++ = '\\';
3797 if (s == '\0')
3798 *target++ = '0';
3799 else
3800 *target++ = s;
3801 }
3802 }
3803 return target;
3804 }
3805 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
3806
3807 #ifndef ZEND_INTRIN_SSE4_2_NATIVE
/* Portable implementation of addslashes (with an optional NEON fast path on
 * AArch64).
 *
 * Returns a string in which ', " and \ are prefixed with a backslash and NUL
 * bytes are written as the two characters \0. A NULL `str` yields
 * ZSTR_EMPTY_ALLOC(); when no byte needs escaping the input itself is returned
 * through zend_string_copy().
 *
 * Phase 1 scans for the first byte that needs escaping; phase 2 (label
 * do_escape) copies and escapes from that position on. */
# ifdef ZEND_INTRIN_SSE4_2_RESOLVER
zend_string *php_addslashes_default(zend_string *str) /* {{{ */
# else
PHPAPI zend_string *php_addslashes(zend_string *str)
# endif
{
	/* maximum string length, worst case situation */
	char *target;
	const char *source, *end;
	size_t offset;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

# if defined(__aarch64__) || defined(_M_ARM64)
	/* Per-byte match flags of the current 16-byte chunk; all zero means "clean". */
	quad_word res = {0};
	if (ZSTR_LEN(str) > 15) {
		do {
			res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
			if (res.dw[0] | res.dw[1])
				/* Leave with `res` set and `source` at the start of the matching chunk. */
				goto do_escape;
			source += 16;
		} while ((end - source) > 15);
	}
	/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

	/* Scalar scan for the first byte that needs escaping. */
	while (source < end) {
		switch (*source) {
			case '\0':
			case '\'':
			case '\"':
			case '\\':
				goto do_escape;
			default:
				source++;
				break;
		}
	}

	/* Nothing to escape: share the original string. */
	return zend_string_copy(str);

do_escape:
	/* Bytes before `offset` are known to be clean and are copied verbatim;
	 * every remaining byte may expand to two, hence 2 * (len - offset) + offset. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

# if defined(__aarch64__) || defined(_M_ARM64)
	/* Arrived from the vector scan: escape the chunk whose flags are in `res`. */
	if (res.dw[0] | res.dw[1]) {
		target = aarch64_add_slashes(res, source, target);
		source += 16;
	}
	/* Copy-and-escape the remaining full 16-byte chunks. */
	for (; end - source > 15; source += 16) {
		uint8x16_t x = vld1q_u8((uint8_t *)source);
		res = aarch64_contains_slash_chars(x);
		if (res.dw[0] | res.dw[1]) {
			target = aarch64_add_slashes(res, source, target);
		} else {
			/* Clean chunk: store all 16 bytes at once. */
			vst1q_u8((uint8_t*)target, x);
			target += 16;
		}
	}
	/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

	/* Scalar copy-and-escape of whatever is left. */
	while (source < end) {
		switch (*source) {
			case '\0':
				*target++ = '\\';
				*target++ = '0';
				break;
			case '\'':
			case '\"':
			case '\\':
				*target++ = '\\';
				ZEND_FALLTHROUGH;
			default:
				*target++ = *source;
				break;
		}
		source++;
	}

	*target = '\0';

	/* Reallocate only when more than 16 bytes would be wasted; otherwise just
	 * record the real length and keep the slightly oversized buffer. */
	if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
		new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
	} else {
		ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
	}

	return new_str;
}
3907 #endif
3908 /* }}} */
3909 /* }}} */
3910
3911 /* {{{ php_stripslashes
3912 *
3913 * be careful, this edits the string in-place */
3914 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
3915 {
3916 #if defined(__aarch64__) || defined(_M_ARM64)
3917 while (len > 15) {
3918 uint8x16_t x = vld1q_u8((uint8_t *)str);
3919 quad_word q;
3920 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
3921 if (q.dw[0] | q.dw[1]) {
3922 unsigned int i = 0;
3923 while (i < 16) {
3924 if (q.mem[i] == 0) {
3925 *out++ = str[i];
3926 i++;
3927 continue;
3928 }
3929
3930 i++; /* skip the slash */
3931 if (i < len) {
3932 char s = str[i];
3933 if (s == '0')
3934 *out++ = '\0';
3935 else
3936 *out++ = s; /* preserve the next character */
3937 i++;
3938 }
3939 }
3940 str += i;
3941 len -= i;
3942 } else {
3943 vst1q_u8((uint8_t*)out, x);
3944 out += 16;
3945 str += 16;
3946 len -= 16;
3947 }
3948 }
3949 /* Finish the last 15 bytes or less with the scalar loop. */
3950 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
3951 while (len > 0) {
3952 if (*str == '\\') {
3953 str++; /* skip the slash */
3954 len--;
3955 if (len > 0) {
3956 if (*str == '0') {
3957 *out++='\0';
3958 str++;
3959 } else {
3960 *out++ = *str++; /* preserve the next character */
3961 }
3962 len--;
3963 }
3964 } else {
3965 *out++ = *str++;
3966 len--;
3967 }
3968 }
3969
3970 return out;
3971 }
3972
3973 #ifdef __SSE2__
/* SSE2 variant of php_stripslashes(): removes backslash escapes from `str`
 * IN PLACE and updates ZSTR_LEN(str).
 *
 * s = read cursor, t = write cursor (never ahead of s), l = bytes left to read.
 * Full 16-byte chunks are handled here; the remainder is passed to
 * php_stripslashes_impl(). */
PHPAPI void php_stripslashes(zend_string *str)
{
	const char *s = ZSTR_VAL(str);
	char *t = ZSTR_VAL(str);
	size_t l = ZSTR_LEN(str);

	if (l > 15) {
		const __m128i slash = _mm_set1_epi8('\\');

		do {
			__m128i in = _mm_loadu_si128((__m128i *)s);
			__m128i any_slash = _mm_cmpeq_epi8(in, slash);
			/* One bit per byte of the chunk; set where the byte is a backslash. */
			uint32_t res = _mm_movemask_epi8(any_slash);

			if (res) {
				/* n = number of bytes before the first backslash
				 * (zend_ulong_ntz presumably counts trailing zero bits). */
				int i, n = zend_ulong_ntz(res);
				/* Process up to, but not including, the chunk's last byte so
				 * that reading the byte after a backslash stays within the
				 * 16 bytes known to exist; a backslash in the last position is
				 * left for the next round. */
				const char *e = s + 15;
				l -= n;
				for (i = 0; i < n; i++) {
					*t++ = *s++;
				}
				for (; s < e; s++) {
					if (*s == '\\') {
						/* Drop the backslash and emit the escaped byte. */
						s++;
						l--;
						if (*s == '0') {
							*t = '\0';
						} else {
							*t = *s;
						}
					} else {
						*t = *s;
					}
					t++;
					l--;
				}
			} else {
				/* No backslash: move the whole chunk. */
				_mm_storeu_si128((__m128i *)t, in);
				s += 16;
				t += 16;
				l -= 16;
			}
		} while (l > 15);
	}

	/* Remaining bytes (fewer than 16) go through the generic implementation. */
	t = php_stripslashes_impl(s, t, l);
	/* Only touch the length and terminator when something was removed. */
	if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
		ZSTR_LEN(str) = t - ZSTR_VAL(str);
		ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
	}
}
4025 #else
4026 PHPAPI void php_stripslashes(zend_string *str)
4027 {
4028 const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
4029 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4030 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4031 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4032 }
4033 }
4034 #endif
4035 /* }}} */
4036
/* Block kinds used while splitting text into runs. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/* Byte classifiers. Each evaluates to 1 or 0 and treats its argument as a
 * single byte. The argument is parenthesised before the cast so that an
 * expression argument is converted as a whole rather than only its first
 * operand. */
/* 1 if the byte lies in the range 224..250. */
#define isheb(c)      (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
/* 1 for space or horizontal tab. */
#define _isblank(c)   (((((unsigned char) (c)) == ' '  || ((unsigned char) (c)) == '\t')) ? 1 : 0)
/* 1 for line feed or carriage return. */
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4042
4043 /* {{{ php_str_replace_in_subject */
/* Performs the str_replace()/str_ireplace() work for ONE subject string.
 *
 * search_str / search_ht:   the search term, as a single string or as an array.
 * replace_str / replace_ht: the replacement, as a single string or as an array.
 *                           With an array search and an array replacement the
 *                           entries are paired in order; once the replacement
 *                           array is exhausted the empty string is used.
 * subject_str:              the string to operate on (not consumed).
 * result:                   receives the resulting string zval.
 * case_sensitivity:         true for str_replace, false for str_ireplace.
 *
 * Returns the number of replacements as accumulated by the php_*_to_str*
 * helpers. Empty search strings are skipped. */
static zend_long php_str_replace_in_subject(
	zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, zval *result, bool case_sensitivity
) {
	zval		*search_entry;
	zend_string	*tmp_result;
	char		*replace_value = NULL;
	size_t		 replace_len = 0;
	zend_long	 replace_count = 0;
	/* Lower-cased copy of the current subject, cached across iterations for the
	 * case-insensitive path; dropped whenever the subject changes. */
	zend_string *lc_subject_str = NULL;
	/* Cursor into replace_ht; only meaningful (and initialised) when replace_ht is set. */
	uint32_t     replace_idx;

	if (ZSTR_LEN(subject_str) == 0) {
		ZVAL_EMPTY_STRING(result);
		return 0;
	}

	/* If search is an array */
	if (search_ht) {
		/* Take our own reference on the subject; it is swapped for each
		 * intermediate result below and finally handed over to `result`. */
		zend_string_addref(subject_str);

		if (replace_ht) {
			replace_idx = 0;
		} else {
			/* Set replacement value to the passed one */
			replace_value = ZSTR_VAL(replace_str);
			replace_len = ZSTR_LEN(replace_str);
		}

		/* For each entry in the search array, get the entry */
		ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
			/* Make sure we're dealing with strings. */
			zend_string *tmp_search_str;
			/* Note: shadows the `search_str` parameter inside this loop. */
			zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
			zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;

			/* If replace is an array. */
			if (replace_ht) {
				/* Get current entry */
				zval *replace_entry = NULL;
				/* Advance replace_idx to the next slot that is not IS_UNDEF;
				 * packed and hash layouts store their zvals differently. */
				if (HT_IS_PACKED(replace_ht)) {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arPacked[replace_idx];
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				} else {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arData[replace_idx].val;
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				}
				if (replace_idx < replace_ht->nNumUsed) {
					/* Make sure we're dealing with strings. */
					replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);

					/* Set replacement value to the one we got from array */
					replace_value = ZSTR_VAL(replace_entry_str);
					replace_len = ZSTR_LEN(replace_entry_str);

					replace_idx++;
				} else {
					/* We've run out of replacement strings, so use an empty one. */
					replace_value = "";
					replace_len = 0;
				}
			}

			if (ZSTR_LEN(search_str) == 1) {
				/* Single-character search uses the dedicated helper. */
				zend_long old_replace_count = replace_count;

				tmp_result = php_char_to_str_ex(subject_str,
								ZSTR_VAL(search_str)[0],
								replace_value,
								replace_len,
								case_sensitivity,
								&replace_count);
				/* The subject changed, so the cached lower-case copy is stale. */
				if (lc_subject_str && replace_count != old_replace_count) {
					zend_string_release_ex(lc_subject_str, 0);
					lc_subject_str = NULL;
				}
			} else if (ZSTR_LEN(search_str) > 1) {
				if (case_sensitivity) {
					tmp_result = php_str_to_str_ex(subject_str,
							ZSTR_VAL(search_str), ZSTR_LEN(search_str),
							replace_value, replace_len, &replace_count);
				} else {
					zend_long old_replace_count = replace_count;

					/* Lower-case the subject lazily and reuse it until it changes. */
					if (!lc_subject_str) {
						lc_subject_str = zend_string_tolower(subject_str);
					}
					tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
							search_str, replace_value, replace_len, &replace_count);
					if (replace_count != old_replace_count) {
						zend_string_release_ex(lc_subject_str, 0);
						lc_subject_str = NULL;
					}
				}
			} else {
				/* Empty search string: nothing to do for this entry. */
				zend_tmp_string_release(tmp_search_str);
				zend_tmp_string_release(tmp_replace_entry_str);
				continue;
			}

			zend_tmp_string_release(tmp_search_str);
			zend_tmp_string_release(tmp_replace_entry_str);

			if (subject_str == tmp_result) {
				/* The helper handed back the same string (presumably with an
				 * added reference): undo that extra reference. */
				zend_string_delref(subject_str);
			} else {
				/* Continue with the new intermediate result. */
				zend_string_release_ex(subject_str, 0);
				subject_str = tmp_result;
				/* Nothing left to search in: stop early. */
				if (ZSTR_LEN(subject_str) == 0) {
					zend_string_release_ex(subject_str, 0);
					ZVAL_EMPTY_STRING(result);
					if (lc_subject_str) {
						zend_string_release_ex(lc_subject_str, 0);
					}
					return replace_count;
				}
			}
		} ZEND_HASH_FOREACH_END();
		/* Hand our reference on the final string over to the result zval. */
		ZVAL_STR(result, subject_str);
		if (lc_subject_str) {
			zend_string_release_ex(lc_subject_str, 0);
		}
	} else {
		/* Search is a single string (and so is the replacement). */
		ZEND_ASSERT(search_str);
		if (ZSTR_LEN(search_str) == 1) {
			ZVAL_STR(result,
				php_char_to_str_ex(subject_str,
							ZSTR_VAL(search_str)[0],
							ZSTR_VAL(replace_str),
							ZSTR_LEN(replace_str),
							case_sensitivity,
							&replace_count));
		} else if (ZSTR_LEN(search_str) > 1) {
			if (case_sensitivity) {
				ZVAL_STR(result, php_str_to_str_ex(subject_str,
						ZSTR_VAL(search_str), ZSTR_LEN(search_str),
						ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
			} else {
				lc_subject_str = zend_string_tolower(subject_str);
				ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
						search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
				zend_string_release_ex(lc_subject_str, 0);
			}
		} else {
			/* Empty search string: the subject is returned unchanged. */
			ZVAL_STR_COPY(result, subject_str);
		}
	}
	return replace_count;
}
4204 /* }}} */
4205
4206 /* {{{ php_str_replace_common */
4207 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool case_sensitivity)
4208 {
4209 zend_string *search_str;
4210 HashTable *search_ht;
4211 zend_string *replace_str;
4212 HashTable *replace_ht;
4213 zend_string *subject_str;
4214 HashTable *subject_ht;
4215 zval *subject_entry, *zcount = NULL;
4216 zval result;
4217 zend_string *string_key;
4218 zend_ulong num_key;
4219 zend_long count = 0;
4220
4221 ZEND_PARSE_PARAMETERS_START(3, 4)
4222 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4223 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4224 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4225 Z_PARAM_OPTIONAL
4226 Z_PARAM_ZVAL(zcount)
4227 ZEND_PARSE_PARAMETERS_END();
4228
4229 /* Make sure we're dealing with strings and do the replacement. */
4230 if (search_str && replace_ht) {
4231 zend_argument_type_error(2, "must be of type string when argument #1 ($search) is a string");
4232 RETURN_THROWS();
4233 }
4234
4235 /* if subject is an array */
4236 if (subject_ht) {
4237 array_init(return_value);
4238
4239 /* For each subject entry, convert it to string, then perform replacement
4240 and add the result to the return_value array. */
4241 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
4242 zend_string *tmp_subject_str;
4243 ZVAL_DEREF(subject_entry);
4244 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4245 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4246 zend_tmp_string_release(tmp_subject_str);
4247
4248 /* Add to return array */
4249 if (string_key) {
4250 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4251 } else {
4252 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4253 }
4254 } ZEND_HASH_FOREACH_END();
4255 } else { /* if subject is not an array */
4256 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4257 }
4258 if (zcount) {
4259 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4260 }
4261 }
4262 /* }}} */
4263
4264 /* {{{ Replaces all occurrences of search in haystack with replace */
4265 PHP_FUNCTION(str_replace)
4266 {
4267 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4268 }
4269 /* }}} */
4270
4271 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4272 PHP_FUNCTION(str_ireplace)
4273 {
4274 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4275 }
4276 /* }}} */
4277
4278 /* {{{ Converts logical Hebrew text to visual text */
4279 PHP_FUNCTION(hebrev)
4280 {
4281 char *str, *heb_str, *target;
4282 const char *tmp;
4283 size_t block_start, block_end, block_type, i;
4284 zend_long max_chars=0, char_count;
4285 size_t begin, end, orig_begin;
4286 size_t str_len;
4287 zend_string *broken_str;
4288
4289 ZEND_PARSE_PARAMETERS_START(1, 2)
4290 Z_PARAM_STRING(str, str_len)
4291 Z_PARAM_OPTIONAL
4292 Z_PARAM_LONG(max_chars)
4293 ZEND_PARSE_PARAMETERS_END();
4294
4295 if (str_len == 0) {
4296 RETURN_EMPTY_STRING();
4297 }
4298
4299 tmp = str;
4300 block_start=block_end=0;
4301
4302 heb_str = (char *) emalloc(str_len+1);
4303 target = heb_str+str_len;
4304 *target = 0;
4305 target--;
4306
4307 if (isheb(*tmp)) {
4308 block_type = _HEB_BLOCK_TYPE_HEB;
4309 } else {
4310 block_type = _HEB_BLOCK_TYPE_ENG;
4311 }
4312
4313 do {
4314 if (block_type == _HEB_BLOCK_TYPE_HEB) {
4315 while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
4316 tmp++;
4317 block_end++;
4318 }
4319 for (i = block_start+1; i<= block_end+1; i++) {
4320 *target = str[i-1];
4321 switch (*target) {
4322 case '(':
4323 *target = ')';
4324 break;
4325 case ')':
4326 *target = '(';
4327 break;
4328 case '[':
4329 *target = ']';
4330 break;
4331 case ']':
4332 *target = '[';
4333 break;
4334 case '{':
4335 *target = '}';
4336 break;
4337 case '}':
4338 *target = '{';
4339 break;
4340 case '<':
4341 *target = '>';
4342 break;
4343 case '>':
4344 *target = '<';
4345 break;
4346 case '\\':
4347 *target = '/';
4348 break;
4349 case '/':
4350 *target = '\\';
4351 break;
4352 default:
4353 break;
4354 }
4355 target--;
4356 }
4357 block_type = _HEB_BLOCK_TYPE_ENG;
4358 } else {
4359 while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
4360 tmp++;
4361 block_end++;
4362 }
4363 while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
4364 tmp--;
4365 block_end--;
4366 }
4367 for (i = block_end+1; i >= block_start+1; i--) {
4368 *target = str[i-1];
4369 target--;
4370 }
4371 block_type = _HEB_BLOCK_TYPE_HEB;
4372 }
4373 block_start=block_end+1;
4374 } while (block_end < str_len-1);
4375
4376
4377 broken_str = zend_string_alloc(str_len, 0);
4378 begin = end = str_len-1;
4379 target = ZSTR_VAL(broken_str);
4380
4381 while (1) {
4382 char_count=0;
4383 while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
4384 char_count++;
4385 begin--;
4386 if (_isnewline(heb_str[begin])) {
4387 while (begin > 0 && _isnewline(heb_str[begin-1])) {
4388 begin--;
4389 char_count++;
4390 }
4391 break;
4392 }
4393 }
4394 if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
4395 size_t new_char_count=char_count, new_begin=begin;
4396
4397 while (new_char_count > 0) {
4398 if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
4399 break;
4400 }
4401 new_begin++;
4402 new_char_count--;
4403 }
4404 if (new_char_count > 0) {
4405 begin=new_begin;
4406 }
4407 }
4408 orig_begin=begin;
4409
4410 if (_isblank(heb_str[begin])) {
4411 heb_str[begin]='\n';
4412 }
4413 while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
4414 begin++;
4415 }
4416 for (i = begin; i <= end; i++) { /* copy content */
4417 *target = heb_str[i];
4418 target++;
4419 }
4420 for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
4421 *target = heb_str[i];
4422 target++;
4423 }
4424 begin=orig_begin;
4425
4426 if (begin == 0) {
4427 *target = 0;
4428 break;
4429 }
4430 begin--;
4431 end=begin;
4432 }
4433 efree(heb_str);
4434
4435 RETURN_NEW_STR(broken_str);
4436 }
4437 /* }}} */
4438
4439 /* {{{ Converts newlines to HTML line breaks */
4440 PHP_FUNCTION(nl2br)
4441 {
4442 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4443 const char *tmp, *end;
4444 zend_string *str;
4445 char *target;
4446 size_t repl_cnt = 0;
4447 bool is_xhtml = 1;
4448 zend_string *result;
4449
4450 ZEND_PARSE_PARAMETERS_START(1, 2)
4451 Z_PARAM_STR(str)
4452 Z_PARAM_OPTIONAL
4453 Z_PARAM_BOOL(is_xhtml)
4454 ZEND_PARSE_PARAMETERS_END();
4455
4456 tmp = ZSTR_VAL(str);
4457 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4458
4459 /* it is really faster to scan twice and allocate mem once instead of scanning once
4460 and constantly reallocing */
4461 while (tmp < end) {
4462 if (*tmp == '\r') {
4463 if (*(tmp+1) == '\n') {
4464 tmp++;
4465 }
4466 repl_cnt++;
4467 } else if (*tmp == '\n') {
4468 if (*(tmp+1) == '\r') {
4469 tmp++;
4470 }
4471 repl_cnt++;
4472 }
4473
4474 tmp++;
4475 }
4476
4477 if (repl_cnt == 0) {
4478 RETURN_STR_COPY(str);
4479 }
4480
4481 {
4482 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4483
4484 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4485 target = ZSTR_VAL(result);
4486 }
4487
4488 tmp = ZSTR_VAL(str);
4489 while (tmp < end) {
4490 switch (*tmp) {
4491 case '\r':
4492 case '\n':
4493 *target++ = '<';
4494 *target++ = 'b';
4495 *target++ = 'r';
4496
4497 if (is_xhtml) {
4498 *target++ = ' ';
4499 *target++ = '/';
4500 }
4501
4502 *target++ = '>';
4503
4504 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4505 *target++ = *tmp++;
4506 }
4507 ZEND_FALLTHROUGH;
4508 default:
4509 *target++ = *tmp;
4510 }
4511
4512 tmp++;
4513 }
4514
4515 *target = '\0';
4516
4517 RETURN_NEW_STR(result);
4518 }
4519 /* }}} */
4520
4521 /* {{{ Strips HTML and PHP tags from a string */
4522 PHP_FUNCTION(strip_tags)
4523 {
4524 zend_string *buf;
4525 zend_string *str;
4526 zend_string *allow_str = NULL;
4527 HashTable *allow_ht = NULL;
4528 const char *allowed_tags=NULL;
4529 size_t allowed_tags_len=0;
4530 smart_str tags_ss = {0};
4531
4532 ZEND_PARSE_PARAMETERS_START(1, 2)
4533 Z_PARAM_STR(str)
4534 Z_PARAM_OPTIONAL
4535 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4536 ZEND_PARSE_PARAMETERS_END();
4537
4538 if (allow_ht) {
4539 zval *tmp;
4540 zend_string *tag;
4541
4542 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4543 tag = zval_get_string(tmp);
4544 smart_str_appendc(&tags_ss, '<');
4545 smart_str_append(&tags_ss, tag);
4546 smart_str_appendc(&tags_ss, '>');
4547 zend_string_release(tag);
4548 } ZEND_HASH_FOREACH_END();
4549 if (tags_ss.s) {
4550 smart_str_0(&tags_ss);
4551 allowed_tags = ZSTR_VAL(tags_ss.s);
4552 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4553 }
4554 } else if (allow_str) {
4555 allowed_tags = ZSTR_VAL(allow_str);
4556 allowed_tags_len = ZSTR_LEN(allow_str);
4557 }
4558
4559 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4560 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4561 smart_str_free(&tags_ss);
4562 RETURN_NEW_STR(buf);
4563 }
4564 /* }}} */
4565
/* Tries to set (or, for the string "0", merely query) the locale for category
 * `cat`.
 *
 * Returns a zend_string holding the value reported by setlocale() on success
 * and NULL on failure. A locale name of 255 bytes or more is rejected with an
 * E_WARNING. When a locale was actually requested and accepted, the function
 * records that fact in BG(locale_changed) and, for LC_CTYPE / LC_ALL, refreshes
 * BG(ctype_string). */
static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
	const char *retval;

	if (zend_string_equals_literal(loc, "0")) {
		/* "0" means: do not change anything, just report the current locale. */
		loc = NULL;
	} else {
		if (ZSTR_LEN(loc) >= 255) {
			php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
			return NULL;
		}
	}

# ifndef PHP_WIN32
	retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
# else
	if (loc) {
		/* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
		char *locp = ZSTR_VAL(loc);
		/* With a length of exactly 5, locp[5] is the string's terminating NUL. */
		if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
			&& locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
			&& locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
			&& (locp[5] == '\0' || locp[5] == '.')
			&& !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
				&& locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
				&& locp[5] == '\0')
		) {
			/* Treated as "locale not available" without calling setlocale(). */
			retval = NULL;
		} else {
			retval = setlocale(cat, ZSTR_VAL(loc));
		}
	} else {
		retval = setlocale(cat, NULL);
	}
# endif
	if (!retval) {
		return NULL;
	}

	if (loc) {
		/* Remember if locale was changed */
		size_t len = strlen(retval);

		BG(locale_changed) = 1;
		if (cat == LC_CTYPE || cat == LC_ALL) {
			zend_update_current_locale();
			/* Replace the remembered ctype locale name. */
			if (BG(ctype_string)) {
				zend_string_release_ex(BG(ctype_string), 0);
			}
			if (len == 1 && *retval == 'C') {
				/* C locale is represented as NULL. */
				BG(ctype_string) = NULL;
				return ZSTR_CHAR('C');
			} else if (zend_string_equals_cstr(loc, retval, len)) {
				/* setlocale() echoed the requested name: reuse the existing
				 * zend_string instead of allocating a new one. */
				BG(ctype_string) = zend_string_copy(loc);
				return zend_string_copy(BG(ctype_string));
			} else {
				BG(ctype_string) = zend_string_init(retval, len, 0);
				return zend_string_copy(BG(ctype_string));
			}
		} else if (zend_string_equals_cstr(loc, retval, len)) {
			return zend_string_copy(loc);
		}
	}
	/* Query-only call, or setlocale() returned a name different from the request. */
	return zend_string_init(retval, strlen(retval), 0);
}
4631
4632 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4633 zend_string *tmp_loc_str;
4634 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4635 if (UNEXPECTED(loc_str == NULL)) {
4636 return NULL;
4637 }
4638 zend_string *result = try_setlocale_str(cat, loc_str);
4639 zend_tmp_string_release(tmp_loc_str);
4640 return result;
4641 }
4642
4643 /* {{{ Set locale information */
4644 PHP_FUNCTION(setlocale)
4645 {
4646 zend_long cat;
4647 zval *args = NULL;
4648 int num_args;
4649
4650 ZEND_PARSE_PARAMETERS_START(2, -1)
4651 Z_PARAM_LONG(cat)
4652 Z_PARAM_VARIADIC('+', args, num_args)
4653 ZEND_PARSE_PARAMETERS_END();
4654
4655 for (uint32_t i = 0; i < num_args; i++) {
4656 if (Z_TYPE(args[i]) == IS_ARRAY) {
4657 zval *elem;
4658 ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
4659 zend_string *result = try_setlocale_zval(cat, elem);
4660 if (EG(exception)) {
4661 RETURN_THROWS();
4662 }
4663 if (result) {
4664 RETURN_STR(result);
4665 }
4666 } ZEND_HASH_FOREACH_END();
4667 } else {
4668 zend_string *result = try_setlocale_zval(cat, &args[i]);
4669 if (EG(exception)) {
4670 RETURN_THROWS();
4671 }
4672 if (result) {
4673 RETURN_STR(result);
4674 }
4675 }
4676 }
4677
4678 RETURN_FALSE;
4679 }
4680 /* }}} */
4681
4682 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
4683 PHP_FUNCTION(parse_str)
4684 {
4685 char *arg;
4686 zval *arrayArg = NULL;
4687 char *res = NULL;
4688 size_t arglen;
4689
4690 ZEND_PARSE_PARAMETERS_START(2, 2)
4691 Z_PARAM_STRING(arg, arglen)
4692 Z_PARAM_ZVAL(arrayArg)
4693 ZEND_PARSE_PARAMETERS_END();
4694
4695 arrayArg = zend_try_array_init(arrayArg);
4696 if (!arrayArg) {
4697 RETURN_THROWS();
4698 }
4699
4700 res = estrndup(arg, arglen);
4701 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
4702 }
4703 /* }}} */
4704
4705 #define PHP_TAG_BUF_SIZE 1023
4706
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags
 *
 * tag: the raw tag text; the scan stops at the first '>' (or at the first
 *      whitespace following the tag name), so the caller must pass a buffer
 *      that contains one of those. The caller in this file always appends '>'.
 * len: length of tag; used to size the scratch buffer. 0 yields false.
 * set: string of allowed tags in the form "<a><b>..."; it is compared against
 *      the lowercased tag, so it is expected to be lowercase already.
 *
 * Returns true when the normalized tag occurs in set.
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
static bool php_tag_find(char *tag, size_t len, const char *set) {
	char c, *n;
	const char *t;
	int state = 0;
	bool done = false;
	bool found;
	char *norm;

	if (len == 0) {
		return false;
	}

	/* At most len - 1 bytes are copied before the closing '>', then '>' and NUL. */
	norm = emalloc(len+1);

	n = norm;
	t = tag;
	c = zend_tolower_ascii(*t);
	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	while (!done) {
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = true;
				break;
			default:
				/* <ctype.h> functions require an unsigned char value; a plain
				 * char may be negative for bytes >= 0x80. */
				if (!isspace((unsigned char)c)) {
					if (state == 0) {
						state = 1;
					}
					/* Drop the '/' of "</tag" and "<tag/>"; never look before
					 * the first byte of the buffer. */
					if (c != '/' || ((t == tag || *(t-1) != '<') && *(t+1) != '>')) {
						*(n++) = c;
					}
				} else if (state == 1) {
					/* Whitespace after the tag name: attributes are ignored. */
					done = true;
				}
				break;
		}
		c = zend_tolower_ascii(*(++t));
	}
	*(n++) = '>';
	*n = '\0';

	found = strstr(set, norm) != NULL;
	efree(norm);
	return found;
}
4771 /* }}} */
4772
4773 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
4774 {
4775 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
4776 }
4777 /* }}} */
4778
4779 /* {{{ php_strip_tags
4780
4781 A simple little state-machine to strip out html and php tags
4782
4783 State 0 is the output state, State 1 means we are inside a
4784 normal html tag and state 2 means we are inside a php tag.
4785
4786 The state variable is passed in to allow a function like fgetss
4787 to maintain state across calls to the function.
4788
4789 lc holds the last significant character read and br is a bracket
4790 counter.
4791
4792 When an allow string is passed in we keep track of the string
4793 in state 1 and when the tag is closed check it against the
4794 allow string to see if we should allow it.
4795
4796 swm: Added ability to strip <?xml tags without assuming it PHP
4797 code.
4798 */
4799 PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
4800 {
4801 char *tbuf, *tp, *rp, c, lc;
4802 const char *buf, *p, *end;
4803 int br, depth=0, in_q = 0;
4804 uint8_t state = 0;
4805 size_t pos;
4806 char *allow_free = NULL;
4807 char is_xml = 0;
4808
4809 buf = estrndup(rbuf, len);
4810 end = buf + len;
4811 lc = '\0';
4812 p = buf;
4813 rp = rbuf;
4814 br = 0;
4815 if (allow) {
4816 allow_free = zend_str_tolower_dup_ex(allow, allow_len);
4817 allow = allow_free ? allow_free : allow;
4818 tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
4819 tp = tbuf;
4820 } else {
4821 tbuf = tp = NULL;
4822 }
4823
4824 state_0:
4825 if (p >= end) {
4826 goto finish;
4827 }
4828 c = *p;
4829 switch (c) {
4830 case '\0':
4831 break;
4832 case '<':
4833 if (in_q) {
4834 break;
4835 }
4836 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4837 *(rp++) = c;
4838 break;
4839 }
4840 lc = '<';
4841 state = 1;
4842 if (allow) {
4843 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4844 pos = tp - tbuf;
4845 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4846 tp = tbuf + pos;
4847 }
4848 *(tp++) = '<';
4849 }
4850 p++;
4851 goto state_1;
4852 case '>':
4853 if (depth) {
4854 depth--;
4855 break;
4856 }
4857
4858 if (in_q) {
4859 break;
4860 }
4861
4862 *(rp++) = c;
4863 break;
4864 default:
4865 *(rp++) = c;
4866 break;
4867 }
4868 p++;
4869 goto state_0;
4870
4871 state_1:
4872 if (p >= end) {
4873 goto finish;
4874 }
4875 c = *p;
4876 switch (c) {
4877 case '\0':
4878 break;
4879 case '<':
4880 if (in_q) {
4881 break;
4882 }
4883 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4884 goto reg_char_1;
4885 }
4886 depth++;
4887 break;
4888 case '>':
4889 if (depth) {
4890 depth--;
4891 break;
4892 }
4893 if (in_q) {
4894 break;
4895 }
4896
4897 lc = '>';
4898 if (is_xml && p >= buf + 1 && *(p -1) == '-') {
4899 break;
4900 }
4901 in_q = state = is_xml = 0;
4902 if (allow) {
4903 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4904 pos = tp - tbuf;
4905 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4906 tp = tbuf + pos;
4907 }
4908 *(tp++) = '>';
4909 *tp='\0';
4910 if (php_tag_find(tbuf, tp-tbuf, allow)) {
4911 memcpy(rp, tbuf, tp-tbuf);
4912 rp += tp-tbuf;
4913 }
4914 tp = tbuf;
4915 }
4916 p++;
4917 goto state_0;
4918 case '"':
4919 case '\'':
4920 if (p != buf && (!in_q || *p == in_q)) {
4921 if (in_q) {
4922 in_q = 0;
4923 } else {
4924 in_q = *p;
4925 }
4926 }
4927 goto reg_char_1;
4928 case '!':
4929 /* JavaScript & Other HTML scripting languages */
4930 if (p >= buf + 1 && *(p-1) == '<') {
4931 state = 3;
4932 lc = c;
4933 p++;
4934 goto state_3;
4935 } else {
4936 goto reg_char_1;
4937 }
4938 break;
4939 case '?':
4940 if (p >= buf + 1 && *(p-1) == '<') {
4941 br=0;
4942 state = 2;
4943 p++;
4944 goto state_2;
4945 } else {
4946 goto reg_char_1;
4947 }
4948 break;
4949 default:
4950 reg_char_1:
4951 if (allow) {
4952 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4953 pos = tp - tbuf;
4954 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4955 tp = tbuf + pos;
4956 }
4957 *(tp++) = c;
4958 }
4959 break;
4960 }
4961 p++;
4962 goto state_1;
4963
4964 state_2:
4965 if (p >= end) {
4966 goto finish;
4967 }
4968 c = *p;
4969 switch (c) {
4970 case '(':
4971 if (lc != '"' && lc != '\'') {
4972 lc = '(';
4973 br++;
4974 }
4975 break;
4976 case ')':
4977 if (lc != '"' && lc != '\'') {
4978 lc = ')';
4979 br--;
4980 }
4981 break;
4982 case '>':
4983 if (depth) {
4984 depth--;
4985 break;
4986 }
4987 if (in_q) {
4988 break;
4989 }
4990
4991 if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
4992 in_q = state = 0;
4993 tp = tbuf;
4994 p++;
4995 goto state_0;
4996 }
4997 break;
4998 case '"':
4999 case '\'':
5000 if (p >= buf + 1 && *(p-1) != '\\') {
5001 if (lc == c) {
5002 lc = '\0';
5003 } else if (lc != '\\') {
5004 lc = c;
5005 }
5006 if (p != buf && (!in_q || *p == in_q)) {
5007 if (in_q) {
5008 in_q = 0;
5009 } else {
5010 in_q = *p;
5011 }
5012 }
5013 }
5014 break;
5015 case 'l':
5016 case 'L':
5017 /* swm: If we encounter '<?xml' then we shouldn't be in
5018 * state == 2 (PHP). Switch back to HTML.
5019 */
5020 if (state == 2 && p > buf+4
5021 && (*(p-1) == 'm' || *(p-1) == 'M')
5022 && (*(p-2) == 'x' || *(p-2) == 'X')
5023 && *(p-3) == '?'
5024 && *(p-4) == '<') {
5025 state = 1; is_xml=1;
5026 p++;
5027 goto state_1;
5028 }
5029 break;
5030 default:
5031 break;
5032 }
5033 p++;
5034 goto state_2;
5035
5036 state_3:
5037 if (p >= end) {
5038 goto finish;
5039 }
5040 c = *p;
5041 switch (c) {
5042 case '>':
5043 if (depth) {
5044 depth--;
5045 break;
5046 }
5047 if (in_q) {
5048 break;
5049 }
5050 in_q = state = 0;
5051 tp = tbuf;
5052 p++;
5053 goto state_0;
5054 case '"':
5055 case '\'':
5056 if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
5057 if (in_q) {
5058 in_q = 0;
5059 } else {
5060 in_q = *p;
5061 }
5062 }
5063 break;
5064 case '-':
5065 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
5066 state = 4;
5067 p++;
5068 goto state_4;
5069 }
5070 break;
5071 case 'E':
5072 case 'e':
5073 /* !DOCTYPE exception */
5074 if (p > buf+6
5075 && (*(p-1) == 'p' || *(p-1) == 'P')
5076 && (*(p-2) == 'y' || *(p-2) == 'Y')
5077 && (*(p-3) == 't' || *(p-3) == 'T')
5078 && (*(p-4) == 'c' || *(p-4) == 'C')
5079 && (*(p-5) == 'o' || *(p-5) == 'O')
5080 && (*(p-6) == 'd' || *(p-6) == 'D')) {
5081 state = 1;
5082 p++;
5083 goto state_1;
5084 }
5085 break;
5086 default:
5087 break;
5088 }
5089 p++;
5090 goto state_3;
5091
5092 state_4:
5093 while (p < end) {
5094 c = *p;
5095 if (c == '>' && !in_q) {
5096 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
5097 in_q = state = 0;
5098 tp = tbuf;
5099 p++;
5100 goto state_0;
5101 }
5102 }
5103 p++;
5104 }
5105
5106 finish:
5107 if (rp < rbuf + len) {
5108 *rp = '\0';
5109 }
5110 efree((void *)buf);
5111 if (tbuf) {
5112 efree(tbuf);
5113 }
5114 if (allow_free) {
5115 efree(allow_free);
5116 }
5117
5118 return (size_t)(rp - rbuf);
5119 }
5120 /* }}} */
5121
5122 /* {{{ Parse a CSV string into an array */
5123 PHP_FUNCTION(str_getcsv)
5124 {
5125 zend_string *str;
5126 char delim = ',', enc = '"';
5127 int esc = (unsigned char) '\\';
5128 char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
5129 size_t delim_len = 0, enc_len = 0, esc_len = 0;
5130
5131 ZEND_PARSE_PARAMETERS_START(1, 4)
5132 Z_PARAM_STR(str)
5133 Z_PARAM_OPTIONAL
5134 Z_PARAM_STRING(delim_str, delim_len)
5135 Z_PARAM_STRING(enc_str, enc_len)
5136 Z_PARAM_STRING(esc_str, esc_len)
5137 ZEND_PARSE_PARAMETERS_END();
5138
5139 delim = delim_len ? delim_str[0] : delim;
5140 enc = enc_len ? enc_str[0] : enc;
5141 if (esc_str != NULL) {
5142 esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
5143 }
5144
5145 HashTable *values = php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str));
5146 if (values == NULL) {
5147 values = php_bc_fgetcsv_empty_line();
5148 }
5149 RETURN_ARR(values);
5150 }
5151 /* }}} */
5152
5153 /* {{{ Returns the input string repeat mult times */
5154 PHP_FUNCTION(str_repeat)
5155 {
5156 zend_string *input_str; /* Input string */
5157 zend_long mult; /* Multiplier */
5158 zend_string *result; /* Resulting string */
5159 size_t result_len; /* Length of the resulting string */
5160
5161 ZEND_PARSE_PARAMETERS_START(2, 2)
5162 Z_PARAM_STR(input_str)
5163 Z_PARAM_LONG(mult)
5164 ZEND_PARSE_PARAMETERS_END();
5165
5166 if (mult < 0) {
5167 zend_argument_value_error(2, "must be greater than or equal to 0");
5168 RETURN_THROWS();
5169 }
5170
5171 /* Don't waste our time if it's empty */
5172 /* ... or if the multiplier is zero */
5173 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5174 RETURN_EMPTY_STRING();
5175
5176 /* Initialize the result string */
5177 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5178 result_len = ZSTR_LEN(input_str) * mult;
5179 ZSTR_COPY_CONCAT_PROPERTIES(result, input_str);
5180
5181 /* Heavy optimization for situations where input string is 1 byte long */
5182 if (ZSTR_LEN(input_str) == 1) {
5183 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5184 } else {
5185 const char *s, *ee;
5186 char *e;
5187 ptrdiff_t l=0;
5188 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5189 s = ZSTR_VAL(result);
5190 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5191 ee = ZSTR_VAL(result) + result_len;
5192
5193 while (e<ee) {
5194 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5195 memmove(e, s, l);
5196 e += l;
5197 }
5198 }
5199
5200 ZSTR_VAL(result)[result_len] = '\0';
5201
5202 RETURN_NEW_STR(result);
5203 }
5204 /* }}} */
5205
5206 /* {{{ Returns info about what characters are used in input */
5207 PHP_FUNCTION(count_chars)
5208 {
5209 zend_string *input;
5210 int chars[256];
5211 zend_long mymode=0;
5212 const unsigned char *buf;
5213 int inx;
5214 char retstr[256];
5215 size_t retlen=0;
5216 size_t tmp = 0;
5217
5218 ZEND_PARSE_PARAMETERS_START(1, 2)
5219 Z_PARAM_STR(input)
5220 Z_PARAM_OPTIONAL
5221 Z_PARAM_LONG(mymode)
5222 ZEND_PARSE_PARAMETERS_END();
5223
5224 if (mymode < 0 || mymode > 4) {
5225 zend_argument_value_error(2, "must be between 0 and 4 (inclusive)");
5226 RETURN_THROWS();
5227 }
5228
5229 buf = (const unsigned char *) ZSTR_VAL(input);
5230 memset((void*) chars, 0, sizeof(chars));
5231
5232 while (tmp < ZSTR_LEN(input)) {
5233 chars[*buf]++;
5234 buf++;
5235 tmp++;
5236 }
5237
5238 if (mymode < 3) {
5239 array_init(return_value);
5240 }
5241
5242 for (inx = 0; inx < 256; inx++) {
5243 switch (mymode) {
5244 case 0:
5245 add_index_long(return_value, inx, chars[inx]);
5246 break;
5247 case 1:
5248 if (chars[inx] != 0) {
5249 add_index_long(return_value, inx, chars[inx]);
5250 }
5251 break;
5252 case 2:
5253 if (chars[inx] == 0) {
5254 add_index_long(return_value, inx, chars[inx]);
5255 }
5256 break;
5257 case 3:
5258 if (chars[inx] != 0) {
5259 retstr[retlen++] = inx;
5260 }
5261 break;
5262 case 4:
5263 if (chars[inx] == 0) {
5264 retstr[retlen++] = inx;
5265 }
5266 break;
5267 }
5268 }
5269
5270 if (mymode == 3 || mymode == 4) {
5271 RETURN_STRINGL(retstr, retlen);
5272 }
5273 }
5274 /* }}} */
5275
5276 /* {{{ php_strnatcmp */
5277 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, bool is_case_insensitive)
5278 {
5279 zend_string *s1, *s2;
5280
5281 ZEND_PARSE_PARAMETERS_START(2, 2)
5282 Z_PARAM_STR(s1)
5283 Z_PARAM_STR(s2)
5284 ZEND_PARSE_PARAMETERS_END();
5285
5286 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5287 ZSTR_VAL(s2), ZSTR_LEN(s2),
5288 is_case_insensitive));
5289 }
5290 /* }}} */
5291
5292 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5293 PHP_FUNCTION(strnatcmp)
5294 {
5295 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5296 }
5297 /* }}} */
5298
5299 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5300 PHP_FUNCTION(strnatcasecmp)
5301 {
5302 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5303 }
5304 /* }}} */
5305
5306 /* {{{ Returns numeric formatting information based on the current locale */
5307 PHP_FUNCTION(localeconv)
5308 {
5309 zval grouping, mon_grouping;
5310 size_t len, i;
5311
5312 ZEND_PARSE_PARAMETERS_NONE();
5313
5314 array_init(return_value);
5315 array_init(&grouping);
5316 array_init(&mon_grouping);
5317
5318 {
5319 struct lconv currlocdata;
5320
5321 localeconv_r( &currlocdata );
5322
5323 /* Grab the grouping data out of the array */
5324 len = strlen(currlocdata.grouping);
5325
5326 for (i = 0; i < len; i++) {
5327 add_index_long(&grouping, i, currlocdata.grouping[i]);
5328 }
5329
5330 /* Grab the monetary grouping data out of the array */
5331 len = strlen(currlocdata.mon_grouping);
5332
5333 for (i = 0; i < len; i++) {
5334 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5335 }
5336
5337 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5338 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5339 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5340 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5341 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5342 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5343 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5344 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5345 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5346 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5347 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5348 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5349 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5350 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5351 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5352 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5353 }
5354
5355 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5356 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5357 }
5358 /* }}} */
5359
5360 /* {{{ Returns the number of times a substring occurs in the string */
5361 PHP_FUNCTION(substr_count)
5362 {
5363 char *haystack, *needle;
5364 zend_long offset = 0, length = 0;
5365 bool length_is_null = 1;
5366 zend_long count;
5367 size_t haystack_len, needle_len;
5368 const char *p, *endp;
5369
5370 ZEND_PARSE_PARAMETERS_START(2, 4)
5371 Z_PARAM_STRING(haystack, haystack_len)
5372 Z_PARAM_STRING(needle, needle_len)
5373 Z_PARAM_OPTIONAL
5374 Z_PARAM_LONG(offset)
5375 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5376 ZEND_PARSE_PARAMETERS_END();
5377
5378 if (needle_len == 0) {
5379 zend_argument_value_error(2, "cannot be empty");
5380 RETURN_THROWS();
5381 }
5382
5383 p = haystack;
5384
5385 if (offset) {
5386 if (offset < 0) {
5387 offset += (zend_long)haystack_len;
5388 }
5389 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5390 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5391 RETURN_THROWS();
5392 }
5393 p += offset;
5394 haystack_len -= offset;
5395 }
5396
5397 if (!length_is_null) {
5398 if (length < 0) {
5399 length += haystack_len;
5400 }
5401 if (length < 0 || ((size_t)length > haystack_len)) {
5402 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5403 RETURN_THROWS();
5404 }
5405 } else {
5406 length = haystack_len;
5407 }
5408
5409 if (needle_len == 1) {
5410 count = count_chars(p, length, needle[0]);
5411 } else {
5412 count = 0;
5413 endp = p + length;
5414 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5415 p += needle_len;
5416 count++;
5417 }
5418 }
5419
5420 RETURN_LONG(count);
5421 }
5422 /* }}} */
5423
5424 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5425 PHP_FUNCTION(str_pad)
5426 {
5427 /* Input arguments */
5428 zend_string *input; /* Input string */
5429 zend_long pad_length; /* Length to pad to */
5430
5431 /* Helper variables */
5432 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5433 char *pad_str = " "; /* Pointer to padding string */
5434 size_t pad_str_len = 1;
5435 zend_long pad_type_val = PHP_STR_PAD_RIGHT; /* The padding type value */
5436 size_t i, left_pad=0, right_pad=0;
5437 zend_string *result = NULL; /* Resulting string */
5438
5439 ZEND_PARSE_PARAMETERS_START(2, 4)
5440 Z_PARAM_STR(input)
5441 Z_PARAM_LONG(pad_length)
5442 Z_PARAM_OPTIONAL
5443 Z_PARAM_STRING(pad_str, pad_str_len)
5444 Z_PARAM_LONG(pad_type_val)
5445 ZEND_PARSE_PARAMETERS_END();
5446
5447 /* If resulting string turns out to be shorter than input string,
5448 we simply copy the input and return. */
5449 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5450 RETURN_STR_COPY(input);
5451 }
5452
5453 if (pad_str_len == 0) {
5454 zend_argument_value_error(3, "must be a non-empty string");
5455 RETURN_THROWS();
5456 }
5457
5458 if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
5459 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5460 RETURN_THROWS();
5461 }
5462
5463 num_pad_chars = pad_length - ZSTR_LEN(input);
5464 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5465 ZSTR_LEN(result) = 0;
5466
5467 /* We need to figure out the left/right padding lengths. */
5468 switch (pad_type_val) {
5469 case PHP_STR_PAD_RIGHT:
5470 left_pad = 0;
5471 right_pad = num_pad_chars;
5472 break;
5473
5474 case PHP_STR_PAD_LEFT:
5475 left_pad = num_pad_chars;
5476 right_pad = 0;
5477 break;
5478
5479 case PHP_STR_PAD_BOTH:
5480 left_pad = num_pad_chars / 2;
5481 right_pad = num_pad_chars - left_pad;
5482 break;
5483 }
5484
5485 /* First we pad on the left. */
5486 for (i = 0; i < left_pad; i++)
5487 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5488
5489 /* Then we copy the input string. */
5490 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5491 ZSTR_LEN(result) += ZSTR_LEN(input);
5492
5493 /* Finally, we pad on the right. */
5494 for (i = 0; i < right_pad; i++)
5495 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5496
5497 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5498
5499 RETURN_NEW_STR(result);
5500 }
5501 /* }}} */
5502
5503 /* {{{ Implements an ANSI C compatible sscanf */
5504 PHP_FUNCTION(sscanf)
5505 {
5506 zval *args = NULL;
5507 char *str, *format;
5508 size_t str_len, format_len;
5509 int result, num_args = 0;
5510
5511 ZEND_PARSE_PARAMETERS_START(2, -1)
5512 Z_PARAM_STRING(str, str_len)
5513 Z_PARAM_STRING(format, format_len)
5514 Z_PARAM_VARIADIC('*', args, num_args)
5515 ZEND_PARSE_PARAMETERS_END();
5516
5517 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5518
5519 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5520 WRONG_PARAM_COUNT;
5521 }
5522 }
5523 /* }}} */
5524
5525 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5526 static zend_string *php_str_rot13(zend_string *str)
5527 {
5528 zend_string *ret;
5529 const char *p, *e;
5530 char *target;
5531
5532 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5533 return ZSTR_EMPTY_ALLOC();
5534 }
5535
5536 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5537
5538 p = ZSTR_VAL(str);
5539 e = p + ZSTR_LEN(str);
5540 target = ZSTR_VAL(ret);
5541
5542 #ifdef __SSE2__
5543 if (e - p > 15) {
5544 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5545 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5546 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5547 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5548 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5549 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5550 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5551 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5552 const __m128i add = _mm_set1_epi8(13);
5553 const __m128i sub = _mm_set1_epi8(-13);
5554
5555 do {
5556 __m128i in, gt, lt, cmp, delta;
5557
5558 delta = _mm_setzero_si128();
5559 in = _mm_loadu_si128((__m128i *)p);
5560
5561 gt = _mm_cmpgt_epi8(in, a_minus_1);
5562 lt = _mm_cmplt_epi8(in, m_plus_1);
5563 cmp = _mm_and_si128(lt, gt);
5564 if (_mm_movemask_epi8(cmp)) {
5565 cmp = _mm_and_si128(cmp, add);
5566 delta = _mm_or_si128(delta, cmp);
5567 }
5568
5569 gt = _mm_cmpgt_epi8(in, n_minus_1);
5570 lt = _mm_cmplt_epi8(in, z_plus_1);
5571 cmp = _mm_and_si128(lt, gt);
5572 if (_mm_movemask_epi8(cmp)) {
5573 cmp = _mm_and_si128(cmp, sub);
5574 delta = _mm_or_si128(delta, cmp);
5575 }
5576
5577 gt = _mm_cmpgt_epi8(in, A_minus_1);
5578 lt = _mm_cmplt_epi8(in, M_plus_1);
5579 cmp = _mm_and_si128(lt, gt);
5580 if (_mm_movemask_epi8(cmp)) {
5581 cmp = _mm_and_si128(cmp, add);
5582 delta = _mm_or_si128(delta, cmp);
5583 }
5584
5585 gt = _mm_cmpgt_epi8(in, N_minus_1);
5586 lt = _mm_cmplt_epi8(in, Z_plus_1);
5587 cmp = _mm_and_si128(lt, gt);
5588 if (_mm_movemask_epi8(cmp)) {
5589 cmp = _mm_and_si128(cmp, sub);
5590 delta = _mm_or_si128(delta, cmp);
5591 }
5592
5593 in = _mm_add_epi8(in, delta);
5594 _mm_storeu_si128((__m128i *)target, in);
5595
5596 p += 16;
5597 target += 16;
5598 } while (e - p > 15);
5599 }
5600 #endif
5601
5602 while (p < e) {
5603 if (*p >= 'a' && *p <= 'z') {
5604 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5605 } else if (*p >= 'A' && *p <= 'Z') {
5606 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5607 } else {
5608 *target++ = *p++;
5609 }
5610 }
5611
5612 *target = '\0';
5613
5614 return ret;
5615 }
5616 /* }}} */
5617
5618 /* {{{ Perform the rot13 transform on a string */
5619 PHP_FUNCTION(str_rot13)
5620 {
5621 zend_string *arg;
5622
5623 ZEND_PARSE_PARAMETERS_START(1, 1)
5624 Z_PARAM_STR(arg)
5625 ZEND_PARSE_PARAMETERS_END();
5626
5627 RETURN_STR(php_str_rot13(arg));
5628 }
5629 /* }}} */
5630
5631 /* {{{ php_binary_string_shuffle */
5632 PHPAPI bool php_binary_string_shuffle(const php_random_algo *algo, php_random_status *status, char *str, zend_long len) /* {{{ */
5633 {
5634 int64_t n_elems, rnd_idx, n_left;
5635 char temp;
5636
5637 /* The implementation is stolen from array_data_shuffle */
5638 /* Thus the characteristics of the randomization are the same */
5639 n_elems = len;
5640
5641 if (n_elems <= 1) {
5642 return true;
5643 }
5644
5645 n_left = n_elems;
5646
5647 while (--n_left) {
5648 rnd_idx = algo->range(status, 0, n_left);
5649 if (EG(exception)) {
5650 return false;
5651 }
5652 if (rnd_idx != n_left) {
5653 temp = str[n_left];
5654 str[n_left] = str[rnd_idx];
5655 str[rnd_idx] = temp;
5656 }
5657 }
5658
5659 return true;
5660 }
5661 /* }}} */
5662
5663 /* {{{ Shuffles string. One permutation of all possible is created */
5664 PHP_FUNCTION(str_shuffle)
5665 {
5666 zend_string *arg;
5667
5668 ZEND_PARSE_PARAMETERS_START(1, 1)
5669 Z_PARAM_STR(arg)
5670 ZEND_PARSE_PARAMETERS_END();
5671
5672 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
5673 if (Z_STRLEN_P(return_value) > 1) {
5674 php_binary_string_shuffle(
5675 php_random_default_algo(),
5676 php_random_default_status(),
5677 Z_STRVAL_P(return_value),
5678 Z_STRLEN_P(return_value)
5679 );
5680 }
5681 }
5682 /* }}} */
5683
5684 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
5685 then the function will return an array containing all the words
5686 found inside the string. If format of 2 is specified, then the function
5687 will return an associated array where the position of the word is the key
5688 and the word itself is the value.
5689 For the purpose of this function, 'word' is defined as a locale dependent
5690 string containing alphabetic characters, which also may contain, but not start
5691 with "'" and "-" characters.
5692 */
5693 PHP_FUNCTION(str_word_count)
5694 {
5695 zend_string *str;
5696 char *char_list = NULL, ch[256];
5697 const char *p, *e, *s;
5698 size_t char_list_len = 0, word_count = 0;
5699 zend_long type = 0;
5700
5701 ZEND_PARSE_PARAMETERS_START(1, 3)
5702 Z_PARAM_STR(str)
5703 Z_PARAM_OPTIONAL
5704 Z_PARAM_LONG(type)
5705 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
5706 ZEND_PARSE_PARAMETERS_END();
5707
5708 switch(type) {
5709 case 1:
5710 case 2:
5711 array_init(return_value);
5712 if (!ZSTR_LEN(str)) {
5713 return;
5714 }
5715 break;
5716 case 0:
5717 if (!ZSTR_LEN(str)) {
5718 RETURN_LONG(0);
5719 }
5720 /* nothing to be done */
5721 break;
5722 default:
5723 zend_argument_value_error(2, "must be a valid format value");
5724 RETURN_THROWS();
5725 }
5726
5727 if (char_list) {
5728 php_charmask((const unsigned char *) char_list, char_list_len, ch);
5729 }
5730
5731 p = ZSTR_VAL(str);
5732 e = ZSTR_VAL(str) + ZSTR_LEN(str);
5733
5734 /* first character cannot be ' or -, unless explicitly allowed by the user */
5735 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
5736 p++;
5737 }
5738 /* last character cannot be -, unless explicitly allowed by the user */
5739 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
5740 e--;
5741 }
5742
5743 while (p < e) {
5744 s = p;
5745 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
5746 p++;
5747 }
5748 if (p > s) {
5749 switch (type)
5750 {
5751 case 1:
5752 add_next_index_stringl(return_value, s, p - s);
5753 break;
5754 case 2:
5755 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
5756 break;
5757 default:
5758 word_count++;
5759 break;
5760 }
5761 }
5762 p++;
5763 }
5764
5765 if (!type) {
5766 RETURN_LONG(word_count);
5767 }
5768 }
5769
5770 /* }}} */
5771
5772 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
5773 PHP_FUNCTION(str_split)
5774 {
5775 zend_string *str;
5776 zend_long split_length = 1;
5777 const char *p;
5778 size_t n_reg_segments;
5779
5780 ZEND_PARSE_PARAMETERS_START(1, 2)
5781 Z_PARAM_STR(str)
5782 Z_PARAM_OPTIONAL
5783 Z_PARAM_LONG(split_length)
5784 ZEND_PARSE_PARAMETERS_END();
5785
5786 if (split_length <= 0) {
5787 zend_argument_value_error(2, "must be greater than 0");
5788 RETURN_THROWS();
5789 }
5790
5791 if ((size_t)split_length >= ZSTR_LEN(str)) {
5792 if (0 == ZSTR_LEN(str)) {
5793 RETURN_EMPTY_ARRAY();
5794 }
5795
5796 array_init_size(return_value, 1);
5797 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
5798 return;
5799 }
5800
5801 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
5802
5803 n_reg_segments = ZSTR_LEN(str) / split_length;
5804 p = ZSTR_VAL(str);
5805
5806 while (n_reg_segments-- > 0) {
5807 add_next_index_stringl(return_value, p, split_length);
5808 p += split_length;
5809 }
5810
5811 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
5812 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
5813 }
5814 }
5815 /* }}} */
5816
5817 /* {{{ Search a string for any of a set of characters */
5818 PHP_FUNCTION(strpbrk)
5819 {
5820 zend_string *haystack, *char_list;
5821 const char *haystack_ptr, *cl_ptr;
5822
5823 ZEND_PARSE_PARAMETERS_START(2, 2)
5824 Z_PARAM_STR(haystack)
5825 Z_PARAM_STR(char_list)
5826 ZEND_PARSE_PARAMETERS_END();
5827
5828 if (!ZSTR_LEN(char_list)) {
5829 zend_argument_value_error(2, "must be a non-empty string");
5830 RETURN_THROWS();
5831 }
5832
5833 for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
5834 for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
5835 if (*cl_ptr == *haystack_ptr) {
5836 RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
5837 }
5838 }
5839 }
5840
5841 RETURN_FALSE;
5842 }
5843 /* }}} */
5844
5845 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
5846 PHP_FUNCTION(substr_compare)
5847 {
5848 zend_string *s1, *s2;
5849 zend_long offset, len=0;
5850 bool len_is_default=1;
5851 bool cs=0;
5852 size_t cmp_len;
5853
5854 ZEND_PARSE_PARAMETERS_START(3, 5)
5855 Z_PARAM_STR(s1)
5856 Z_PARAM_STR(s2)
5857 Z_PARAM_LONG(offset)
5858 Z_PARAM_OPTIONAL
5859 Z_PARAM_LONG_OR_NULL(len, len_is_default)
5860 Z_PARAM_BOOL(cs)
5861 ZEND_PARSE_PARAMETERS_END();
5862
5863 if (!len_is_default && len <= 0) {
5864 if (len == 0) {
5865 RETURN_LONG(0L);
5866 } else {
5867 zend_argument_value_error(4, "must be greater than or equal to 0");
5868 RETURN_THROWS();
5869 }
5870 }
5871
5872 if (offset < 0) {
5873 offset = ZSTR_LEN(s1) + offset;
5874 offset = (offset < 0) ? 0 : offset;
5875 }
5876
5877 if ((size_t)offset > ZSTR_LEN(s1)) {
5878 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5879 RETURN_THROWS();
5880 }
5881
5882 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
5883
5884 if (!cs) {
5885 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5886 } else {
5887 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5888 }
5889 }
5890 /* }}} */
5891
5892 /* {{{ */
5893 static zend_string *php_utf8_encode(const char *s, size_t len)
5894 {
5895 size_t pos = len;
5896 zend_string *str;
5897 unsigned char c;
5898
5899 str = zend_string_safe_alloc(len, 2, 0, 0);
5900 ZSTR_LEN(str) = 0;
5901 while (pos > 0) {
5902 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5903 * so we don't need to do any mapping here. */
5904 c = (unsigned char)(*s);
5905 if (c < 0x80) {
5906 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
5907 /* We only account for the single-byte and two-byte cases because
5908 * we're only dealing with the first 256 Unicode codepoints. */
5909 } else {
5910 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
5911 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
5912 }
5913 pos--;
5914 s++;
5915 }
5916 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
5917 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5918 return str;
5919 }
5920 /* }}} */
5921
5922 /* {{{ */
5923 static zend_string *php_utf8_decode(const char *s, size_t len)
5924 {
5925 size_t pos = 0;
5926 unsigned int c;
5927 zend_string *str;
5928
5929 str = zend_string_alloc(len, 0);
5930 ZSTR_LEN(str) = 0;
5931 while (pos < len) {
5932 zend_result status = FAILURE;
5933 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
5934
5935 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5936 * so we don't need to do any mapping here beyond replacing non-Latin-1
5937 * characters. */
5938 if (status == FAILURE || c > 0xFFU) {
5939 c = '?';
5940 }
5941
5942 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
5943 }
5944 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
5945 if (ZSTR_LEN(str) < len) {
5946 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5947 }
5948
5949 return str;
5950 }
5951 /* }}} */
5952
5953 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
5954 PHP_FUNCTION(utf8_encode)
5955 {
5956 char *arg;
5957 size_t arg_len;
5958
5959 ZEND_PARSE_PARAMETERS_START(1, 1)
5960 Z_PARAM_STRING(arg, arg_len)
5961 ZEND_PARSE_PARAMETERS_END();
5962
5963 RETURN_STR(php_utf8_encode(arg, arg_len));
5964 }
5965 /* }}} */
5966
5967 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
5968 PHP_FUNCTION(utf8_decode)
5969 {
5970 char *arg;
5971 size_t arg_len;
5972
5973 ZEND_PARSE_PARAMETERS_START(1, 1)
5974 Z_PARAM_STRING(arg, arg_len)
5975 ZEND_PARSE_PARAMETERS_END();
5976
5977 RETURN_STR(php_utf8_decode(arg, arg_len));
5978 }
5979 /* }}} */
5980