1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_string.h"
22 #include "php_variables.h"
23 #include <locale.h>
24 #ifdef HAVE_LANGINFO_H
25 # include <langinfo.h>
26 #endif
27
28 #ifdef HAVE_LIBINTL
29 # include <libintl.h> /* For LC_MESSAGES */
30 #endif
31
32 #include "scanf.h"
33 #include "zend_API.h"
34 #include "zend_execute.h"
35 #include "php_globals.h"
36 #include "basic_functions.h"
37 #include "zend_smart_str.h"
38 #include <Zend/zend_exceptions.h>
39 #ifdef ZTS
40 #include "TSRM.h"
41 #endif
42
43 /* For str_getcsv() support */
44 #include "ext/standard/file.h"
45 /* For php_next_utf8_char() */
46 #include "ext/standard/html.h"
47 #include "ext/random/php_random.h"
48
49 #ifdef __SSE2__
50 #include <emmintrin.h>
51 #endif
52
53 /* this is read-only, so it's ok */
54 ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
55
56 /* localeconv mutex */
57 #ifdef ZTS
58 static MUTEX_T locale_mutex = NULL;
59 #endif
60
61 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)62 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
63 {
64 zend_string *result;
65 size_t i, j;
66
67 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
68
69 for (i = j = 0; i < oldlen; i++) {
70 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
71 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
72 }
73 ZSTR_VAL(result)[j] = '\0';
74
75 return result;
76 }
77 /* }}} */
78
79 /* {{{ php_hex2bin */
php_hex2bin(const unsigned char * old,const size_t oldlen)80 static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
81 {
82 size_t target_length = oldlen >> 1;
83 zend_string *str = zend_string_alloc(target_length, 0);
84 unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
85 size_t i, j;
86
87 for (i = j = 0; i < target_length; i++) {
88 unsigned char c = old[j++];
89 unsigned char l = c & ~0x20;
90 int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
91 unsigned char d;
92
93 /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
94 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
95 d = (l - 0x10 - 0x27 * is_letter) << 4;
96 } else {
97 zend_string_efree(str);
98 return NULL;
99 }
100 c = old[j++];
101 l = c & ~0x20;
102 is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
103 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
104 d |= l - 0x10 - 0x27 * is_letter;
105 } else {
106 zend_string_efree(str);
107 return NULL;
108 }
109 ret[i] = d;
110 }
111 ret[i] = '\0';
112
113 return str;
114 }
115 /* }}} */
116
117 /* {{{ localeconv_r
118 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
localeconv_r(struct lconv * out)119 PHPAPI struct lconv *localeconv_r(struct lconv *out)
120 {
121
122 #ifdef ZTS
123 tsrm_mutex_lock( locale_mutex );
124 #endif
125
126 /* cur->locinfo is struct __crt_locale_info which implementation is
127 hidden in vc14. TODO revisit this and check if a workaround available
128 and needed. */
129 #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
130 {
131 /* Even with the enabled per thread locale, localeconv
132 won't check any locale change in the master thread. */
133 _locale_t cur = _get_current_locale();
134 *out = *cur->locinfo->lconv;
135 _free_locale(cur);
136 }
137 #else
138 /* localeconv doesn't return an error condition */
139 *out = *localeconv();
140 #endif
141
142 #ifdef ZTS
143 tsrm_mutex_unlock( locale_mutex );
144 #endif
145
146 return out;
147 }
148 /* }}} */
149
150 #ifdef ZTS
151 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(localeconv)152 PHP_MINIT_FUNCTION(localeconv)
153 {
154 locale_mutex = tsrm_mutex_alloc();
155 return SUCCESS;
156 }
157 /* }}} */
158
159 /* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(localeconv)160 PHP_MSHUTDOWN_FUNCTION(localeconv)
161 {
162 tsrm_mutex_free( locale_mutex );
163 locale_mutex = NULL;
164 return SUCCESS;
165 }
166 /* }}} */
167 #endif
168
169 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)170 PHP_FUNCTION(bin2hex)
171 {
172 zend_string *result;
173 zend_string *data;
174
175 ZEND_PARSE_PARAMETERS_START(1, 1)
176 Z_PARAM_STR(data)
177 ZEND_PARSE_PARAMETERS_END();
178
179 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
180
181 RETURN_STR(result);
182 }
183 /* }}} */
184
185 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)186 PHP_FUNCTION(hex2bin)
187 {
188 zend_string *result, *data;
189
190 ZEND_PARSE_PARAMETERS_START(1, 1)
191 Z_PARAM_STR(data)
192 ZEND_PARSE_PARAMETERS_END();
193
194 if (ZSTR_LEN(data) % 2 != 0) {
195 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
196 RETURN_FALSE;
197 }
198
199 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
200
201 if (!result) {
202 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
203 RETURN_FALSE;
204 }
205
206 RETVAL_STR(result);
207 }
208 /* }}} */
209
/* Shared implementation of strspn() and strcspn().
 * Parses (string, characters[, offset[, length]]), clamps offset/length to the
 * subject string and forwards the resulting segment to php_strspn() or
 * php_strcspn(), depending on behavior (PHP_STR_STRSPN / PHP_STR_STRCSPN). */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
{
	zend_string *subject, *charset;
	zend_long start = 0, len = 0;
	bool len_is_null = 1;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(subject)
		Z_PARAM_STR(charset)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(start)
		Z_PARAM_LONG_OR_NULL(len, len_is_null)
	ZEND_PARSE_PARAMETERS_END();

	size_t remain_len = ZSTR_LEN(subject);

	/* A negative offset counts back from the end; clamp to [0, string length]. */
	if (start < 0) {
		start += remain_len;
		if (start < 0) {
			start = 0;
		}
	} else if ((size_t) start > remain_len) {
		start = remain_len;
	}
	remain_len -= start;

	/* NULL length means "up to the end"; a negative length stops that many
	 * bytes before the end; clamp to [0, bytes remaining after the offset]. */
	if (len_is_null) {
		len = remain_len;
	} else if (len < 0) {
		len += remain_len;
		if (len < 0) {
			len = 0;
		}
	} else if ((size_t) len > remain_len) {
		len = remain_len;
	}

	if (len == 0) {
		RETURN_LONG(0);
	}

	const char *segment_start = ZSTR_VAL(subject) + start;
	const char *segment_end = ZSTR_VAL(subject) + start + len;
	const char *charset_start = ZSTR_VAL(charset);
	const char *charset_end = ZSTR_VAL(charset) + ZSTR_LEN(charset);

	if (behavior == PHP_STR_STRSPN) {
		RETURN_LONG(php_strspn(segment_start, charset_start, segment_end, charset_end));
	}

	ZEND_ASSERT(behavior == PHP_STR_STRCSPN);
	RETURN_LONG(php_strcspn(segment_start, charset_start, segment_end, charset_end));
}
265 /* }}} */
266
267 /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
PHP_FUNCTION(strspn)268 PHP_FUNCTION(strspn)
269 {
270 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRSPN);
271 }
272 /* }}} */
273
274 /* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provide works like strcspn(substr($s,$start,$len),$bad_chars) */
PHP_FUNCTION(strcspn)275 PHP_FUNCTION(strcspn)
276 {
277 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_STR_STRCSPN);
278 }
279 /* }}} */
280
281 /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
282 #ifdef HAVE_NL_LANGINFO
PHP_MINIT_FUNCTION(nl_langinfo)283 PHP_MINIT_FUNCTION(nl_langinfo)
284 {
285 #define REGISTER_NL_LANGINFO_CONSTANT(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
286 #ifdef ABDAY_1
287 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
288 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
289 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
290 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
291 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
292 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
293 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
294 #endif
295 #ifdef DAY_1
296 REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
297 REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
298 REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
299 REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
300 REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
301 REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
302 REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
303 #endif
304 #ifdef ABMON_1
305 REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
306 REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
307 REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
308 REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
309 REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
310 REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
311 REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
312 REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
313 REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
314 REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
315 REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
316 REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
317 #endif
318 #ifdef MON_1
319 REGISTER_NL_LANGINFO_CONSTANT(MON_1);
320 REGISTER_NL_LANGINFO_CONSTANT(MON_2);
321 REGISTER_NL_LANGINFO_CONSTANT(MON_3);
322 REGISTER_NL_LANGINFO_CONSTANT(MON_4);
323 REGISTER_NL_LANGINFO_CONSTANT(MON_5);
324 REGISTER_NL_LANGINFO_CONSTANT(MON_6);
325 REGISTER_NL_LANGINFO_CONSTANT(MON_7);
326 REGISTER_NL_LANGINFO_CONSTANT(MON_8);
327 REGISTER_NL_LANGINFO_CONSTANT(MON_9);
328 REGISTER_NL_LANGINFO_CONSTANT(MON_10);
329 REGISTER_NL_LANGINFO_CONSTANT(MON_11);
330 REGISTER_NL_LANGINFO_CONSTANT(MON_12);
331 #endif
332 #ifdef AM_STR
333 REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
334 #endif
335 #ifdef PM_STR
336 REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
337 #endif
338 #ifdef D_T_FMT
339 REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
340 #endif
341 #ifdef D_FMT
342 REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
343 #endif
344 #ifdef T_FMT
345 REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
346 #endif
347 #ifdef T_FMT_AMPM
348 REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
349 #endif
350 #ifdef ERA
351 REGISTER_NL_LANGINFO_CONSTANT(ERA);
352 #endif
353 #ifdef ERA_YEAR
354 REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
355 #endif
356 #ifdef ERA_D_T_FMT
357 REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
358 #endif
359 #ifdef ERA_D_FMT
360 REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
361 #endif
362 #ifdef ERA_T_FMT
363 REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
364 #endif
365 #ifdef ALT_DIGITS
366 REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
367 #endif
368 #ifdef INT_CURR_SYMBOL
369 REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
370 #endif
371 #ifdef CURRENCY_SYMBOL
372 REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
373 #endif
374 #ifdef CRNCYSTR
375 REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
376 #endif
377 #ifdef MON_DECIMAL_POINT
378 REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
379 #endif
380 #ifdef MON_THOUSANDS_SEP
381 REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
382 #endif
383 #ifdef MON_GROUPING
384 REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
385 #endif
386 #ifdef POSITIVE_SIGN
387 REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
388 #endif
389 #ifdef NEGATIVE_SIGN
390 REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
391 #endif
392 #ifdef INT_FRAC_DIGITS
393 REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
394 #endif
395 #ifdef FRAC_DIGITS
396 REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
397 #endif
398 #ifdef P_CS_PRECEDES
399 REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
400 #endif
401 #ifdef P_SEP_BY_SPACE
402 REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
403 #endif
404 #ifdef N_CS_PRECEDES
405 REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
406 #endif
407 #ifdef N_SEP_BY_SPACE
408 REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
409 #endif
410 #ifdef P_SIGN_POSN
411 REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
412 #endif
413 #ifdef N_SIGN_POSN
414 REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
415 #endif
416 #ifdef DECIMAL_POINT
417 REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
418 #endif
419 #ifdef RADIXCHAR
420 REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
421 #endif
422 #ifdef THOUSANDS_SEP
423 REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
424 #endif
425 #ifdef THOUSEP
426 REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
427 #endif
428 #ifdef GROUPING
429 REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
430 #endif
431 #ifdef YESEXPR
432 REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
433 #endif
434 #ifdef NOEXPR
435 REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
436 #endif
437 #ifdef YESSTR
438 REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
439 #endif
440 #ifdef NOSTR
441 REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
442 #endif
443 #ifdef CODESET
444 REGISTER_NL_LANGINFO_CONSTANT(CODESET);
445 #endif
446 #undef REGISTER_NL_LANGINFO_CONSTANT
447 return SUCCESS;
448 }
449 /* }}} */
450
451 /* {{{ Query language and locale information */
PHP_FUNCTION(nl_langinfo)452 PHP_FUNCTION(nl_langinfo)
453 {
454 zend_long item;
455 char *value;
456
457 ZEND_PARSE_PARAMETERS_START(1, 1)
458 Z_PARAM_LONG(item)
459 ZEND_PARSE_PARAMETERS_END();
460
461 switch(item) { /* {{{ */
462 #ifdef ABDAY_1
463 case ABDAY_1:
464 case ABDAY_2:
465 case ABDAY_3:
466 case ABDAY_4:
467 case ABDAY_5:
468 case ABDAY_6:
469 case ABDAY_7:
470 #endif
471 #ifdef DAY_1
472 case DAY_1:
473 case DAY_2:
474 case DAY_3:
475 case DAY_4:
476 case DAY_5:
477 case DAY_6:
478 case DAY_7:
479 #endif
480 #ifdef ABMON_1
481 case ABMON_1:
482 case ABMON_2:
483 case ABMON_3:
484 case ABMON_4:
485 case ABMON_5:
486 case ABMON_6:
487 case ABMON_7:
488 case ABMON_8:
489 case ABMON_9:
490 case ABMON_10:
491 case ABMON_11:
492 case ABMON_12:
493 #endif
494 #ifdef MON_1
495 case MON_1:
496 case MON_2:
497 case MON_3:
498 case MON_4:
499 case MON_5:
500 case MON_6:
501 case MON_7:
502 case MON_8:
503 case MON_9:
504 case MON_10:
505 case MON_11:
506 case MON_12:
507 #endif
508 #ifdef AM_STR
509 case AM_STR:
510 #endif
511 #ifdef PM_STR
512 case PM_STR:
513 #endif
514 #ifdef D_T_FMT
515 case D_T_FMT:
516 #endif
517 #ifdef D_FMT
518 case D_FMT:
519 #endif
520 #ifdef T_FMT
521 case T_FMT:
522 #endif
523 #ifdef T_FMT_AMPM
524 case T_FMT_AMPM:
525 #endif
526 #ifdef ERA
527 case ERA:
528 #endif
529 #ifdef ERA_YEAR
530 case ERA_YEAR:
531 #endif
532 #ifdef ERA_D_T_FMT
533 case ERA_D_T_FMT:
534 #endif
535 #ifdef ERA_D_FMT
536 case ERA_D_FMT:
537 #endif
538 #ifdef ERA_T_FMT
539 case ERA_T_FMT:
540 #endif
541 #ifdef ALT_DIGITS
542 case ALT_DIGITS:
543 #endif
544 #ifdef INT_CURR_SYMBOL
545 case INT_CURR_SYMBOL:
546 #endif
547 #ifdef CURRENCY_SYMBOL
548 case CURRENCY_SYMBOL:
549 #endif
550 #ifdef CRNCYSTR
551 case CRNCYSTR:
552 #endif
553 #ifdef MON_DECIMAL_POINT
554 case MON_DECIMAL_POINT:
555 #endif
556 #ifdef MON_THOUSANDS_SEP
557 case MON_THOUSANDS_SEP:
558 #endif
559 #ifdef MON_GROUPING
560 case MON_GROUPING:
561 #endif
562 #ifdef POSITIVE_SIGN
563 case POSITIVE_SIGN:
564 #endif
565 #ifdef NEGATIVE_SIGN
566 case NEGATIVE_SIGN:
567 #endif
568 #ifdef INT_FRAC_DIGITS
569 case INT_FRAC_DIGITS:
570 #endif
571 #ifdef FRAC_DIGITS
572 case FRAC_DIGITS:
573 #endif
574 #ifdef P_CS_PRECEDES
575 case P_CS_PRECEDES:
576 #endif
577 #ifdef P_SEP_BY_SPACE
578 case P_SEP_BY_SPACE:
579 #endif
580 #ifdef N_CS_PRECEDES
581 case N_CS_PRECEDES:
582 #endif
583 #ifdef N_SEP_BY_SPACE
584 case N_SEP_BY_SPACE:
585 #endif
586 #ifdef P_SIGN_POSN
587 case P_SIGN_POSN:
588 #endif
589 #ifdef N_SIGN_POSN
590 case N_SIGN_POSN:
591 #endif
592 #ifdef DECIMAL_POINT
593 case DECIMAL_POINT:
594 #elif defined(RADIXCHAR)
595 case RADIXCHAR:
596 #endif
597 #ifdef THOUSANDS_SEP
598 case THOUSANDS_SEP:
599 #elif defined(THOUSEP)
600 case THOUSEP:
601 #endif
602 #ifdef GROUPING
603 case GROUPING:
604 #endif
605 #ifdef YESEXPR
606 case YESEXPR:
607 #endif
608 #ifdef NOEXPR
609 case NOEXPR:
610 #endif
611 #ifdef YESSTR
612 case YESSTR:
613 #endif
614 #ifdef NOSTR
615 case NOSTR:
616 #endif
617 #ifdef CODESET
618 case CODESET:
619 #endif
620 break;
621 default:
622 php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
623 RETURN_FALSE;
624 }
625 /* }}} */
626
627 value = nl_langinfo(item);
628 if (value == NULL) {
629 RETURN_FALSE;
630 } else {
631 RETURN_STRING(value);
632 }
633 }
634 #endif
635 /* }}} */
636
637 /* {{{ Compares two strings using the current locale */
PHP_FUNCTION(strcoll)638 PHP_FUNCTION(strcoll)
639 {
640 zend_string *s1, *s2;
641
642 ZEND_PARSE_PARAMETERS_START(2, 2)
643 Z_PARAM_STR(s1)
644 Z_PARAM_STR(s2)
645 ZEND_PARSE_PARAMETERS_END();
646
647 RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
648 (const char *) ZSTR_VAL(s2)));
649 }
650 /* }}} */
651
652 /* {{{ php_charmask
653 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
654 * it needs to be incrementing.
655 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
656 */
php_charmask(const unsigned char * input,size_t len,char * mask)657 static inline zend_result php_charmask(const unsigned char *input, size_t len, char *mask)
658 {
659 const unsigned char *end;
660 unsigned char c;
661 zend_result result = SUCCESS;
662
663 memset(mask, 0, 256);
664 for (end = input+len; input < end; input++) {
665 c=*input;
666 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
667 && input[3] >= c) {
668 memset(mask+c, 1, input[3] - c + 1);
669 input+=3;
670 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
671 /* Error, try to be as helpful as possible:
672 (a range ending/starting with '.' won't be captured here) */
673 if (end-len >= input) { /* there was no 'left' char */
674 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
675 result = FAILURE;
676 continue;
677 }
678 if (input+2 >= end) { /* there is no 'right' char */
679 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
680 result = FAILURE;
681 continue;
682 }
683 if (input[-1] > input[2]) { /* wrong order */
684 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
685 result = FAILURE;
686 continue;
687 }
688 /* FIXME: better error (a..b..c is the only left possibility?) */
689 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
690 result = FAILURE;
691 continue;
692 } else {
693 mask[c]=1;
694 }
695 }
696 return result;
697 }
698 /* }}} */
699
700 /* {{{ php_trim_int()
701 * mode 1 : trim left
702 * mode 2 : trim right
703 * mode 3 : trim left and right
704 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
705 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)706 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
707 {
708 const char *start = ZSTR_VAL(str);
709 const char *end = start + ZSTR_LEN(str);
710 char mask[256];
711
712 if (what) {
713 if (what_len == 1) {
714 char p = *what;
715 if (mode & 1) {
716 while (start != end) {
717 if (*start == p) {
718 start++;
719 } else {
720 break;
721 }
722 }
723 }
724 if (mode & 2) {
725 while (start != end) {
726 if (*(end-1) == p) {
727 end--;
728 } else {
729 break;
730 }
731 }
732 }
733 } else {
734 php_charmask((const unsigned char *) what, what_len, mask);
735
736 if (mode & 1) {
737 while (start != end) {
738 if (mask[(unsigned char)*start]) {
739 start++;
740 } else {
741 break;
742 }
743 }
744 }
745 if (mode & 2) {
746 while (start != end) {
747 if (mask[(unsigned char)*(end-1)]) {
748 end--;
749 } else {
750 break;
751 }
752 }
753 }
754 }
755 } else {
756 if (mode & 1) {
757 while (start != end) {
758 unsigned char c = (unsigned char)*start;
759
760 if (c <= ' ' &&
761 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
762 start++;
763 } else {
764 break;
765 }
766 }
767 }
768 if (mode & 2) {
769 while (start != end) {
770 unsigned char c = (unsigned char)*(end-1);
771
772 if (c <= ' ' &&
773 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
774 end--;
775 } else {
776 break;
777 }
778 }
779 }
780 }
781
782 if (ZSTR_LEN(str) == end - start) {
783 return zend_string_copy(str);
784 } else if (end - start == 0) {
785 return ZSTR_EMPTY_ALLOC();
786 } else {
787 return zend_string_init(start, end - start, 0);
788 }
789 }
790 /* }}} */
791
792 /* {{{ php_trim_int()
793 * mode 1 : trim left
794 * mode 2 : trim right
795 * mode 3 : trim left and right
796 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
797 */
php_trim(zend_string * str,const char * what,size_t what_len,int mode)798 PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
799 {
800 return php_trim_int(str, what, what_len, mode);
801 }
802 /* }}} */
803
804 /* {{{ php_do_trim
805 * Base for trim(), rtrim() and ltrim() functions.
806 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)807 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
808 {
809 zend_string *str;
810 zend_string *what = NULL;
811
812 ZEND_PARSE_PARAMETERS_START(1, 2)
813 Z_PARAM_STR(str)
814 Z_PARAM_OPTIONAL
815 Z_PARAM_STR(what)
816 ZEND_PARSE_PARAMETERS_END();
817
818 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
819 }
820 /* }}} */
821
822 /* {{{ Strips whitespace from the beginning and end of a string */
PHP_FUNCTION(trim)823 PHP_FUNCTION(trim)
824 {
825 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
826 }
827 /* }}} */
828
829 /* {{{ Removes trailing whitespace */
PHP_FUNCTION(rtrim)830 PHP_FUNCTION(rtrim)
831 {
832 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
833 }
834 /* }}} */
835
836 /* {{{ Strips whitespace from the beginning of a string */
PHP_FUNCTION(ltrim)837 PHP_FUNCTION(ltrim)
838 {
839 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
840 }
841 /* }}} */
842
843 /* {{{ Wraps buffer to selected number of characters using string break char */
PHP_FUNCTION(wordwrap)844 PHP_FUNCTION(wordwrap)
845 {
846 zend_string *text;
847 char *breakchar = "\n";
848 size_t newtextlen, chk, breakchar_len = 1;
849 size_t alloced;
850 zend_long current = 0, laststart = 0, lastspace = 0;
851 zend_long linelength = 75;
852 bool docut = 0;
853 zend_string *newtext;
854
855 ZEND_PARSE_PARAMETERS_START(1, 4)
856 Z_PARAM_STR(text)
857 Z_PARAM_OPTIONAL
858 Z_PARAM_LONG(linelength)
859 Z_PARAM_STRING(breakchar, breakchar_len)
860 Z_PARAM_BOOL(docut)
861 ZEND_PARSE_PARAMETERS_END();
862
863 if (ZSTR_LEN(text) == 0) {
864 RETURN_EMPTY_STRING();
865 }
866
867 if (breakchar_len == 0) {
868 zend_argument_value_error(3, "cannot be empty");
869 RETURN_THROWS();
870 }
871
872 if (linelength == 0 && docut) {
873 zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
874 RETURN_THROWS();
875 }
876
877 /* Special case for a single-character break as it needs no
878 additional storage space */
879 if (breakchar_len == 1 && !docut) {
880 newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
881
882 laststart = lastspace = 0;
883 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
884 if (ZSTR_VAL(text)[current] == breakchar[0]) {
885 laststart = lastspace = current + 1;
886 } else if (ZSTR_VAL(text)[current] == ' ') {
887 if (current - laststart >= linelength) {
888 ZSTR_VAL(newtext)[current] = breakchar[0];
889 laststart = current + 1;
890 }
891 lastspace = current;
892 } else if (current - laststart >= linelength && laststart != lastspace) {
893 ZSTR_VAL(newtext)[lastspace] = breakchar[0];
894 laststart = lastspace + 1;
895 }
896 }
897
898 RETURN_NEW_STR(newtext);
899 } else {
900 /* Multiple character line break or forced cut */
901 if (linelength > 0) {
902 chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
903 newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
904 alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
905 } else {
906 chk = ZSTR_LEN(text);
907 alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
908 newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
909 }
910
911 /* now keep track of the actual new text length */
912 newtextlen = 0;
913
914 laststart = lastspace = 0;
915 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
916 if (chk == 0) {
917 alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
918 newtext = zend_string_extend(newtext, alloced, 0);
919 chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
920 }
921 /* when we hit an existing break, copy to new buffer, and
922 * fix up laststart and lastspace */
923 if (ZSTR_VAL(text)[current] == breakchar[0]
924 && current + breakchar_len < ZSTR_LEN(text)
925 && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
926 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
927 newtextlen += current - laststart + breakchar_len;
928 current += breakchar_len - 1;
929 laststart = lastspace = current + 1;
930 chk--;
931 }
932 /* if it is a space, check if it is at the line boundary,
933 * copy and insert a break, or just keep track of it */
934 else if (ZSTR_VAL(text)[current] == ' ') {
935 if (current - laststart >= linelength) {
936 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
937 newtextlen += current - laststart;
938 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
939 newtextlen += breakchar_len;
940 laststart = current + 1;
941 chk--;
942 }
943 lastspace = current;
944 }
945 /* if we are cutting, and we've accumulated enough
946 * characters, and we haven't see a space for this line,
947 * copy and insert a break. */
948 else if (current - laststart >= linelength
949 && docut && laststart >= lastspace) {
950 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
951 newtextlen += current - laststart;
952 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
953 newtextlen += breakchar_len;
954 laststart = lastspace = current;
955 chk--;
956 }
957 /* if the current word puts us over the linelength, copy
958 * back up until the last space, insert a break, and move
959 * up the laststart */
960 else if (current - laststart >= linelength
961 && laststart < lastspace) {
962 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
963 newtextlen += lastspace - laststart;
964 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
965 newtextlen += breakchar_len;
966 laststart = lastspace = lastspace + 1;
967 chk--;
968 }
969 }
970
971 /* copy over any stragglers */
972 if (laststart != current) {
973 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
974 newtextlen += current - laststart;
975 }
976
977 ZSTR_VAL(newtext)[newtextlen] = '\0';
978 /* free unused memory */
979 newtext = zend_string_truncate(newtext, newtextlen, 0);
980
981 RETURN_NEW_STR(newtext);
982 }
983 }
984 /* }}} */
985
986 /* {{{ php_explode */
php_explode(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)987 PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
988 {
989 const char *p1 = ZSTR_VAL(str);
990 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
991 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
992 zval tmp;
993
994 if (p2 == NULL) {
995 ZVAL_STR_COPY(&tmp, str);
996 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
997 } else {
998 zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
999 ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
1000 do {
1001 ZEND_HASH_FILL_GROW();
1002 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
1003 ZEND_HASH_FILL_NEXT();
1004 p1 = p2 + ZSTR_LEN(delim);
1005 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1006 } while (p2 != NULL && --limit > 1);
1007
1008 if (p1 <= endp) {
1009 ZEND_HASH_FILL_GROW();
1010 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
1011 ZEND_HASH_FILL_NEXT();
1012 }
1013 } ZEND_HASH_FILL_END();
1014 }
1015 }
1016 /* }}} */
1017
1018 /* {{{ php_explode_negative_limit */
php_explode_negative_limit(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)1019 PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
1020 {
1021 #define EXPLODE_ALLOC_STEP 64
1022 const char *p1 = ZSTR_VAL(str);
1023 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
1024 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1025 zval tmp;
1026
1027 if (p2 == NULL) {
1028 /*
1029 do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
1030 by doing nothing we return empty array
1031 */
1032 } else {
1033 size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
1034 zend_long i, to_return;
1035 const char **positions = emalloc(allocated * sizeof(char *));
1036
1037 positions[found++] = p1;
1038 do {
1039 if (found >= allocated) {
1040 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
1041 positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
1042 }
1043 positions[found++] = p1 = p2 + ZSTR_LEN(delim);
1044 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1045 } while (p2 != NULL);
1046
1047 to_return = limit + found;
1048 /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
1049 for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
1050 ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
1051 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1052 }
1053 efree((void *)positions);
1054 }
1055 #undef EXPLODE_ALLOC_STEP
1056 }
1057 /* }}} */
1058
1059 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)1060 PHP_FUNCTION(explode)
1061 {
1062 zend_string *str, *delim;
1063 zend_long limit = ZEND_LONG_MAX; /* No limit */
1064 zval tmp;
1065
1066 ZEND_PARSE_PARAMETERS_START(2, 3)
1067 Z_PARAM_STR(delim)
1068 Z_PARAM_STR(str)
1069 Z_PARAM_OPTIONAL
1070 Z_PARAM_LONG(limit)
1071 ZEND_PARSE_PARAMETERS_END();
1072
1073 if (ZSTR_LEN(delim) == 0) {
1074 zend_argument_value_error(1, "cannot be empty");
1075 RETURN_THROWS();
1076 }
1077
1078 array_init(return_value);
1079
1080 if (ZSTR_LEN(str) == 0) {
1081 if (limit >= 0) {
1082 ZVAL_EMPTY_STRING(&tmp);
1083 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1084 }
1085 return;
1086 }
1087
1088 if (limit > 1) {
1089 php_explode(delim, str, return_value, limit);
1090 } else if (limit < 0) {
1091 php_explode_negative_limit(delim, str, return_value, limit);
1092 } else {
1093 ZVAL_STR_COPY(&tmp, str);
1094 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1095 }
1096 }
1097 /* }}} */
1098
1099 /* {{{ An alias for implode */
1100 /* }}} */
1101
/* {{{ php_implode
 * Concatenates the string form of every element of `pieces`, placing `glue`
 * between consecutive elements, and stores the result in `return_value`.
 *
 * glue         - separator inserted between elements (may be empty)
 * pieces       - hash table whose values are joined in iteration order
 * return_value - receives the resulting string
 *
 * The work is done in two passes: the first pass records every element and the
 * total byte length, the second pass fills the result buffer from its end
 * towards its start. */
PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
{
	zval *tmp;
	uint32_t numelems;
	zend_string *str;
	char *cptr;
	size_t len = 0;
	/* One record per element:
	 *  - str != NULL, lval == 0: string borrowed from the array (not released here)
	 *  - str != NULL, lval != 0: string created by conversion, released after copying
	 *  - str == NULL:            integer element, value kept in lval */
	struct {
		zend_string *str;
		zend_long lval;
	} *strings, *ptr;
	ALLOCA_FLAG(use_heap)

	numelems = zend_hash_num_elements(pieces);

	if (numelems == 0) {
		RETURN_EMPTY_STRING();
	} else if (numelems == 1) {
		/* loop to search the first not undefined element... */
		ZEND_HASH_FOREACH_VAL(pieces, tmp) {
			/* Single element: no glue is needed, return its string form directly. */
			RETURN_STR(zval_get_string(tmp));
		} ZEND_HASH_FOREACH_END();
	}

	ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);

	/* Pass 1: collect the elements and add up the bytes they will occupy. */
	ZEND_HASH_FOREACH_VAL(pieces, tmp) {
		if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
			ptr->str = Z_STR_P(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 0;
			ptr++;
		} else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
			zend_long val = Z_LVAL_P(tmp);

			ptr->str = NULL;
			ptr->lval = val;
			ptr++;
			/* One extra byte: the '-' sign for negatives, or the single digit of 0
			 * (for which the digit-counting loop below does not run). */
			if (val <= 0) {
				len++;
			}
			/* Count decimal digits. */
			while (val) {
				val /= 10;
				len++;
			}
		} else {
			/* Any other type: convert now; lval = 1 marks the string as owned. */
			ptr->str = zval_get_string_func(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 1;
			ptr++;
		}
	} ZEND_HASH_FOREACH_END();

	/* numelems cannot be 0, we checked above */
	/* Total size = (numelems - 1) * glue length + summed element length. */
	str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
	cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
	*cptr = 0;

	/* Pass 2: walk the records backwards, writing each element (and the glue
	 * in front of it) just before the previously written data. */
	while (1) {
		ptr--;
		if (EXPECTED(ptr->str)) {
			cptr -= ZSTR_LEN(ptr->str);
			memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
			if (ptr->lval) {
				zend_string_release_ex(ptr->str, 0);
			}
		} else {
			/* The byte at cptr already belongs to the result, so it is saved and
			 * restored around the call. NOTE(review): presumably
			 * zend_print_long_to_buf() stores a terminator at the position it is
			 * given and returns the start of the digits -- confirm. */
			char *oldPtr = cptr;
			char oldVal = *cptr;
			cptr = zend_print_long_to_buf(cptr, ptr->lval);
			*oldPtr = oldVal;
		}

		/* The first element has no glue in front of it. */
		if (ptr == strings) {
			break;
		}

		cptr -= ZSTR_LEN(glue);
		memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
	}

	free_alloca(strings, use_heap);
	RETURN_NEW_STR(str);
}
/* }}} */
1188
1189 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1190 PHP_FUNCTION(implode)
1191 {
1192 zend_string *arg1_str = NULL;
1193 HashTable *arg1_array = NULL;
1194 zend_array *pieces = NULL;
1195
1196 ZEND_PARSE_PARAMETERS_START(1, 2)
1197 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1198 Z_PARAM_OPTIONAL
1199 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1200 ZEND_PARSE_PARAMETERS_END();
1201
1202 if (pieces == NULL) {
1203 if (arg1_array == NULL) {
1204 zend_type_error("%s(): Argument #1 ($array) must be of type array, string given", get_active_function_name());
1205 RETURN_THROWS();
1206 }
1207
1208 arg1_str = ZSTR_EMPTY_ALLOC();
1209 pieces = arg1_array;
1210 } else {
1211 if (arg1_str == NULL) {
1212 zend_argument_type_error(1, "must be of type string, array given");
1213 RETURN_THROWS();
1214 }
1215 }
1216
1217 php_implode(arg1_str, pieces, return_value);
1218 }
1219 /* }}} */
1220
/* Accesses the per-byte flag in BG(strtok_table) for the character `p` points at.
 * The argument is expanded unparenthesized, so STRTOK_TABLE(token++) reads
 * *token and then advances the pointer. */
#define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]

/* {{{ Tokenize a string
 * Called with two arguments, stores the first as the string to tokenize (in
 * BG(strtok_string) / BG(strtok_last) / BG(strtok_len)) and uses the second as
 * the delimiter set. Called with one argument, that argument is the delimiter
 * set and scanning continues in the previously stored string.
 * Returns the next token, or false when no string is stored or no token is left. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0; /* number of leading delimiter bytes stepped over */

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* Single-argument call: the argument is the delimiter set. */
		tok = str;
	} else {
		/* Two-argument call: replace the stored string and restart at its beginning. */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		// TODO: Should this warn?
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Flag every delimiter byte in the lookup table. */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* The token spans from the first non-delimiter byte up to (excluding) p. */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		/* Resume after the delimiter that ended this token. */
		BG(strtok_last) = p + 1;
	} else {
return_false:
		/* Nothing left: drop the stored string so later calls return false early. */
		RETVAL_FALSE;
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster than memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
/* }}} */
1304
/* {{{ php_strtoupper
 * Passes the buffer `s` of `len` bytes to zend_str_toupper() and returns the
 * same pointer `s`. */
PHPAPI char *php_strtoupper(char *s, size_t len)
{
	zend_str_toupper(s, len);
	return s;
}
/* }}} */
1312
/* {{{ php_string_toupper
 * Thin wrapper: returns whatever zend_string_toupper(s) returns. */
PHPAPI zend_string *php_string_toupper(zend_string *s)
{
	return zend_string_toupper(s);
}
/* }}} */
1319
1320 /* {{{ Makes a string uppercase */
PHP_FUNCTION(strtoupper)1321 PHP_FUNCTION(strtoupper)
1322 {
1323 zend_string *arg;
1324
1325 ZEND_PARSE_PARAMETERS_START(1, 1)
1326 Z_PARAM_STR(arg)
1327 ZEND_PARSE_PARAMETERS_END();
1328
1329 RETURN_STR(zend_string_toupper(arg));
1330 }
1331 /* }}} */
1332
/* {{{ php_strtolower
 * Passes the buffer `s` of `len` bytes to zend_str_tolower() and returns the
 * same pointer `s`. */
PHPAPI char *php_strtolower(char *s, size_t len)
{
	zend_str_tolower(s, len);
	return s;
}
/* }}} */
1340
/* {{{ php_string_tolower
 * Thin wrapper: returns whatever zend_string_tolower(s) returns. */
PHPAPI zend_string *php_string_tolower(zend_string *s)
{
	return zend_string_tolower(s);
}
/* }}} */
1347
1348 /* {{{ Makes a string lowercase */
PHP_FUNCTION(strtolower)1349 PHP_FUNCTION(strtolower)
1350 {
1351 zend_string *str;
1352
1353 ZEND_PARSE_PARAMETERS_START(1, 1)
1354 Z_PARAM_STR(str)
1355 ZEND_PARSE_PARAMETERS_END();
1356
1357 RETURN_STR(zend_string_tolower(str));
1358 }
1359 /* }}} */
1360
#if defined(PHP_WIN32)
/* Helper for php_basename(), which calls it for ':' characters.
 * Returns 1 when the character just before `pos` is not a path separator and
 * one of the following holds:
 *   - that character is the very first one of the string (pos - start == 1),
 *   - the character two before `pos` is '/' or '\\',
 *   - the character two before `pos` is ':' and the same test succeeds for
 *     pos - 2.
 * Returns 0 otherwise, including when pos == start. */
static bool _is_basename_start(const char *start, const char *pos)
{
	/* Nothing in front of pos, or a separator directly in front of it. */
	if (pos - start < 1 || *(pos-1) == '/' || *(pos-1) == '\\') {
		return 0;
	}

	/* Exactly one (non-separator) character precedes pos. */
	if (pos - start == 1) {
		return 1;
	}

	if (*(pos-2) == '/' || *(pos-2) == '\\') {
		return 1;
	}

	if (*(pos-2) == ':' && _is_basename_start(start, pos - 2)) {
		return 1;
	}

	return 0;
}
#endif
1379
/* {{{ php_basename
 * Returns a newly created zend_string holding the trailing name component of
 * the path `s` (`len` bytes), optionally with `suffix` removed from its end.
 *
 * s, len             - path to examine
 * suffix, suffix_len - suffix to strip, or NULL for none; it is only stripped
 *                      when it is strictly shorter than the component found
 *                      and matches its tail byte for byte
 *
 * Two scanning strategies are used: when CG(ascii_compatible_locale) is set
 * the path is scanned backwards byte by byte; otherwise it is walked forwards
 * one character at a time using php_mblen(). On PHP_WIN32 builds '\\' is
 * treated as a separator as well and ':' gets special handling. */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	const char *basename_start;
	const char *basename_end;

	if (CG(ascii_compatible_locale)) {
		basename_end = s + len - 1;

		/* Strip trailing slashes */
		while (basename_end >= s
#ifdef PHP_WIN32
			&& (*basename_end == '/'
				|| *basename_end == '\\'
				|| (*basename_end == ':'
					&& _is_basename_start(s, basename_end)))) {
#else
			&& *basename_end == '/') {
#endif
			basename_end--;
		}
		/* The path was empty or consisted of separators only. */
		if (basename_end < s) {
			return ZSTR_EMPTY_ALLOC();
		}

		/* Extract filename */
		/* basename_end becomes one-past-the-last byte; basename_start walks
		 * left until the byte in front of it is a separator. */
		basename_start = basename_end;
		basename_end++;
		while (basename_start > s
#ifdef PHP_WIN32
			&& *(basename_start-1) != '/'
			&& *(basename_start-1) != '\\') {

			if (*(basename_start-1) == ':' &&
				_is_basename_start(s, basename_start - 1)) {
				break;
			}
#else
			&& *(basename_start-1) != '/') {
#endif
			basename_start--;
		}
	} else {
		/* State 0 is directly after a directory separator (or at the start of the string).
		 * State 1 is everything else. */
		int state = 0;

		basename_start = s;
		basename_end = s;
		while (len > 0) {
			/* A NUL byte is counted as a one-byte character without calling php_mblen(). */
			int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

			switch (inc_len) {
				case 0:
					goto quit_loop;
				case 1:
#ifdef PHP_WIN32
					if (*s == '/' || *s == '\\') {
#else
					if (*s == '/') {
#endif
						/* First separator after a component: remember where the component ended. */
						if (state == 1) {
							state = 0;
							basename_end = s;
						}
#ifdef PHP_WIN32
					/* Catch relative paths in c:file.txt style. They're not to confuse
					   with the NTFS streams. This part ensures also, that no drive
					   letter traversing happens. */
					} else if ((*s == ':' && (s - basename_start == 1))) {
						if (state == 0) {
							basename_start = s;
							state = 1;
						} else {
							basename_end = s;
							state = 0;
						}
#endif
					} else {
						/* First byte of a new component. */
						if (state == 0) {
							basename_start = s;
							state = 1;
						}
					}
					break;
				default:
					if (inc_len < 0) {
						/* If character is invalid, treat it like other non-significant characters. */
						inc_len = 1;
						php_mb_reset();
					}
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
					break;
			}
			s += inc_len;
			len -= inc_len;
		}

quit_loop:
		/* The path ended inside a component: it extends up to the current position. */
		if (state == 1) {
			basename_end = s;
		}
	}

	/* Strip the suffix only if something would remain afterwards. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
/* }}} */
1495
1496 /* {{{ Returns the filename component of the path */
1497 PHP_FUNCTION(basename)
1498 {
1499 char *string, *suffix = NULL;
1500 size_t string_len, suffix_len = 0;
1501
1502 ZEND_PARSE_PARAMETERS_START(1, 2)
1503 Z_PARAM_STRING(string, string_len)
1504 Z_PARAM_OPTIONAL
1505 Z_PARAM_STRING(suffix, suffix_len)
1506 ZEND_PARSE_PARAMETERS_END();
1507
1508 RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
1509 }
1510 /* }}} */
1511
/* {{{ php_dirname
   Returns directory name component of path.
   Thin wrapper: forwards `path` and `len` to zend_dirname() and returns its
   result unchanged. */
PHPAPI size_t php_dirname(char *path, size_t len)
{
	return zend_dirname(path, len);
}
/* }}} */
1519
1520 /* {{{ Returns the directory name component of the path */
1521 PHP_FUNCTION(dirname)
1522 {
1523 char *str;
1524 size_t str_len;
1525 zend_string *ret;
1526 zend_long levels = 1;
1527
1528 ZEND_PARSE_PARAMETERS_START(1, 2)
1529 Z_PARAM_STRING(str, str_len)
1530 Z_PARAM_OPTIONAL
1531 Z_PARAM_LONG(levels)
1532 ZEND_PARSE_PARAMETERS_END();
1533
1534 ret = zend_string_init(str, str_len, 0);
1535
1536 if (levels == 1) {
1537 /* Default case */
1538 #ifdef PHP_WIN32
1539 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
1540 #else
1541 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
1542 #endif
1543 } else if (levels < 1) {
1544 zend_argument_value_error(2, "must be greater than or equal to 1");
1545 zend_string_efree(ret);
1546 RETURN_THROWS();
1547 } else {
1548 /* Some levels up */
1549 do {
1550 #ifdef PHP_WIN32
1551 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1552 #else
1553 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1554 #endif
1555 } while (ZSTR_LEN(ret) < str_len && --levels);
1556 }
1557
1558 RETURN_NEW_STR(ret);
1559 }
1560 /* }}} */
1561
1562 /* {{{ Returns information about a certain string */
1563 PHP_FUNCTION(pathinfo)
1564 {
1565 zval tmp;
1566 char *path, *dirname;
1567 size_t path_len;
1568 bool have_basename;
1569 zend_long opt = PHP_PATHINFO_ALL;
1570 zend_string *ret = NULL;
1571
1572 ZEND_PARSE_PARAMETERS_START(1, 2)
1573 Z_PARAM_STRING(path, path_len)
1574 Z_PARAM_OPTIONAL
1575 Z_PARAM_LONG(opt)
1576 ZEND_PARSE_PARAMETERS_END();
1577
1578 have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
1579
1580 array_init(&tmp);
1581
1582 if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
1583 dirname = estrndup(path, path_len);
1584 php_dirname(dirname, path_len);
1585 if (*dirname) {
1586 add_assoc_string(&tmp, "dirname", dirname);
1587 }
1588 efree(dirname);
1589 }
1590
1591 if (have_basename) {
1592 ret = php_basename(path, path_len, NULL, 0);
1593 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1594 }
1595
1596 if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
1597 const char *p;
1598 ptrdiff_t idx;
1599
1600 if (!have_basename) {
1601 ret = php_basename(path, path_len, NULL, 0);
1602 }
1603
1604 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1605
1606 if (p) {
1607 idx = p - ZSTR_VAL(ret);
1608 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1609 }
1610 }
1611
1612 if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
1613 const char *p;
1614 ptrdiff_t idx;
1615
1616 /* Have we already looked up the basename? */
1617 if (!have_basename && !ret) {
1618 ret = php_basename(path, path_len, NULL, 0);
1619 }
1620
1621 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1622
1623 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1624 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1625 }
1626
1627 if (ret) {
1628 zend_string_release_ex(ret, 0);
1629 }
1630
1631 if (opt == PHP_PATHINFO_ALL) {
1632 RETURN_COPY_VALUE(&tmp);
1633 } else {
1634 zval *element;
1635 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1636 RETVAL_COPY_DEREF(element);
1637 } else {
1638 RETVAL_EMPTY_STRING();
1639 }
1640 zval_ptr_dtor(&tmp);
1641 }
1642 }
1643 /* }}} */
1644
/* {{{ php_stristr
   case insensitive strstr
   Thin wrapper around php_memnistr(): searches for `t` (`t_len` bytes) in the
   range [s, s + s_len) and returns that call's result cast to char *. */
PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
{
	return (char*)php_memnistr(s, t, t_len, s + s_len);
}
/* }}} */
1652
/* {{{ php_strspn
 * Returns the length of the initial segment of [s1, s1_end) that consists
 * only of bytes contained in the set [s2, s2_end).
 *
 * Both ranges are half-open and are never read at or beyond their end
 * pointer, so the function also works on buffers that are not NUL-terminated.
 * An empty subject or an empty set yields 0. */
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *p = s1;

	while (p != s1_end) {
		const char *spanp = s2;

		/* Look the current byte up in the set. */
		while (spanp != s2_end && *spanp != *p) {
			spanp++;
		}
		if (spanp == s2_end) {
			/* Not a member of the set: the span ends here. */
			break;
		}
		p++;
	}

	return (size_t)(p - s1);
}
/* }}} */
1669
/* {{{ php_strcspn
 * Returns the length of the initial segment of [s1, s1_end) that contains
 * no byte from the set [s2, s2_end).
 *
 * Both ranges are half-open and are never read at or beyond their end
 * pointer. In particular an empty set matches nothing, so the whole subject
 * length is returned (the byte stored at s2 is not consulted in that case). */
PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *p, *spanp;

	for (p = s1; p != s1_end; p++) {
		for (spanp = s2; spanp != s2_end; spanp++) {
			if (*spanp == *p) {
				/* First byte that belongs to the set: the span ends here. */
				return (size_t)(p - s1);
			}
		}
	}

	return (size_t)(p - s1);
}
/* }}} */
1688
1689 /* {{{ Finds first occurrence of a string within another, case insensitive */
1690 PHP_FUNCTION(stristr)
1691 {
1692 zend_string *haystack, *needle;
1693 const char *found = NULL;
1694 size_t found_offset;
1695 bool part = 0;
1696
1697 ZEND_PARSE_PARAMETERS_START(2, 3)
1698 Z_PARAM_STR(haystack)
1699 Z_PARAM_STR(needle)
1700 Z_PARAM_OPTIONAL
1701 Z_PARAM_BOOL(part)
1702 ZEND_PARSE_PARAMETERS_END();
1703
1704 found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
1705
1706 if (UNEXPECTED(!found)) {
1707 RETURN_FALSE;
1708 }
1709 found_offset = found - ZSTR_VAL(haystack);
1710 if (part) {
1711 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1712 }
1713 RETURN_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
1714 }
1715 /* }}} */
1716
1717 /* {{{ Finds first occurrence of a string within another */
1718 PHP_FUNCTION(strstr)
1719 {
1720 zend_string *haystack, *needle;
1721 const char *found = NULL;
1722 zend_long found_offset;
1723 bool part = 0;
1724
1725 ZEND_PARSE_PARAMETERS_START(2, 3)
1726 Z_PARAM_STR(haystack)
1727 Z_PARAM_STR(needle)
1728 Z_PARAM_OPTIONAL
1729 Z_PARAM_BOOL(part)
1730 ZEND_PARSE_PARAMETERS_END();
1731
1732 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1733
1734 if (UNEXPECTED(!found)) {
1735 RETURN_FALSE;
1736 }
1737 found_offset = found - ZSTR_VAL(haystack);
1738 if (part) {
1739 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1740 }
1741 RETURN_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
1742 }
1743 /* }}} */
1744
1745 /* {{{ Checks if a string contains another */
1746 PHP_FUNCTION(str_contains)
1747 {
1748 zend_string *haystack, *needle;
1749
1750 ZEND_PARSE_PARAMETERS_START(2, 2)
1751 Z_PARAM_STR(haystack)
1752 Z_PARAM_STR(needle)
1753 ZEND_PARSE_PARAMETERS_END();
1754
1755 RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1756 }
1757 /* }}} */
1758
1759 /* {{{ Checks if haystack starts with needle */
1760 PHP_FUNCTION(str_starts_with)
1761 {
1762 zend_string *haystack, *needle;
1763
1764 ZEND_PARSE_PARAMETERS_START(2, 2)
1765 Z_PARAM_STR(haystack)
1766 Z_PARAM_STR(needle)
1767 ZEND_PARSE_PARAMETERS_END();
1768
1769 RETURN_BOOL(zend_string_starts_with(haystack, needle));
1770 }
1771 /* }}} */
1772
1773 /* {{{ Checks if haystack ends with needle */
1774 PHP_FUNCTION(str_ends_with)
1775 {
1776 zend_string *haystack, *needle;
1777
1778 ZEND_PARSE_PARAMETERS_START(2, 2)
1779 Z_PARAM_STR(haystack)
1780 Z_PARAM_STR(needle)
1781 ZEND_PARSE_PARAMETERS_END();
1782
1783 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1784 RETURN_FALSE;
1785 }
1786
1787 RETURN_BOOL(memcmp(
1788 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1789 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1790 }
1791 /* }}} */
1792
1793 /* {{{ An alias for strstr */
1794 /* }}} */
1795
1796 /* {{{ Finds position of first occurrence of a string within another */
1797 PHP_FUNCTION(strpos)
1798 {
1799 zend_string *haystack, *needle;
1800 const char *found = NULL;
1801 zend_long offset = 0;
1802
1803 ZEND_PARSE_PARAMETERS_START(2, 3)
1804 Z_PARAM_STR(haystack)
1805 Z_PARAM_STR(needle)
1806 Z_PARAM_OPTIONAL
1807 Z_PARAM_LONG(offset)
1808 ZEND_PARSE_PARAMETERS_END();
1809
1810 if (offset < 0) {
1811 offset += (zend_long)ZSTR_LEN(haystack);
1812 }
1813 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1814 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1815 RETURN_THROWS();
1816 }
1817
1818 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1819 ZSTR_VAL(needle), ZSTR_LEN(needle),
1820 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1821
1822 if (UNEXPECTED(!found)) {
1823 RETURN_FALSE;
1824 }
1825 RETURN_LONG(found - ZSTR_VAL(haystack));
1826 }
1827 /* }}} */
1828
1829 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
1830 PHP_FUNCTION(stripos)
1831 {
1832 const char *found = NULL;
1833 zend_string *haystack, *needle;
1834 zend_long offset = 0;
1835
1836 ZEND_PARSE_PARAMETERS_START(2, 3)
1837 Z_PARAM_STR(haystack)
1838 Z_PARAM_STR(needle)
1839 Z_PARAM_OPTIONAL
1840 Z_PARAM_LONG(offset)
1841 ZEND_PARSE_PARAMETERS_END();
1842
1843 if (offset < 0) {
1844 offset += (zend_long)ZSTR_LEN(haystack);
1845 }
1846 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1847 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1848 RETURN_THROWS();
1849 }
1850
1851 found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
1852 ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1853
1854 if (UNEXPECTED(!found)) {
1855 RETURN_FALSE;
1856 }
1857 RETURN_LONG(found - ZSTR_VAL(haystack));
1858 }
1859 /* }}} */
1860
1861 /* {{{ Finds position of last occurrence of a string within another string */
1862 PHP_FUNCTION(strrpos)
1863 {
1864 zend_string *needle;
1865 zend_string *haystack;
1866 zend_long offset = 0;
1867 const char *p, *e, *found;
1868
1869 ZEND_PARSE_PARAMETERS_START(2, 3)
1870 Z_PARAM_STR(haystack)
1871 Z_PARAM_STR(needle)
1872 Z_PARAM_OPTIONAL
1873 Z_PARAM_LONG(offset)
1874 ZEND_PARSE_PARAMETERS_END();
1875
1876 if (offset >= 0) {
1877 if ((size_t)offset > ZSTR_LEN(haystack)) {
1878 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1879 RETURN_THROWS();
1880 }
1881 p = ZSTR_VAL(haystack) + (size_t)offset;
1882 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1883 } else {
1884 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1885 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1886 RETURN_THROWS();
1887 }
1888
1889 p = ZSTR_VAL(haystack);
1890 if ((size_t)-offset < ZSTR_LEN(needle)) {
1891 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1892 } else {
1893 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
1894 }
1895 }
1896
1897 found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e);
1898
1899 if (UNEXPECTED(!found)) {
1900 RETURN_FALSE;
1901 }
1902 RETURN_LONG(found - ZSTR_VAL(haystack));
1903 }
1904 /* }}} */
1905
1906 /* {{{ Finds position of last occurrence of a string within another string */
1907 PHP_FUNCTION(strripos)
1908 {
1909 zend_string *needle;
1910 zend_string *haystack;
1911 zend_long offset = 0;
1912 const char *p, *e, *found;
1913 zend_string *needle_dup, *haystack_dup;
1914
1915 ZEND_PARSE_PARAMETERS_START(2, 3)
1916 Z_PARAM_STR(haystack)
1917 Z_PARAM_STR(needle)
1918 Z_PARAM_OPTIONAL
1919 Z_PARAM_LONG(offset)
1920 ZEND_PARSE_PARAMETERS_END();
1921
1922 if (ZSTR_LEN(needle) == 1) {
1923 /* Single character search can shortcut memcmps
1924 Can also avoid tolower emallocs */
1925 char lowered;
1926 if (offset >= 0) {
1927 if ((size_t)offset > ZSTR_LEN(haystack)) {
1928 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1929 RETURN_THROWS();
1930 }
1931 p = ZSTR_VAL(haystack) + (size_t)offset;
1932 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
1933 } else {
1934 p = ZSTR_VAL(haystack);
1935 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1936 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1937 RETURN_THROWS();
1938 }
1939 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
1940 }
1941 lowered = zend_tolower_ascii(*ZSTR_VAL(needle));
1942 while (e >= p) {
1943 if (zend_tolower_ascii(*e) == lowered) {
1944 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
1945 }
1946 e--;
1947 }
1948 RETURN_FALSE;
1949 }
1950
1951 haystack_dup = zend_string_tolower(haystack);
1952 if (offset >= 0) {
1953 if ((size_t)offset > ZSTR_LEN(haystack)) {
1954 zend_string_release_ex(haystack_dup, 0);
1955 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1956 RETURN_THROWS();
1957 }
1958 p = ZSTR_VAL(haystack_dup) + offset;
1959 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
1960 } else {
1961 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1962 zend_string_release_ex(haystack_dup, 0);
1963 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1964 RETURN_THROWS();
1965 }
1966
1967 p = ZSTR_VAL(haystack_dup);
1968 if ((size_t)-offset < ZSTR_LEN(needle)) {
1969 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
1970 } else {
1971 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
1972 }
1973 }
1974
1975 needle_dup = zend_string_tolower(needle);
1976 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
1977 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
1978 zend_string_release_ex(needle_dup, 0);
1979 zend_string_release_ex(haystack_dup, 0);
1980 } else {
1981 zend_string_release_ex(needle_dup, 0);
1982 zend_string_release_ex(haystack_dup, 0);
1983 RETURN_FALSE;
1984 }
1985 }
1986 /* }}} */
1987
1988 /* {{{ Finds the last occurrence of a character in a string within another */
1989 PHP_FUNCTION(strrchr)
1990 {
1991 zend_string *haystack, *needle;
1992 const char *found = NULL;
1993 zend_long found_offset;
1994
1995 ZEND_PARSE_PARAMETERS_START(2, 2)
1996 Z_PARAM_STR(haystack)
1997 Z_PARAM_STR(needle)
1998 ZEND_PARSE_PARAMETERS_END();
1999
2000 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2001 if (UNEXPECTED(!found)) {
2002 RETURN_FALSE;
2003 }
2004 found_offset = found - ZSTR_VAL(haystack);
2005 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2006 }
2007 /* }}} */
2008
2009 /* {{{ php_chunk_split */
2010 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
2011 {
2012 char *q;
2013 const char *p;
2014 size_t chunks;
2015 size_t restlen;
2016 zend_string *dest;
2017
2018 chunks = srclen / chunklen;
2019 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
2020 if (restlen) {
2021 /* We want chunks to be rounded up rather than rounded down.
2022 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
2023 chunks++;
2024 }
2025
2026 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
2027
2028 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
2029 memcpy(q, p, chunklen);
2030 q += chunklen;
2031 memcpy(q, end, endlen);
2032 q += endlen;
2033 p += chunklen;
2034 }
2035
2036 if (restlen) {
2037 memcpy(q, p, restlen);
2038 q += restlen;
2039 memcpy(q, end, endlen);
2040 q += endlen;
2041 }
2042
2043 *q = '\0';
2044 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
2045
2046 return dest;
2047 }
2048 /* }}} */
2049
2050 /* {{{ Returns split line */
2051 PHP_FUNCTION(chunk_split)
2052 {
2053 zend_string *str;
2054 char *end = "\r\n";
2055 size_t endlen = 2;
2056 zend_long chunklen = 76;
2057 zend_string *result;
2058
2059 ZEND_PARSE_PARAMETERS_START(1, 3)
2060 Z_PARAM_STR(str)
2061 Z_PARAM_OPTIONAL
2062 Z_PARAM_LONG(chunklen)
2063 Z_PARAM_STRING(end, endlen)
2064 ZEND_PARSE_PARAMETERS_END();
2065
2066 if (chunklen <= 0) {
2067 zend_argument_value_error(2, "must be greater than 0");
2068 RETURN_THROWS();
2069 }
2070
2071 if ((size_t)chunklen > ZSTR_LEN(str)) {
2072 /* to maintain BC, we must return original string + ending */
2073 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2074 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2075 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2076 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2077 RETURN_NEW_STR(result);
2078 }
2079
2080 if (!ZSTR_LEN(str)) {
2081 RETURN_EMPTY_STRING();
2082 }
2083
2084 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2085
2086 RETURN_STR(result);
2087 }
2088 /* }}} */
2089
2090 /* {{{ Returns part of a string */
2091 PHP_FUNCTION(substr)
2092 {
2093 zend_string *str;
2094 zend_long l = 0, f;
2095 bool len_is_null = 1;
2096
2097 ZEND_PARSE_PARAMETERS_START(2, 3)
2098 Z_PARAM_STR(str)
2099 Z_PARAM_LONG(f)
2100 Z_PARAM_OPTIONAL
2101 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2102 ZEND_PARSE_PARAMETERS_END();
2103
2104 if (f < 0) {
2105 /* if "from" position is negative, count start position from the end
2106 * of the string
2107 */
2108 if (-(size_t)f > ZSTR_LEN(str)) {
2109 f = 0;
2110 } else {
2111 f = (zend_long)ZSTR_LEN(str) + f;
2112 }
2113 } else if ((size_t)f > ZSTR_LEN(str)) {
2114 RETURN_EMPTY_STRING();
2115 }
2116
2117 if (!len_is_null) {
2118 if (l < 0) {
2119 /* if "length" position is negative, set it to the length
2120 * needed to stop that many chars from the end of the string
2121 */
2122 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2123 l = 0;
2124 } else {
2125 l = (zend_long)ZSTR_LEN(str) - f + l;
2126 }
2127 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2128 l = (zend_long)ZSTR_LEN(str) - f;
2129 }
2130 } else {
2131 l = (zend_long)ZSTR_LEN(str) - f;
2132 }
2133
2134 if (l == ZSTR_LEN(str)) {
2135 RETURN_STR_COPY(str);
2136 } else {
2137 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2138 }
2139 }
2140 /* }}} */
2141
2142 /* {{{ Replaces part of a string with another string */
2143 PHP_FUNCTION(substr_replace)
2144 {
2145 zend_string *str, *repl_str;
2146 HashTable *str_ht, *repl_ht;
2147 HashTable *from_ht;
2148 zend_long from_long;
2149 HashTable *len_ht = NULL;
2150 zend_long len_long;
2151 bool len_is_null = 1;
2152 zend_long l = 0;
2153 zend_long f;
2154 zend_string *result;
2155 HashPosition from_idx, repl_idx, len_idx;
2156 zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
2157
2158 ZEND_PARSE_PARAMETERS_START(3, 4)
2159 Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
2160 Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
2161 Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
2162 Z_PARAM_OPTIONAL
2163 Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
2164 ZEND_PARSE_PARAMETERS_END();
2165
2166 if (len_is_null) {
2167 if (str) {
2168 l = ZSTR_LEN(str);
2169 }
2170 } else if (!len_ht) {
2171 l = len_long;
2172 }
2173
2174 if (str) {
2175 if (from_ht) {
2176 zend_argument_type_error(3, "cannot be an array when working on a single string");
2177 RETURN_THROWS();
2178 }
2179 if (len_ht) {
2180 zend_argument_type_error(4, "cannot be an array when working on a single string");
2181 RETURN_THROWS();
2182 }
2183
2184 f = from_long;
2185
2186 /* if "from" position is negative, count start position from the end
2187 * of the string
2188 */
2189 if (f < 0) {
2190 f = (zend_long)ZSTR_LEN(str) + f;
2191 if (f < 0) {
2192 f = 0;
2193 }
2194 } else if ((size_t)f > ZSTR_LEN(str)) {
2195 f = ZSTR_LEN(str);
2196 }
2197 /* if "length" position is negative, set it to the length
2198 * needed to stop that many chars from the end of the string
2199 */
2200 if (l < 0) {
2201 l = ((zend_long)ZSTR_LEN(str) - f) + l;
2202 if (l < 0) {
2203 l = 0;
2204 }
2205 }
2206
2207 if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
2208 l = ZSTR_LEN(str);
2209 }
2210
2211 if ((f + l) > (zend_long)ZSTR_LEN(str)) {
2212 l = ZSTR_LEN(str) - f;
2213 }
2214
2215 zend_string *tmp_repl_str = NULL;
2216 if (repl_ht) {
2217 repl_idx = 0;
2218 if (HT_IS_PACKED(repl_ht)) {
2219 while (repl_idx < repl_ht->nNumUsed) {
2220 tmp_repl = &repl_ht->arPacked[repl_idx];
2221 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2222 break;
2223 }
2224 repl_idx++;
2225 }
2226 } else {
2227 while (repl_idx < repl_ht->nNumUsed) {
2228 tmp_repl = &repl_ht->arData[repl_idx].val;
2229 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2230 break;
2231 }
2232 repl_idx++;
2233 }
2234 }
2235 if (repl_idx < repl_ht->nNumUsed) {
2236 repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2237 } else {
2238 repl_str = STR_EMPTY_ALLOC();
2239 }
2240 }
2241
2242 result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
2243
2244 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
2245 if (ZSTR_LEN(repl_str)) {
2246 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2247 }
2248 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
2249 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2250 zend_tmp_string_release(tmp_repl_str);
2251 RETURN_NEW_STR(result);
2252 } else { /* str is array of strings */
2253 zend_string *str_index = NULL;
2254 size_t result_len;
2255 zend_ulong num_index;
2256
2257 /* TODO
2258 if (!len_is_null && from_ht) {
2259 if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
2260 php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
2261 RETURN_STR_COPY(str);
2262 }
2263 }
2264 */
2265
2266 array_init(return_value);
2267
2268 from_idx = len_idx = repl_idx = 0;
2269
2270 ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
2271 zend_string *tmp_orig_str;
2272 zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
2273
2274 if (from_ht) {
2275 if (HT_IS_PACKED(from_ht)) {
2276 while (from_idx < from_ht->nNumUsed) {
2277 tmp_from = &from_ht->arPacked[from_idx];
2278 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2279 break;
2280 }
2281 from_idx++;
2282 }
2283 } else {
2284 while (from_idx < from_ht->nNumUsed) {
2285 tmp_from = &from_ht->arData[from_idx].val;
2286 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2287 break;
2288 }
2289 from_idx++;
2290 }
2291 }
2292 if (from_idx < from_ht->nNumUsed) {
2293 f = zval_get_long(tmp_from);
2294
2295 if (f < 0) {
2296 f = (zend_long)ZSTR_LEN(orig_str) + f;
2297 if (f < 0) {
2298 f = 0;
2299 }
2300 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2301 f = ZSTR_LEN(orig_str);
2302 }
2303 from_idx++;
2304 } else {
2305 f = 0;
2306 }
2307 } else {
2308 f = from_long;
2309 if (f < 0) {
2310 f = (zend_long)ZSTR_LEN(orig_str) + f;
2311 if (f < 0) {
2312 f = 0;
2313 }
2314 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2315 f = ZSTR_LEN(orig_str);
2316 }
2317 }
2318
2319 if (len_ht) {
2320 if (HT_IS_PACKED(len_ht)) {
2321 while (len_idx < len_ht->nNumUsed) {
2322 tmp_len = &len_ht->arPacked[len_idx];
2323 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2324 break;
2325 }
2326 len_idx++;
2327 }
2328 } else {
2329 while (len_idx < len_ht->nNumUsed) {
2330 tmp_len = &len_ht->arData[len_idx].val;
2331 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2332 break;
2333 }
2334 len_idx++;
2335 }
2336 }
2337 if (len_idx < len_ht->nNumUsed) {
2338 l = zval_get_long(tmp_len);
2339 len_idx++;
2340 } else {
2341 l = ZSTR_LEN(orig_str);
2342 }
2343 } else if (!len_is_null) {
2344 l = len_long;
2345 } else {
2346 l = ZSTR_LEN(orig_str);
2347 }
2348
2349 if (l < 0) {
2350 l = (ZSTR_LEN(orig_str) - f) + l;
2351 if (l < 0) {
2352 l = 0;
2353 }
2354 }
2355
2356 ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
2357 ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
2358 if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
2359 l = ZSTR_LEN(orig_str) - f;
2360 }
2361
2362 result_len = ZSTR_LEN(orig_str) - l;
2363
2364 if (repl_ht) {
2365 if (HT_IS_PACKED(repl_ht)) {
2366 while (repl_idx < repl_ht->nNumUsed) {
2367 tmp_repl = &repl_ht->arPacked[repl_idx];
2368 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2369 break;
2370 }
2371 repl_idx++;
2372 }
2373 } else {
2374 while (repl_idx < repl_ht->nNumUsed) {
2375 tmp_repl = &repl_ht->arData[repl_idx].val;
2376 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2377 break;
2378 }
2379 repl_idx++;
2380 }
2381 }
2382 if (repl_idx < repl_ht->nNumUsed) {
2383 zend_string *tmp_repl_str;
2384 zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2385
2386 result_len += ZSTR_LEN(repl_str);
2387 repl_idx++;
2388 result = zend_string_safe_alloc(1, result_len, 0, 0);
2389
2390 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2391 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2392 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2393 zend_tmp_string_release(tmp_repl_str);
2394 } else {
2395 result = zend_string_safe_alloc(1, result_len, 0, 0);
2396
2397 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2398 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2399 }
2400 } else {
2401 result_len += ZSTR_LEN(repl_str);
2402
2403 result = zend_string_safe_alloc(1, result_len, 0, 0);
2404
2405 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2406 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2407 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2408 }
2409
2410 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2411
2412 if (str_index) {
2413 zval tmp;
2414
2415 ZVAL_NEW_STR(&tmp, result);
2416 zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
2417 } else {
2418 add_index_str(return_value, num_index, result);
2419 }
2420
2421 zend_tmp_string_release(tmp_orig_str);
2422 } ZEND_HASH_FOREACH_END();
2423 } /* if */
2424 }
2425 /* }}} */
2426
2427 /* {{{ Quotes meta characters */
2428 PHP_FUNCTION(quotemeta)
2429 {
2430 zend_string *old;
2431 const char *old_end, *p;
2432 char *q;
2433 char c;
2434 zend_string *str;
2435
2436 ZEND_PARSE_PARAMETERS_START(1, 1)
2437 Z_PARAM_STR(old)
2438 ZEND_PARSE_PARAMETERS_END();
2439
2440 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2441
2442 if (ZSTR_LEN(old) == 0) {
2443 RETURN_EMPTY_STRING();
2444 }
2445
2446 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2447
2448 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2449 c = *p;
2450 switch (c) {
2451 case '.':
2452 case '\\':
2453 case '+':
2454 case '*':
2455 case '?':
2456 case '[':
2457 case '^':
2458 case ']':
2459 case '$':
2460 case '(':
2461 case ')':
2462 *q++ = '\\';
2463 ZEND_FALLTHROUGH;
2464 default:
2465 *q++ = c;
2466 }
2467 }
2468
2469 *q = '\0';
2470
2471 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2472 }
2473 /* }}} */
2474
2475 /* {{{ Returns ASCII value of character
2476 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2477 PHP_FUNCTION(ord)
2478 {
2479 zend_string *str;
2480
2481 ZEND_PARSE_PARAMETERS_START(1, 1)
2482 Z_PARAM_STR(str)
2483 ZEND_PARSE_PARAMETERS_END();
2484
2485 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2486 }
2487 /* }}} */
2488
2489 /* {{{ Converts ASCII code to a character
2490 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2491 PHP_FUNCTION(chr)
2492 {
2493 zend_long c;
2494
2495 ZEND_PARSE_PARAMETERS_START(1, 1)
2496 Z_PARAM_LONG(c)
2497 ZEND_PARSE_PARAMETERS_END();
2498
2499 c &= 0xff;
2500 RETURN_CHAR(c);
2501 }
2502 /* }}} */
2503
2504 /* {{{ php_ucfirst
2505 Uppercase the first character of the word in a native string */
2506 static zend_string* php_ucfirst(zend_string *str)
2507 {
2508 const unsigned char ch = ZSTR_VAL(str)[0];
2509 unsigned char r = zend_toupper_ascii(ch);
2510 if (r == ch) {
2511 return zend_string_copy(str);
2512 } else {
2513 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2514 ZSTR_VAL(s)[0] = r;
2515 return s;
2516 }
2517 }
2518 /* }}} */
2519
2520 /* {{{ Makes a string's first character uppercase */
2521 PHP_FUNCTION(ucfirst)
2522 {
2523 zend_string *str;
2524
2525 ZEND_PARSE_PARAMETERS_START(1, 1)
2526 Z_PARAM_STR(str)
2527 ZEND_PARSE_PARAMETERS_END();
2528
2529 if (!ZSTR_LEN(str)) {
2530 RETURN_EMPTY_STRING();
2531 }
2532
2533 RETURN_STR(php_ucfirst(str));
2534 }
2535 /* }}} */
2536
2537 /* {{{
2538 Lowercase the first character of the word in a native string */
2539 static zend_string* php_lcfirst(zend_string *str)
2540 {
2541 unsigned char r = zend_tolower_ascii(ZSTR_VAL(str)[0]);
2542 if (r == ZSTR_VAL(str)[0]) {
2543 return zend_string_copy(str);
2544 } else {
2545 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2546 ZSTR_VAL(s)[0] = r;
2547 return s;
2548 }
2549 }
2550 /* }}} */
2551
2552 /* {{{ Make a string's first character lowercase */
2553 PHP_FUNCTION(lcfirst)
2554 {
2555 zend_string *str;
2556
2557 ZEND_PARSE_PARAMETERS_START(1, 1)
2558 Z_PARAM_STR(str)
2559 ZEND_PARSE_PARAMETERS_END();
2560
2561 if (!ZSTR_LEN(str)) {
2562 RETURN_EMPTY_STRING();
2563 }
2564
2565 RETURN_STR(php_lcfirst(str));
2566 }
2567 /* }}} */
2568
2569 /* {{{ Uppercase the first character of every word in a string */
2570 PHP_FUNCTION(ucwords)
2571 {
2572 zend_string *str;
2573 char *delims = " \t\r\n\f\v";
2574 char *r;
2575 const char *r_end;
2576 size_t delims_len = 6;
2577 char mask[256];
2578
2579 ZEND_PARSE_PARAMETERS_START(1, 2)
2580 Z_PARAM_STR(str)
2581 Z_PARAM_OPTIONAL
2582 Z_PARAM_STRING(delims, delims_len)
2583 ZEND_PARSE_PARAMETERS_END();
2584
2585 if (!ZSTR_LEN(str)) {
2586 RETURN_EMPTY_STRING();
2587 }
2588
2589 php_charmask((const unsigned char *) delims, delims_len, mask);
2590
2591 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2592 r = Z_STRVAL_P(return_value);
2593
2594 *r = zend_toupper_ascii((unsigned char) *r);
2595 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2596 if (mask[(unsigned char)*r++]) {
2597 *r = zend_toupper_ascii((unsigned char) *r);
2598 }
2599 }
2600 }
2601 /* }}} */
2602
2603 /* {{{ php_strtr */
2604 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2605 {
2606 size_t i;
2607
2608 if (UNEXPECTED(trlen < 1)) {
2609 return str;
2610 } else if (trlen == 1) {
2611 char ch_from = *str_from;
2612 char ch_to = *str_to;
2613
2614 for (i = 0; i < len; i++) {
2615 if (str[i] == ch_from) {
2616 str[i] = ch_to;
2617 }
2618 }
2619 } else {
2620 unsigned char xlat[256];
2621
2622 memset(xlat, 0, sizeof(xlat));
2623
2624 for (i = 0; i < trlen; i++) {
2625 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2626 }
2627
2628 for (i = 0; i < len; i++) {
2629 str[i] += xlat[(size_t)(unsigned char) str[i]];
2630 }
2631 }
2632
2633 return str;
2634 }
2635 /* }}} */
2636
2637 /* {{{ php_strtr_ex */
2638 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2639 {
2640 zend_string *new_str = NULL;
2641 size_t i;
2642
2643 if (UNEXPECTED(trlen < 1)) {
2644 return zend_string_copy(str);
2645 } else if (trlen == 1) {
2646 char ch_from = *str_from;
2647 char ch_to = *str_to;
2648 char *output;
2649 char *input = ZSTR_VAL(str);
2650 size_t len = ZSTR_LEN(str);
2651
2652 #ifdef __SSE2__
2653 if (ZSTR_LEN(str) >= sizeof(__m128i)) {
2654 __m128i search = _mm_set1_epi8(ch_from);
2655 __m128i delta = _mm_set1_epi8(ch_to - ch_from);
2656
2657 do {
2658 __m128i src = _mm_loadu_si128((__m128i*)(input));
2659 __m128i mask = _mm_cmpeq_epi8(src, search);
2660 if (_mm_movemask_epi8(mask)) {
2661 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2662 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2663 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2664 _mm_storeu_si128((__m128i *)(output),
2665 _mm_add_epi8(src,
2666 _mm_and_si128(mask, delta)));
2667 input += sizeof(__m128i);
2668 output += sizeof(__m128i);
2669 len -= sizeof(__m128i);
2670 for (; len >= sizeof(__m128i); input += sizeof(__m128i), output += sizeof(__m128i), len -= sizeof(__m128i)) {
2671 src = _mm_loadu_si128((__m128i*)(input));
2672 mask = _mm_cmpeq_epi8(src, search);
2673 _mm_storeu_si128((__m128i *)(output),
2674 _mm_add_epi8(src,
2675 _mm_and_si128(mask, delta)));
2676 }
2677 for (; len > 0; input++, output++, len--) {
2678 *output = (*input == ch_from) ? ch_to : *input;
2679 }
2680 *output = 0;
2681 return new_str;
2682 }
2683 input += sizeof(__m128i);
2684 len -= sizeof(__m128i);
2685 } while (len >= sizeof(__m128i));
2686 }
2687 #endif
2688 for (; len > 0; input++, len--) {
2689 if (*input == ch_from) {
2690 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2691 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
2692 output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
2693 *output = ch_to;
2694 input++;
2695 output++;
2696 len--;
2697 for (; len > 0; input++, output++, len--) {
2698 *output = (*input == ch_from) ? ch_to : *input;
2699 }
2700 *output = 0;
2701 return new_str;
2702 }
2703 }
2704 } else {
2705 unsigned char xlat[256];
2706
2707 memset(xlat, 0, sizeof(xlat));;
2708
2709 for (i = 0; i < trlen; i++) {
2710 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2711 }
2712
2713 for (i = 0; i < ZSTR_LEN(str); i++) {
2714 if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2715 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2716 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2717 do {
2718 ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2719 i++;
2720 } while (i < ZSTR_LEN(str));
2721 ZSTR_VAL(new_str)[i] = 0;
2722 return new_str;
2723 }
2724 }
2725 }
2726
2727 return zend_string_copy(str);
2728 }
2729 /* }}} */
2730
2731 /* {{{ php_strtr_array */
2732 static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
2733 {
2734 const char *str = ZSTR_VAL(input);
2735 size_t slen = ZSTR_LEN(input);
2736 zend_ulong num_key;
2737 zend_string *str_key;
2738 size_t len, pos, old_pos;
2739 bool has_num_keys = false;
2740 size_t minlen = 128*1024;
2741 size_t maxlen = 0;
2742 HashTable str_hash;
2743 zval *entry;
2744 const char *key;
2745 smart_str result = {0};
2746 zend_ulong bitset[256/sizeof(zend_ulong)];
2747 zend_ulong *num_bitset;
2748
2749 /* we will collect all possible key lengths */
2750 num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
2751 memset(bitset, 0, sizeof(bitset));
2752
2753 /* check if original array has numeric keys */
2754 ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
2755 if (UNEXPECTED(!str_key)) {
2756 has_num_keys = true;
2757 } else {
2758 len = ZSTR_LEN(str_key);
2759 if (UNEXPECTED(len == 0)) {
2760 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
2761 continue;
2762 } else if (UNEXPECTED(len > slen)) {
2763 /* skip long patterns */
2764 continue;
2765 }
2766 if (len > maxlen) {
2767 maxlen = len;
2768 }
2769 if (len < minlen) {
2770 minlen = len;
2771 }
2772 /* remember possible key length */
2773 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2774 bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
2775 }
2776 } ZEND_HASH_FOREACH_END();
2777
2778 if (UNEXPECTED(has_num_keys)) {
2779 zend_string *key_used;
2780 /* we have to rebuild HashTable with numeric keys */
2781 zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
2782 ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
2783 if (UNEXPECTED(!str_key)) {
2784 key_used = zend_long_to_str(num_key);
2785 len = ZSTR_LEN(key_used);
2786 if (UNEXPECTED(len > slen)) {
2787 /* skip long patterns */
2788 zend_string_release(key_used);
2789 continue;
2790 }
2791 if (len > maxlen) {
2792 maxlen = len;
2793 }
2794 if (len < minlen) {
2795 minlen = len;
2796 }
2797 /* remember possible key length */
2798 num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
2799 bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
2800 } else {
2801 key_used = str_key;
2802 len = ZSTR_LEN(key_used);
2803 if (UNEXPECTED(len > slen)) {
2804 /* skip long patterns */
2805 continue;
2806 }
2807 }
2808 zend_hash_add(&str_hash, key_used, entry);
2809 if (UNEXPECTED(!str_key)) {
2810 zend_string_release_ex(key_used, 0);
2811 }
2812 } ZEND_HASH_FOREACH_END();
2813 pats = &str_hash;
2814 }
2815
2816 if (UNEXPECTED(minlen > maxlen)) {
2817 /* return the original string */
2818 if (pats == &str_hash) {
2819 zend_hash_destroy(&str_hash);
2820 }
2821 efree(num_bitset);
2822 RETURN_STR_COPY(input);
2823 }
2824
2825 old_pos = pos = 0;
2826 while (pos <= slen - minlen) {
2827 key = str + pos;
2828 if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
2829 len = maxlen;
2830 if (len > slen - pos) {
2831 len = slen - pos;
2832 }
2833 while (len >= minlen) {
2834 if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
2835 entry = zend_hash_str_find(pats, key, len);
2836 if (entry != NULL) {
2837 zend_string *tmp;
2838 zend_string *s = zval_get_tmp_string(entry, &tmp);
2839 smart_str_appendl(&result, str + old_pos, pos - old_pos);
2840 smart_str_append(&result, s);
2841 old_pos = pos + len;
2842 pos = old_pos - 1;
2843 zend_tmp_string_release(tmp);
2844 break;
2845 }
2846 }
2847 len--;
2848 }
2849 }
2850 pos++;
2851 }
2852
2853 if (result.s) {
2854 smart_str_appendl(&result, str + old_pos, slen - old_pos);
2855 RETVAL_STR(smart_str_extract(&result));
2856 } else {
2857 smart_str_free(&result);
2858 RETVAL_STR_COPY(input);
2859 }
2860
2861 if (pats == &str_hash) {
2862 zend_hash_destroy(&str_hash);
2863 }
2864 efree(num_bitset);
2865 }
2866 /* }}} */
2867
2868 /* {{{ count_chars */
2869 static zend_always_inline zend_long count_chars(const char *p, zend_long length, char ch)
2870 {
2871 zend_long count = 0;
2872 const char *endp;
2873
2874 #ifdef __SSE2__
2875 if (length >= sizeof(__m128i)) {
2876 __m128i search = _mm_set1_epi8(ch);
2877
2878 do {
2879 __m128i src = _mm_loadu_si128((__m128i*)(p));
2880 uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(src, search));
2881 // TODO: It would be great to use POPCNT, but it's available only with SSE4.1
2882 #if 1
2883 while (mask != 0) {
2884 count++;
2885 mask = mask & (mask - 1);
2886 }
2887 #else
2888 if (mask) {
2889 mask = mask - ((mask >> 1) & 0x5555);
2890 mask = (mask & 0x3333) + ((mask >> 2) & 0x3333);
2891 mask = (mask + (mask >> 4)) & 0x0F0F;
2892 mask = (mask + (mask >> 8)) & 0x00ff;
2893 count += mask;
2894 }
2895 #endif
2896 p += sizeof(__m128i);
2897 length -= sizeof(__m128i);
2898 } while (length >= sizeof(__m128i));
2899 }
2900 endp = p + length;
2901 while (p != endp) {
2902 count += (*p == ch);
2903 p++;
2904 }
2905 #else
2906 endp = p + length;
2907 while ((p = memchr(p, ch, endp-p))) {
2908 count++;
2909 p++;
2910 }
2911 #endif
2912 return count;
2913 }
2914 /* }}} */
2915
2916 /* {{{ php_char_to_str_ex */
2917 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, bool case_sensitivity, zend_long *replace_count)
2918 {
2919 zend_string *result;
2920 size_t char_count;
2921 int lc_from = 0;
2922 const char *source, *source_end;
2923 char *target;
2924
2925 if (case_sensitivity) {
2926 char_count = count_chars(ZSTR_VAL(str), ZSTR_LEN(str), from);
2927 } else {
2928 char_count = 0;
2929 lc_from = zend_tolower_ascii(from);
2930 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
2931 for (source = ZSTR_VAL(str); source < source_end; source++) {
2932 if (zend_tolower_ascii(*source) == lc_from) {
2933 char_count++;
2934 }
2935 }
2936 }
2937
2938 if (char_count == 0) {
2939 return zend_string_copy(str);
2940 }
2941
2942 if (replace_count) {
2943 *replace_count += char_count;
2944 }
2945
2946 if (to_len > 0) {
2947 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
2948 } else {
2949 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
2950 }
2951 target = ZSTR_VAL(result);
2952
2953 if (case_sensitivity) {
2954 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
2955
2956 while ((p = memchr(p, from, (e - p)))) {
2957 memcpy(target, s, (p - s));
2958 target += p - s;
2959 memcpy(target, to, to_len);
2960 target += to_len;
2961 p++;
2962 s = p;
2963 if (--char_count == 0) break;
2964 }
2965 if (s < e) {
2966 memcpy(target, s, (e - s));
2967 target += e - s;
2968 }
2969 } else {
2970 source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
2971 for (source = ZSTR_VAL(str); source < source_end; source++) {
2972 if (zend_tolower_ascii(*source) == lc_from) {
2973 memcpy(target, to, to_len);
2974 target += to_len;
2975 } else {
2976 *target = *source;
2977 target++;
2978 }
2979 }
2980 }
2981 *target = 0;
2982 return result;
2983 }
2984 /* }}} */
2985
2986 /* {{{ php_str_to_str_ex */
2987 static zend_string *php_str_to_str_ex(zend_string *haystack,
2988 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
2989 {
2990
2991 if (needle_len < ZSTR_LEN(haystack)) {
2992 zend_string *new_str;
2993 const char *end;
2994 const char *p, *r;
2995 char *e;
2996
2997 if (needle_len == str_len) {
2998 new_str = NULL;
2999 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3000 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3001 if (!new_str) {
3002 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3003 }
3004 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
3005 (*replace_count)++;
3006 }
3007 if (!new_str) {
3008 goto nothing_todo;
3009 }
3010 return new_str;
3011 } else {
3012 size_t count = 0;
3013 const char *o = ZSTR_VAL(haystack);
3014 const char *n = needle;
3015 const char *endp = o + ZSTR_LEN(haystack);
3016
3017 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3018 o += needle_len;
3019 count++;
3020 }
3021 if (count == 0) {
3022 /* Needle doesn't occur, shortcircuit the actual replacement. */
3023 goto nothing_todo;
3024 }
3025 if (str_len > needle_len) {
3026 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
3027 } else {
3028 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
3029 }
3030
3031 e = ZSTR_VAL(new_str);
3032 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3033 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3034 memcpy(e, p, r - p);
3035 e += r - p;
3036 memcpy(e, str, str_len);
3037 e += str_len;
3038 (*replace_count)++;
3039 }
3040
3041 if (p < end) {
3042 memcpy(e, p, end - p);
3043 e += end - p;
3044 }
3045
3046 *e = '\0';
3047 return new_str;
3048 }
3049 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
3050 nothing_todo:
3051 return zend_string_copy(haystack);
3052 } else {
3053 (*replace_count)++;
3054 return zend_string_init_fast(str, str_len);
3055 }
3056 }
3057 /* }}} */
3058
3059 /* {{{ php_str_to_str_i_ex */
3060 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3061 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3062 {
3063 zend_string *new_str = NULL;
3064 zend_string *lc_needle;
3065
3066 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3067 const char *end;
3068 const char *p, *r;
3069 char *e;
3070
3071 if (ZSTR_LEN(needle) == str_len) {
3072 lc_needle = zend_string_tolower(needle);
3073 end = lc_haystack + ZSTR_LEN(haystack);
3074 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3075 if (!new_str) {
3076 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3077 }
3078 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3079 (*replace_count)++;
3080 }
3081 zend_string_release_ex(lc_needle, 0);
3082
3083 if (!new_str) {
3084 goto nothing_todo;
3085 }
3086 return new_str;
3087 } else {
3088 size_t count = 0;
3089 const char *o = lc_haystack;
3090 const char *n;
3091 const char *endp = o + ZSTR_LEN(haystack);
3092
3093 lc_needle = zend_string_tolower(needle);
3094 n = ZSTR_VAL(lc_needle);
3095
3096 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3097 o += ZSTR_LEN(lc_needle);
3098 count++;
3099 }
3100 if (count == 0) {
3101 /* Needle doesn't occur, shortcircuit the actual replacement. */
3102 zend_string_release_ex(lc_needle, 0);
3103 goto nothing_todo;
3104 }
3105
3106 if (str_len > ZSTR_LEN(lc_needle)) {
3107 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3108 } else {
3109 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3110 }
3111
3112 e = ZSTR_VAL(new_str);
3113 end = lc_haystack + ZSTR_LEN(haystack);
3114
3115 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3116 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3117 e += r - p;
3118 memcpy(e, str, str_len);
3119 e += str_len;
3120 (*replace_count)++;
3121 }
3122
3123 if (p < end) {
3124 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3125 e += end - p;
3126 }
3127 *e = '\0';
3128
3129 zend_string_release_ex(lc_needle, 0);
3130
3131 return new_str;
3132 }
3133 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3134 nothing_todo:
3135 return zend_string_copy(haystack);
3136 } else {
3137 lc_needle = zend_string_tolower(needle);
3138
3139 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3140 zend_string_release_ex(lc_needle, 0);
3141 goto nothing_todo;
3142 }
3143 zend_string_release_ex(lc_needle, 0);
3144
3145 new_str = zend_string_init(str, str_len, 0);
3146
3147 (*replace_count)++;
3148 return new_str;
3149 }
3150 }
3151 /* }}} */
3152
3153 /* {{{ php_str_to_str */
3154 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3155 {
3156 zend_string *new_str;
3157
3158 if (needle_len < length) {
3159 const char *end;
3160 const char *s, *p;
3161 char *e, *r;
3162
3163 if (needle_len == str_len) {
3164 new_str = zend_string_init(haystack, length, 0);
3165 end = ZSTR_VAL(new_str) + length;
3166 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3167 memcpy(r, str, str_len);
3168 }
3169 return new_str;
3170 } else {
3171 if (str_len < needle_len) {
3172 new_str = zend_string_alloc(length, 0);
3173 } else {
3174 size_t count = 0;
3175 const char *o = haystack;
3176 const char *n = needle;
3177 const char *endp = o + length;
3178
3179 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3180 o += needle_len;
3181 count++;
3182 }
3183 if (count == 0) {
3184 /* Needle doesn't occur, shortcircuit the actual replacement. */
3185 new_str = zend_string_init(haystack, length, 0);
3186 return new_str;
3187 } else {
3188 if (str_len > needle_len) {
3189 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3190 } else {
3191 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3192 }
3193 }
3194 }
3195
3196 s = e = ZSTR_VAL(new_str);
3197 end = haystack + length;
3198 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3199 memcpy(e, p, r - p);
3200 e += r - p;
3201 memcpy(e, str, str_len);
3202 e += str_len;
3203 }
3204
3205 if (p < end) {
3206 memcpy(e, p, end - p);
3207 e += end - p;
3208 }
3209
3210 *e = '\0';
3211 new_str = zend_string_truncate(new_str, e - s, 0);
3212 return new_str;
3213 }
3214 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3215 new_str = zend_string_init(haystack, length, 0);
3216 return new_str;
3217 } else {
3218 new_str = zend_string_init(str, str_len, 0);
3219
3220 return new_str;
3221 }
3222 }
3223 /* }}} */
3224
3225 /* {{{ Translates characters in str using given translation tables */
3226 PHP_FUNCTION(strtr)
3227 {
3228 zend_string *str, *from_str = NULL;
3229 HashTable *from_ht = NULL;
3230 char *to = NULL;
3231 size_t to_len = 0;
3232
3233 ZEND_PARSE_PARAMETERS_START(2, 3)
3234 Z_PARAM_STR(str)
3235 Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
3236 Z_PARAM_OPTIONAL
3237 Z_PARAM_STRING_OR_NULL(to, to_len)
3238 ZEND_PARSE_PARAMETERS_END();
3239
3240 if (!to && from_ht == NULL) {
3241 zend_argument_type_error(2, "must be of type array, string given");
3242 RETURN_THROWS();
3243 } else if (to && from_str == NULL) {
3244 zend_argument_type_error(2, "must be of type string, array given");
3245 RETURN_THROWS();
3246 }
3247
3248 /* shortcut for empty string */
3249 if (ZSTR_LEN(str) == 0) {
3250 RETURN_EMPTY_STRING();
3251 }
3252
3253 if (!to) {
3254 if (zend_hash_num_elements(from_ht) < 1) {
3255 RETURN_STR_COPY(str);
3256 } else if (zend_hash_num_elements(from_ht) == 1) {
3257 zend_long num_key;
3258 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3259 zval *entry;
3260
3261 ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
3262 tmp_str = NULL;
3263 if (UNEXPECTED(!str_key)) {
3264 str_key = tmp_str = zend_long_to_str(num_key);
3265 }
3266 replace = zval_get_tmp_string(entry, &tmp_replace);
3267 if (ZSTR_LEN(str_key) < 1) {
3268 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3269 RETVAL_STR_COPY(str);
3270 } else if (ZSTR_LEN(str_key) == 1) {
3271 RETVAL_STR(php_char_to_str_ex(str,
3272 ZSTR_VAL(str_key)[0],
3273 ZSTR_VAL(replace),
3274 ZSTR_LEN(replace),
3275 /* case_sensitive */ true,
3276 NULL));
3277 } else {
3278 zend_long dummy = 0;
3279 RETVAL_STR(php_str_to_str_ex(str,
3280 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3281 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3282 }
3283 zend_tmp_string_release(tmp_str);
3284 zend_tmp_string_release(tmp_replace);
3285 return;
3286 } ZEND_HASH_FOREACH_END();
3287 } else {
3288 php_strtr_array(return_value, str, from_ht);
3289 }
3290 } else {
3291 RETURN_STR(php_strtr_ex(str,
3292 ZSTR_VAL(from_str),
3293 to,
3294 MIN(ZSTR_LEN(from_str), to_len)));
3295 }
3296 }
3297 /* }}} */
3298
3299 /* {{{ Reverse a string */
3300 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3301 #include <tmmintrin.h>
3302 #elif defined(__aarch64__) || defined(_M_ARM64)
3303 #include <arm_neon.h>
3304 #endif
3305 PHP_FUNCTION(strrev)
3306 {
3307 zend_string *str;
3308 const char *s, *e;
3309 char *p;
3310 zend_string *n;
3311
3312 ZEND_PARSE_PARAMETERS_START(1, 1)
3313 Z_PARAM_STR(str)
3314 ZEND_PARSE_PARAMETERS_END();
3315
3316 n = zend_string_alloc(ZSTR_LEN(str), 0);
3317 p = ZSTR_VAL(n);
3318
3319 s = ZSTR_VAL(str);
3320 e = s + ZSTR_LEN(str);
3321 --e;
3322 #ifdef ZEND_INTRIN_SSSE3_NATIVE
3323 if (e - s > 15) {
3324 const __m128i map = _mm_set_epi8(
3325 0, 1, 2, 3,
3326 4, 5, 6, 7,
3327 8, 9, 10, 11,
3328 12, 13, 14, 15);
3329 do {
3330 const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
3331 _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
3332 p += 16;
3333 e -= 16;
3334 } while (e - s > 15);
3335 }
3336 #elif defined(__aarch64__)
3337 if (e - s > 15) {
3338 do {
3339 const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
3340 /* Synthesize rev128 with a rev64 + ext. */
3341 const uint8x16_t rev = vrev64q_u8(str);
3342 const uint8x16_t ext = (uint8x16_t)
3343 vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
3344 vst1q_u8((uint8_t *)p, ext);
3345 p += 16;
3346 e -= 16;
3347 } while (e - s > 15);
3348 }
3349 #elif defined(_M_ARM64)
3350 if (e - s > 15) {
3351 do {
3352 const __n128 str = vld1q_u8((uint8_t *)(e - 15));
3353 /* Synthesize rev128 with a rev64 + ext. */
3354 /* strange force cast limit on windows: you cannot convert anything */
3355 const __n128 rev = vrev64q_u8(str);
3356 const __n128 ext = vextq_u64(rev, rev, 1);
3357 vst1q_u8((uint8_t *)p, ext);
3358 p += 16;
3359 e -= 16;
3360 } while (e - s > 15);
3361 }
3362 #endif
3363 while (e >= s) {
3364 *p++ = *e--;
3365 }
3366
3367 *p = '\0';
3368
3369 RETVAL_NEW_STR(n);
3370 }
3371 /* }}} */
3372
/* {{{ php_similar_str
   Finds the longest run of bytes common to txt1 (len1 bytes) and txt2
   (len2 bytes).
   Outputs:
     *max   - length of that run (0 when the inputs share no byte)
     *pos1  - offset of the run in txt1
     *pos2  - offset of the run in txt2
     *count - how many times the best-so-far run was improved while scanning
   *pos1 and *pos2 are written only when a run is found, so the caller has to
   initialise them. Among runs of equal length the first one met (txt1 offset
   ascending, then txt2 offset ascending) is reported. */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t i, j;

	*max = 0;
	*count = 0;

	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* Length of the common run starting at txt1[i] / txt2[j]. */
			size_t run = 0;

			while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
				run++;
			}

			if (run > *max) {
				*max = run;
				*count += 1;
				*pos1 = i;
				*pos2 = j;
			}
		}
	}
}
/* }}} */
3396
/* {{{ php_similar_char
   Returns the number of matching bytes between txt1 and txt2: the length of
   the longest common run reported by php_similar_str(), plus the same
   measure applied recursively to the text left of the run and to the text
   right of it. */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t pos1 = 0, pos2 = 0, max, count;
	size_t total;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);

	if (max == 0) {
		/* No byte in common. */
		return 0;
	}

	total = max;

	/* Left of the run: only when both sides have text there and the search
	 * improved its best run more than once. */
	if (pos1 && pos2 && count > 1) {
		total += php_similar_char(txt1, pos1, txt2, pos2);
	}

	/* Right of the run: only when both sides have text left. */
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		total += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
				txt2 + pos2 + max, len2 - pos2 - max);
	}

	return total;
}
/* }}} */
3418
3419 /* {{{ Calculates the similarity between two strings */
3420 PHP_FUNCTION(similar_text)
3421 {
3422 zend_string *t1, *t2;
3423 zval *percent = NULL;
3424 bool compute_percentage = ZEND_NUM_ARGS() >= 3;
3425 size_t sim;
3426
3427 ZEND_PARSE_PARAMETERS_START(2, 3)
3428 Z_PARAM_STR(t1)
3429 Z_PARAM_STR(t2)
3430 Z_PARAM_OPTIONAL
3431 Z_PARAM_ZVAL(percent)
3432 ZEND_PARSE_PARAMETERS_END();
3433
3434 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3435 if (compute_percentage) {
3436 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3437 }
3438
3439 RETURN_LONG(0);
3440 }
3441
3442 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3443
3444 if (compute_percentage) {
3445 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3446 }
3447
3448 RETURN_LONG(sim);
3449 }
3450 /* }}} */
3451
3452 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3453 PHP_FUNCTION(addcslashes)
3454 {
3455 zend_string *str, *what;
3456
3457 ZEND_PARSE_PARAMETERS_START(2, 2)
3458 Z_PARAM_STR(str)
3459 Z_PARAM_STR(what)
3460 ZEND_PARSE_PARAMETERS_END();
3461
3462 if (ZSTR_LEN(str) == 0) {
3463 RETURN_EMPTY_STRING();
3464 }
3465
3466 if (ZSTR_LEN(what) == 0) {
3467 RETURN_STR_COPY(str);
3468 }
3469
3470 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3471 }
3472 /* }}} */
3473
3474 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3475 PHP_FUNCTION(addslashes)
3476 {
3477 zend_string *str;
3478
3479 ZEND_PARSE_PARAMETERS_START(1, 1)
3480 Z_PARAM_STR(str)
3481 ZEND_PARSE_PARAMETERS_END();
3482
3483 if (ZSTR_LEN(str) == 0) {
3484 RETURN_EMPTY_STRING();
3485 }
3486
3487 RETURN_STR(php_addslashes(str));
3488 }
3489 /* }}} */
3490
3491 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3492 PHP_FUNCTION(stripcslashes)
3493 {
3494 zend_string *str;
3495
3496 ZEND_PARSE_PARAMETERS_START(1, 1)
3497 Z_PARAM_STR(str)
3498 ZEND_PARSE_PARAMETERS_END();
3499
3500 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3501 php_stripcslashes(Z_STR_P(return_value));
3502 }
3503 /* }}} */
3504
3505 /* {{{ Strips backslashes from a string */
3506 PHP_FUNCTION(stripslashes)
3507 {
3508 zend_string *str;
3509
3510 ZEND_PARSE_PARAMETERS_START(1, 1)
3511 Z_PARAM_STR(str)
3512 ZEND_PARSE_PARAMETERS_END();
3513
3514 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3515 php_stripslashes(Z_STR_P(return_value));
3516 }
3517 /* }}} */
3518
3519 /* {{{ php_stripcslashes */
3520 PHPAPI void php_stripcslashes(zend_string *str)
3521 {
3522 const char *source, *end;
3523 char *target;
3524 size_t nlen = ZSTR_LEN(str), i;
3525 char numtmp[4];
3526
3527 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3528 if (*source == '\\' && source + 1 < end) {
3529 source++;
3530 switch (*source) {
3531 case 'n': *target++='\n'; nlen--; break;
3532 case 'r': *target++='\r'; nlen--; break;
3533 case 'a': *target++='\a'; nlen--; break;
3534 case 't': *target++='\t'; nlen--; break;
3535 case 'v': *target++='\v'; nlen--; break;
3536 case 'b': *target++='\b'; nlen--; break;
3537 case 'f': *target++='\f'; nlen--; break;
3538 case '\\': *target++='\\'; nlen--; break;
3539 case 'x':
3540 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3541 numtmp[0] = *++source;
3542 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3543 numtmp[1] = *++source;
3544 numtmp[2] = '\0';
3545 nlen-=3;
3546 } else {
3547 numtmp[1] = '\0';
3548 nlen-=2;
3549 }
3550 *target++=(char)strtol(numtmp, NULL, 16);
3551 break;
3552 }
3553 ZEND_FALLTHROUGH;
3554 default:
3555 i=0;
3556 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3557 numtmp[i++] = *source++;
3558 }
3559 if (i) {
3560 numtmp[i]='\0';
3561 *target++=(char)strtol(numtmp, NULL, 8);
3562 nlen-=i;
3563 source--;
3564 } else {
3565 *target++=*source;
3566 nlen--;
3567 }
3568 }
3569 } else {
3570 *target++=*source;
3571 }
3572 }
3573
3574 if (nlen != 0) {
3575 *target='\0';
3576 }
3577
3578 ZSTR_LEN(str) = nlen;
3579 }
3580 /* }}} */
3581
3582 /* {{{ php_addcslashes_str */
3583 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3584 {
3585 char flags[256];
3586 char *target;
3587 const char *source, *end;
3588 char c;
3589 size_t newlen;
3590 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3591
3592 php_charmask((const unsigned char *) what, wlength, flags);
3593
3594 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3595 c = *source;
3596 if (flags[(unsigned char)c]) {
3597 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3598 *target++ = '\\';
3599 switch (c) {
3600 case '\n': *target++ = 'n'; break;
3601 case '\t': *target++ = 't'; break;
3602 case '\r': *target++ = 'r'; break;
3603 case '\a': *target++ = 'a'; break;
3604 case '\v': *target++ = 'v'; break;
3605 case '\b': *target++ = 'b'; break;
3606 case '\f': *target++ = 'f'; break;
3607 default: target += sprintf(target, "%03o", (unsigned char) c);
3608 }
3609 continue;
3610 }
3611 *target++ = '\\';
3612 }
3613 *target++ = c;
3614 }
3615 *target = 0;
3616 newlen = target - ZSTR_VAL(new_str);
3617 if (newlen < len * 4) {
3618 new_str = zend_string_truncate(new_str, newlen, 0);
3619 }
3620 return new_str;
3621 }
3622 /* }}} */
3623
3624 /* {{{ php_addcslashes */
3625 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3626 {
3627 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3628 }
3629 /* }}} */
3630
3631 /* {{{ php_addslashes */
3632
3633 #ifdef ZEND_INTRIN_SSE4_2_NATIVE
3634 # include <nmmintrin.h>
3635 # include "Zend/zend_bitset.h"
3636 #elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
3637 # include <nmmintrin.h>
3638 # include "Zend/zend_bitset.h"
3639 # include "Zend/zend_cpuinfo.h"
3640
3641 ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
3642 zend_string *php_addslashes_default(zend_string *str);
3643
3644 ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
3645 void php_stripslashes_default(zend_string *str);
3646
3647 # ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
3648 PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
3649 PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));
3650
3651 typedef zend_string *(*php_addslashes_func_t)(zend_string *);
3652 typedef void (*php_stripslashes_func_t)(zend_string *);
3653
3654 ZEND_NO_SANITIZE_ADDRESS
3655 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3656 static php_addslashes_func_t resolve_addslashes(void) {
3657 if (zend_cpu_supports_sse42()) {
3658 return php_addslashes_sse42;
3659 }
3660 return php_addslashes_default;
3661 }
3662
3663 ZEND_NO_SANITIZE_ADDRESS
3664 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3665 static php_stripslashes_func_t resolve_stripslashes(void) {
3666 if (zend_cpu_supports_sse42()) {
3667 return php_stripslashes_sse42;
3668 }
3669 return php_stripslashes_default;
3670 }
3671 # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
3672
3673 static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
3674 static void (*php_stripslashes_ptr)(zend_string *str) = NULL;
3675
3676 PHPAPI zend_string *php_addslashes(zend_string *str) {
3677 return php_addslashes_ptr(str);
3678 }
3679 PHPAPI void php_stripslashes(zend_string *str) {
3680 php_stripslashes_ptr(str);
3681 }
3682
3683 /* {{{ PHP_MINIT_FUNCTION */
3684 PHP_MINIT_FUNCTION(string_intrin)
3685 {
3686 if (zend_cpu_supports_sse42()) {
3687 php_addslashes_ptr = php_addslashes_sse42;
3688 php_stripslashes_ptr = php_stripslashes_sse42;
3689 } else {
3690 php_addslashes_ptr = php_addslashes_default;
3691 php_stripslashes_ptr = php_stripslashes_default;
3692 }
3693 return SUCCESS;
3694 }
3695 /* }}} */
3696 # endif
3697 #endif
3698
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_RESOLVER)
/* SSE4.2 implementation of addslashes: produces a string in which every ', "
 * and \ byte is preceded by a backslash and every NUL byte is written as the
 * two characters "\0". It is compiled as php_addslashes() itself in the NATIVE
 * configuration and as php_addslashes_sse42() in the RESOLVER configuration.
 *
 * A NULL str yields ZSTR_EMPTY_ALLOC(). When none of the four bytes occurs,
 * the input is handed back through zend_string_copy() and no buffer is built.
 * Otherwise a new string is allocated and returned. */
# ifdef ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
# elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
zend_string *php_addslashes_sse42(zend_string *str)
# endif
{
	/* The four bytes to look for; the explicit length 4 passed to
	 * _mm_cmpestrm below makes the NUL byte part of the search set. */
	ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
	__m128i w128, s128;
	/* Match mask of the current 16-byte block; the code below treats bit i
	 * as "byte i of the block needs escaping". */
	uint32_t res = 0;
	/* maximum string length, worst case situation */
	char *target;
	const char *source, *end;
	size_t offset;
	zend_string *new_str;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	source = ZSTR_VAL(str);
	end = source + ZSTR_LEN(str);

	/* Phase 1a: scan whole 16-byte blocks until one contains a match. */
	if (ZSTR_LEN(str) > 15) {
		w128 = _mm_load_si128((__m128i *)slashchars);
		do {
			s128 = _mm_loadu_si128((__m128i *)source);
			res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
			if (res) {
				goto do_escape;
			}
			source += 16;
		} while ((end - source) > 15);
	}

	/* Phase 1b: scan the remaining (fewer than 16) bytes one at a time.
	 * res is 0 whenever this loop is reached. */
	while (source < end) {
		switch (*source) {
			case '\0':
			case '\'':
			case '\"':
			case '\\':
				goto do_escape;
			default:
				source++;
				break;
		}
	}

	/* Nothing to escape. */
	return zend_string_copy(str);

do_escape:
	/* Everything before source is known to be clean and is copied verbatim;
	 * the allocation reserves two output bytes for each remaining input byte. */
	offset = source - (char *)ZSTR_VAL(str);
	new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
	memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
	target = ZSTR_VAL(new_str) + offset;

	if (res) {
		/* Arrived from phase 1a: finish the 16-byte block whose mask is
		 * still in res. */
		int pos = 0;
		do {
			/* n = number of clean bytes before the next byte to escape. */
			int i, n = zend_ulong_ntz(res);
			for (i = 0; i < n; i++) {
				*target++ = source[pos + i];
			}
			pos += n;
			*target++ = '\\';
			if (source[pos] == '\0') {
				*target++ = '0';
			} else {
				*target++ = source[pos];
			}
			pos++;
			/* Drop the bits of the bytes just handled. */
			res = res >> (n + 1);
		} while (res);

		/* Clean tail of the block. */
		for (; pos < 16; pos++) {
			*target++ = source[pos];
		}
		source += 16;
	} else if (end - source > 15) {
		/* Arrived from phase 1b: load the search set if the block loop
		 * below is going to run. */
		w128 = _mm_load_si128((__m128i *)slashchars);
	}

	/* Phase 2a: escape the remaining whole 16-byte blocks. */
	for (; end - source > 15; source += 16) {
		int pos = 0;
		s128 = _mm_loadu_si128((__m128i *)source);
		res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
		if (res) {
			do {
				int i, n = zend_ulong_ntz(res);
				for (i = 0; i < n; i++) {
					*target++ = source[pos + i];
				}
				pos += n;
				*target++ = '\\';
				if (source[pos] == '\0') {
					*target++ = '0';
				} else {
					*target++ = source[pos];
				}
				pos++;
				res = res >> (n + 1);
			} while (res);
			for (; pos < 16; pos++) {
				*target++ = source[pos];
			}
		} else {
			/* Clean block: copy all 16 bytes with one store. */
			_mm_storeu_si128((__m128i*)target, s128);
			target += 16;
		}
	}

	/* Phase 2b: escape the last (fewer than 16) bytes one at a time. */
	while (source < end) {
		switch (*source) {
			case '\0':
				*target++ = '\\';
				*target++ = '0';
				break;
			case '\'':
			case '\"':
			case '\\':
				*target++ = '\\';
				ZEND_FALLTHROUGH;
			default:
				*target++ = *source;
				break;
		}
		source++;
	}

	*target = '\0';

	/* Shrink the allocation only when more than 16 bytes were reserved but
	 * not used; otherwise just record the real length. */
	if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
		new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
	} else {
		ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
	}

	return new_str;
}
/* }}} */
#endif
3840
3841 #if defined(__aarch64__) || defined(_M_ARM64)
/* 16 bytes that can be accessed either byte by byte (mem) or as two 64-bit
 * words (dw). The aarch64 code stores a NEON comparison result into mem and
 * tests dw[0] | dw[1] to find out whether any byte matched. */
typedef union {
	uint8_t mem[16];
	uint64_t dw[2];
} quad_word;
3846
3847 static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
3848 uint8x16_t s0 = vceqq_u8(x, vdupq_n_u8('\0'));
3849 uint8x16_t s1 = vceqq_u8(x, vdupq_n_u8('\''));
3850 uint8x16_t s2 = vceqq_u8(x, vdupq_n_u8('\"'));
3851 uint8x16_t s3 = vceqq_u8(x, vdupq_n_u8('\\'));
3852 uint8x16_t s01 = vorrq_u8(s0, s1);
3853 uint8x16_t s23 = vorrq_u8(s2, s3);
3854 uint8x16_t s0123 = vorrq_u8(s01, s23);
3855 quad_word qw;
3856 vst1q_u8(qw.mem, s0123);
3857 return qw;
3858 }
3859
3860 static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
3861 {
3862 for (int i = 0; i < 16; i++) {
3863 char s = source[i];
3864 if (res.mem[i] == 0)
3865 *target++ = s;
3866 else {
3867 *target++ = '\\';
3868 if (s == '\0')
3869 *target++ = '0';
3870 else
3871 *target++ = s;
3872 }
3873 }
3874 return target;
3875 }
3876 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
3877
3878 #ifndef ZEND_INTRIN_SSE4_2_NATIVE
3879 # ifdef ZEND_INTRIN_SSE4_2_RESOLVER
3880 zend_string *php_addslashes_default(zend_string *str) /* {{{ */
3881 # else
3882 PHPAPI zend_string *php_addslashes(zend_string *str)
3883 # endif
3884 {
3885 /* maximum string length, worst case situation */
3886 char *target;
3887 const char *source, *end;
3888 size_t offset;
3889 zend_string *new_str;
3890
3891 if (!str) {
3892 return ZSTR_EMPTY_ALLOC();
3893 }
3894
3895 source = ZSTR_VAL(str);
3896 end = source + ZSTR_LEN(str);
3897
3898 # if defined(__aarch64__) || defined(_M_ARM64)
3899 quad_word res = {0};
3900 if (ZSTR_LEN(str) > 15) {
3901 do {
3902 res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
3903 if (res.dw[0] | res.dw[1])
3904 goto do_escape;
3905 source += 16;
3906 } while ((end - source) > 15);
3907 }
3908 /* Finish the last 15 bytes or less with the scalar loop. */
3909 # endif /* defined(__aarch64__) || defined(_M_ARM64) */
3910
3911 while (source < end) {
3912 switch (*source) {
3913 case '\0':
3914 case '\'':
3915 case '\"':
3916 case '\\':
3917 goto do_escape;
3918 default:
3919 source++;
3920 break;
3921 }
3922 }
3923
3924 return zend_string_copy(str);
3925
3926 do_escape:
3927 offset = source - (char *)ZSTR_VAL(str);
3928 new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
3929 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
3930 target = ZSTR_VAL(new_str) + offset;
3931
3932 # if defined(__aarch64__) || defined(_M_ARM64)
3933 if (res.dw[0] | res.dw[1]) {
3934 target = aarch64_add_slashes(res, source, target);
3935 source += 16;
3936 }
3937 for (; end - source > 15; source += 16) {
3938 uint8x16_t x = vld1q_u8((uint8_t *)source);
3939 res = aarch64_contains_slash_chars(x);
3940 if (res.dw[0] | res.dw[1]) {
3941 target = aarch64_add_slashes(res, source, target);
3942 } else {
3943 vst1q_u8((uint8_t*)target, x);
3944 target += 16;
3945 }
3946 }
3947 /* Finish the last 15 bytes or less with the scalar loop. */
3948 # endif /* defined(__aarch64__) || defined(_M_ARM64) */
3949
3950 while (source < end) {
3951 switch (*source) {
3952 case '\0':
3953 *target++ = '\\';
3954 *target++ = '0';
3955 break;
3956 case '\'':
3957 case '\"':
3958 case '\\':
3959 *target++ = '\\';
3960 ZEND_FALLTHROUGH;
3961 default:
3962 *target++ = *source;
3963 break;
3964 }
3965 source++;
3966 }
3967
3968 *target = '\0';
3969
3970 if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
3971 new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
3972 } else {
3973 ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
3974 }
3975
3976 return new_str;
3977 }
3978 #endif
3979 /* }}} */
3980 /* }}} */
3981
3982 /* {{{ php_stripslashes
3983 *
3984 * be careful, this edits the string in-place */
3985 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
3986 {
3987 #if defined(__aarch64__) || defined(_M_ARM64)
3988 while (len > 15) {
3989 uint8x16_t x = vld1q_u8((uint8_t *)str);
3990 quad_word q;
3991 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
3992 if (q.dw[0] | q.dw[1]) {
3993 unsigned int i = 0;
3994 while (i < 16) {
3995 if (q.mem[i] == 0) {
3996 *out++ = str[i];
3997 i++;
3998 continue;
3999 }
4000
4001 i++; /* skip the slash */
4002 if (i < len) {
4003 char s = str[i];
4004 if (s == '0')
4005 *out++ = '\0';
4006 else
4007 *out++ = s; /* preserve the next character */
4008 i++;
4009 }
4010 }
4011 str += i;
4012 len -= i;
4013 } else {
4014 vst1q_u8((uint8_t*)out, x);
4015 out += 16;
4016 str += 16;
4017 len -= 16;
4018 }
4019 }
4020 /* Finish the last 15 bytes or less with the scalar loop. */
4021 #endif /* defined(__aarch64__) || defined(_M_ARM64) */
4022 while (len > 0) {
4023 if (*str == '\\') {
4024 str++; /* skip the slash */
4025 len--;
4026 if (len > 0) {
4027 if (*str == '0') {
4028 *out++='\0';
4029 str++;
4030 } else {
4031 *out++ = *str++; /* preserve the next character */
4032 }
4033 len--;
4034 }
4035 } else {
4036 *out++ = *str++;
4037 len--;
4038 }
4039 }
4040
4041 return out;
4042 }
4043
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_RESOLVER)
/* SSE variant of stripslashes: removes backslash escapes from str in place
 * ("\0" becomes a NUL byte, any other escaped byte is kept as is) and then
 * updates the length and terminator if the string got shorter. It is compiled
 * as php_stripslashes() itself in the NATIVE configuration and as
 * php_stripslashes_sse42() in the RESOLVER configuration. */
# ifdef ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI void php_stripslashes(zend_string *str)
# elif defined(ZEND_INTRIN_SSE4_2_RESOLVER)
void php_stripslashes_sse42(zend_string *str)
# endif
{
	/* s reads and t writes the same buffer; t never gets ahead of s. */
	const char *s = ZSTR_VAL(str);
	char *t = ZSTR_VAL(str);
	/* Number of input bytes not yet consumed. */
	size_t l = ZSTR_LEN(str);

	if (l > 15) {
		const __m128i slash = _mm_set1_epi8('\\');

		do {
			__m128i in = _mm_loadu_si128((__m128i *)s);
			__m128i any_slash = _mm_cmpeq_epi8(in, slash);
			/* One bit per byte of the block; set where the byte is a backslash. */
			uint32_t res = _mm_movemask_epi8(any_slash);

			if (res) {
				/* n = number of bytes before the first backslash. */
				int i, n = zend_ulong_ntz(res);
				/* e is the last byte of this block. It is only consumed here
				 * as the byte following a backslash at block offset 14;
				 * otherwise it is left for the next round. */
				const char *e = s + 15;
				l -= n;
				for (i = 0; i < n; i++) {
					*t++ = *s++;
				}
				for (; s < e; s++) {
					if (*s == '\\') {
						/* Skip the backslash and emit the byte after it. */
						s++;
						l--;
						if (*s == '0') {
							*t = '\0';
						} else {
							*t = *s;
						}
					} else {
						*t = *s;
					}
					t++;
					l--;
				}
			} else {
				/* No backslash in this block: move all 16 bytes at once. */
				_mm_storeu_si128((__m128i *)t, in);
				s += 16;
				t += 16;
				l -= 16;
			}
		} while (l > 15);
	}

	/* Handle the remaining bytes with the shared bytewise routine. */
	t = php_stripslashes_impl(s, t, l);
	if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
		/* At least one backslash was removed: shrink and re-terminate. */
		ZSTR_LEN(str) = t - ZSTR_VAL(str);
		ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
	}
}
#endif
4101
4102 #ifndef ZEND_INTRIN_SSE4_2_NATIVE
4103 # ifdef ZEND_INTRIN_SSE4_2_RESOLVER
4104 void php_stripslashes_default(zend_string *str) /* {{{ */
4105 # else
4106 PHPAPI void php_stripslashes(zend_string *str)
4107 # endif
4108 {
4109 const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
4110 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4111 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4112 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4113 }
4114 }
4115 /* }}} */
4116 #endif
4117 /* }}} */
4118
/* Block kinds used by hebrev() while splitting the input into runs. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/* Byte classifiers used by hebrev(). Each yields 1 or 0 after converting the
 * argument to unsigned char. The argument is parenthesized so that the cast
 * applies to the whole expression, but it is still evaluated twice: do not
 * pass expressions with side effects. */
/* 1 if the byte lies in the range 224..250. */
#define isheb(c) (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
/* 1 if the byte is a space or a horizontal tab. */
#define _isblank(c) (((((unsigned char) (c)) == ' ' || ((unsigned char) (c)) == '\t')) ? 1 : 0)
/* 1 if the byte is a line feed or a carriage return. */
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4124
4125 /* {{{ php_str_replace_in_subject */
/* Applies one str_replace()/str_ireplace() search/replace specification to a
 * single subject string.
 *
 * search_str / search_ht:   the search value as a string, or (when search_ht
 *                           is non-NULL) as an array whose entries are applied
 *                           one after the other.
 * replace_str / replace_ht: the replacement as a string or as an array. With
 *                           a search array and a replace array, entries are
 *                           paired up in order; once the replace array is
 *                           exhausted the empty string is used.
 * subject_str:              the string to work on.
 * result:                   receives the resulting string.
 * case_sensitivity:         true for str_replace, false for str_ireplace.
 *
 * Returns the replacement count accumulated through the helper calls. */
static zend_long php_str_replace_in_subject(
	zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, zval *result, bool case_sensitivity
) {
	zval *search_entry;
	zend_string *tmp_result;
	char *replace_value = NULL;
	size_t replace_len = 0;
	zend_long replace_count = 0;
	/* Lower-cased copy of the current subject, created on demand for
	 * case-insensitive multi-byte searches and dropped whenever the subject
	 * changes. */
	zend_string *lc_subject_str = NULL;
	/* Cursor into replace_ht; only assigned and read when replace_ht is set. */
	uint32_t replace_idx;

	if (ZSTR_LEN(subject_str) == 0) {
		ZVAL_EMPTY_STRING(result);
		return 0;
	}

	/* If search is an array */
	if (search_ht) {
		/* Duplicate subject string for repeated replacement */
		zend_string_addref(subject_str);

		if (replace_ht) {
			replace_idx = 0;
		} else {
			/* Set replacement value to the passed one */
			replace_value = ZSTR_VAL(replace_str);
			replace_len = ZSTR_LEN(replace_str);
		}

		/* For each entry in the search array, get the entry */
		ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
			/* Make sure we're dealing with strings. */
			zend_string *tmp_search_str;
			/* Note: intentionally shadows the search_str parameter. */
			zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
			zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;

			/* If replace is an array. */
			if (replace_ht) {
				/* Get current entry */
				zval *replace_entry = NULL;
				/* Advance replace_idx to the next slot that is not IS_UNDEF. */
				if (HT_IS_PACKED(replace_ht)) {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arPacked[replace_idx];
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				} else {
					while (replace_idx < replace_ht->nNumUsed) {
						replace_entry = &replace_ht->arData[replace_idx].val;
						if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
							break;
						}
						replace_idx++;
					}
				}
				if (replace_idx < replace_ht->nNumUsed) {
					/* Make sure we're dealing with strings. */
					replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);

					/* Set replacement value to the one we got from array */
					replace_value = ZSTR_VAL(replace_entry_str);
					replace_len = ZSTR_LEN(replace_entry_str);

					replace_idx++;
				} else {
					/* We've run out of replacement strings, so use an empty one. */
					replace_value = "";
					replace_len = 0;
				}
			}

			if (ZSTR_LEN(search_str) == 1) {
				/* Single-byte search. */
				zend_long old_replace_count = replace_count;

				tmp_result = php_char_to_str_ex(subject_str,
								ZSTR_VAL(search_str)[0],
								replace_value,
								replace_len,
								case_sensitivity,
								&replace_count);
				/* The subject changed, so a cached lower-cased copy is stale. */
				if (lc_subject_str && replace_count != old_replace_count) {
					zend_string_release_ex(lc_subject_str, 0);
					lc_subject_str = NULL;
				}
			} else if (ZSTR_LEN(search_str) > 1) {
				if (case_sensitivity) {
					tmp_result = php_str_to_str_ex(subject_str,
							ZSTR_VAL(search_str), ZSTR_LEN(search_str),
							replace_value, replace_len, &replace_count);
				} else {
					zend_long old_replace_count = replace_count;

					if (!lc_subject_str) {
						lc_subject_str = zend_string_tolower(subject_str);
					}
					tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
							search_str, replace_value, replace_len, &replace_count);
					/* The subject changed, so the lower-cased copy is stale. */
					if (replace_count != old_replace_count) {
						zend_string_release_ex(lc_subject_str, 0);
						lc_subject_str = NULL;
					}
				}
			} else {
				/* Empty search entry: nothing to do, but the matching
				 * replace entry has still been consumed above. */
				zend_tmp_string_release(tmp_search_str);
				zend_tmp_string_release(tmp_replace_entry_str);
				continue;
			}

			zend_tmp_string_release(tmp_search_str);
			zend_tmp_string_release(tmp_replace_entry_str);

			if (subject_str == tmp_result) {
				/* The helper returned the subject itself: drop one reference
				 * so the count held on subject_str stays balanced. */
				zend_string_delref(subject_str);
			} else {
				/* Continue with the new string as the subject. */
				zend_string_release_ex(subject_str, 0);
				subject_str = tmp_result;
				if (ZSTR_LEN(subject_str) == 0) {
					/* Nothing left to search in: stop early. */
					zend_string_release_ex(subject_str, 0);
					ZVAL_EMPTY_STRING(result);
					if (lc_subject_str) {
						zend_string_release_ex(lc_subject_str, 0);
					}
					return replace_count;
				}
			}
		} ZEND_HASH_FOREACH_END();
		ZVAL_STR(result, subject_str);
		if (lc_subject_str) {
			zend_string_release_ex(lc_subject_str, 0);
		}
	} else {
		/* Search is a single string. */
		ZEND_ASSERT(search_str);
		if (ZSTR_LEN(search_str) == 1) {
			ZVAL_STR(result,
				php_char_to_str_ex(subject_str,
							ZSTR_VAL(search_str)[0],
							ZSTR_VAL(replace_str),
							ZSTR_LEN(replace_str),
							case_sensitivity,
							&replace_count));
		} else if (ZSTR_LEN(search_str) > 1) {
			if (case_sensitivity) {
				ZVAL_STR(result, php_str_to_str_ex(subject_str,
						ZSTR_VAL(search_str), ZSTR_LEN(search_str),
						ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
			} else {
				lc_subject_str = zend_string_tolower(subject_str);
				ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
						search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
				zend_string_release_ex(lc_subject_str, 0);
			}
		} else {
			/* Empty search string: the subject is returned unchanged. */
			ZVAL_STR_COPY(result, subject_str);
		}
	}
	return replace_count;
}
4286 /* }}} */
4287
4288 /* {{{ php_str_replace_common */
4289 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool case_sensitivity)
4290 {
4291 zend_string *search_str;
4292 HashTable *search_ht;
4293 zend_string *replace_str;
4294 HashTable *replace_ht;
4295 zend_string *subject_str;
4296 HashTable *subject_ht;
4297 zval *subject_entry, *zcount = NULL;
4298 zval result;
4299 zend_string *string_key;
4300 zend_ulong num_key;
4301 zend_long count = 0;
4302
4303 ZEND_PARSE_PARAMETERS_START(3, 4)
4304 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4305 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4306 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4307 Z_PARAM_OPTIONAL
4308 Z_PARAM_ZVAL(zcount)
4309 ZEND_PARSE_PARAMETERS_END();
4310
4311 /* Make sure we're dealing with strings and do the replacement. */
4312 if (search_str && replace_ht) {
4313 zend_argument_type_error(2, "must be of type %s when argument #1 ($search) is %s",
4314 search_str ? "string" : "array", search_str ? "a string" : "an array"
4315 );
4316 RETURN_THROWS();
4317 }
4318
4319 /* if subject is an array */
4320 if (subject_ht) {
4321 array_init(return_value);
4322
4323 /* For each subject entry, convert it to string, then perform replacement
4324 and add the result to the return_value array. */
4325 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
4326 zend_string *tmp_subject_str;
4327 ZVAL_DEREF(subject_entry);
4328 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4329 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4330 zend_tmp_string_release(tmp_subject_str);
4331
4332 /* Add to return array */
4333 if (string_key) {
4334 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4335 } else {
4336 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4337 }
4338 } ZEND_HASH_FOREACH_END();
4339 } else { /* if subject is not an array */
4340 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4341 }
4342 if (zcount) {
4343 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4344 }
4345 }
4346 /* }}} */
4347
4348 /* {{{ Replaces all occurrences of search in haystack with replace */
4349 PHP_FUNCTION(str_replace)
4350 {
4351 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4352 }
4353 /* }}} */
4354
4355 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4356 PHP_FUNCTION(str_ireplace)
4357 {
4358 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4359 }
4360 /* }}} */
4361
4362 /* {{{ Converts logical Hebrew text to visual text */
4363 PHP_FUNCTION(hebrev)
4364 {
4365 char *str, *heb_str, *target;
4366 const char *tmp;
4367 size_t block_start, block_end, block_type, i;
4368 zend_long max_chars=0, char_count;
4369 size_t begin, end, orig_begin;
4370 size_t str_len;
4371 zend_string *broken_str;
4372
4373 ZEND_PARSE_PARAMETERS_START(1, 2)
4374 Z_PARAM_STRING(str, str_len)
4375 Z_PARAM_OPTIONAL
4376 Z_PARAM_LONG(max_chars)
4377 ZEND_PARSE_PARAMETERS_END();
4378
4379 if (str_len == 0) {
4380 RETURN_EMPTY_STRING();
4381 }
4382
4383 tmp = str;
4384 block_start=block_end=0;
4385
4386 heb_str = (char *) emalloc(str_len+1);
4387 target = heb_str+str_len;
4388 *target = 0;
4389 target--;
4390
4391 if (isheb(*tmp)) {
4392 block_type = _HEB_BLOCK_TYPE_HEB;
4393 } else {
4394 block_type = _HEB_BLOCK_TYPE_ENG;
4395 }
4396
4397 do {
4398 if (block_type == _HEB_BLOCK_TYPE_HEB) {
4399 while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
4400 tmp++;
4401 block_end++;
4402 }
4403 for (i = block_start+1; i<= block_end+1; i++) {
4404 *target = str[i-1];
4405 switch (*target) {
4406 case '(':
4407 *target = ')';
4408 break;
4409 case ')':
4410 *target = '(';
4411 break;
4412 case '[':
4413 *target = ']';
4414 break;
4415 case ']':
4416 *target = '[';
4417 break;
4418 case '{':
4419 *target = '}';
4420 break;
4421 case '}':
4422 *target = '{';
4423 break;
4424 case '<':
4425 *target = '>';
4426 break;
4427 case '>':
4428 *target = '<';
4429 break;
4430 case '\\':
4431 *target = '/';
4432 break;
4433 case '/':
4434 *target = '\\';
4435 break;
4436 default:
4437 break;
4438 }
4439 target--;
4440 }
4441 block_type = _HEB_BLOCK_TYPE_ENG;
4442 } else {
4443 while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
4444 tmp++;
4445 block_end++;
4446 }
4447 while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
4448 tmp--;
4449 block_end--;
4450 }
4451 for (i = block_end+1; i >= block_start+1; i--) {
4452 *target = str[i-1];
4453 target--;
4454 }
4455 block_type = _HEB_BLOCK_TYPE_HEB;
4456 }
4457 block_start=block_end+1;
4458 } while (block_end < str_len-1);
4459
4460
4461 broken_str = zend_string_alloc(str_len, 0);
4462 begin = end = str_len-1;
4463 target = ZSTR_VAL(broken_str);
4464
4465 while (1) {
4466 char_count=0;
4467 while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
4468 char_count++;
4469 begin--;
4470 if (_isnewline(heb_str[begin])) {
4471 while (begin > 0 && _isnewline(heb_str[begin-1])) {
4472 begin--;
4473 char_count++;
4474 }
4475 break;
4476 }
4477 }
4478 if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
4479 size_t new_char_count=char_count, new_begin=begin;
4480
4481 while (new_char_count > 0) {
4482 if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
4483 break;
4484 }
4485 new_begin++;
4486 new_char_count--;
4487 }
4488 if (new_char_count > 0) {
4489 begin=new_begin;
4490 }
4491 }
4492 orig_begin=begin;
4493
4494 if (_isblank(heb_str[begin])) {
4495 heb_str[begin]='\n';
4496 }
4497 while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
4498 begin++;
4499 }
4500 for (i = begin; i <= end; i++) { /* copy content */
4501 *target = heb_str[i];
4502 target++;
4503 }
4504 for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
4505 *target = heb_str[i];
4506 target++;
4507 }
4508 begin=orig_begin;
4509
4510 if (begin == 0) {
4511 *target = 0;
4512 break;
4513 }
4514 begin--;
4515 end=begin;
4516 }
4517 efree(heb_str);
4518
4519 RETURN_NEW_STR(broken_str);
4520 }
4521 /* }}} */
4522
4523 /* {{{ Converts newlines to HTML line breaks */
4524 PHP_FUNCTION(nl2br)
4525 {
4526 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4527 const char *tmp, *end;
4528 zend_string *str;
4529 char *target;
4530 size_t repl_cnt = 0;
4531 bool is_xhtml = 1;
4532 zend_string *result;
4533
4534 ZEND_PARSE_PARAMETERS_START(1, 2)
4535 Z_PARAM_STR(str)
4536 Z_PARAM_OPTIONAL
4537 Z_PARAM_BOOL(is_xhtml)
4538 ZEND_PARSE_PARAMETERS_END();
4539
4540 tmp = ZSTR_VAL(str);
4541 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4542
4543 /* it is really faster to scan twice and allocate mem once instead of scanning once
4544 and constantly reallocing */
4545 while (tmp < end) {
4546 if (*tmp == '\r') {
4547 if (*(tmp+1) == '\n') {
4548 tmp++;
4549 }
4550 repl_cnt++;
4551 } else if (*tmp == '\n') {
4552 if (*(tmp+1) == '\r') {
4553 tmp++;
4554 }
4555 repl_cnt++;
4556 }
4557
4558 tmp++;
4559 }
4560
4561 if (repl_cnt == 0) {
4562 RETURN_STR_COPY(str);
4563 }
4564
4565 {
4566 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4567
4568 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4569 target = ZSTR_VAL(result);
4570 }
4571
4572 tmp = ZSTR_VAL(str);
4573 while (tmp < end) {
4574 switch (*tmp) {
4575 case '\r':
4576 case '\n':
4577 *target++ = '<';
4578 *target++ = 'b';
4579 *target++ = 'r';
4580
4581 if (is_xhtml) {
4582 *target++ = ' ';
4583 *target++ = '/';
4584 }
4585
4586 *target++ = '>';
4587
4588 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4589 *target++ = *tmp++;
4590 }
4591 ZEND_FALLTHROUGH;
4592 default:
4593 *target++ = *tmp;
4594 }
4595
4596 tmp++;
4597 }
4598
4599 *target = '\0';
4600
4601 RETURN_NEW_STR(result);
4602 }
4603 /* }}} */
4604
4605 /* {{{ Strips HTML and PHP tags from a string */
4606 PHP_FUNCTION(strip_tags)
4607 {
4608 zend_string *buf;
4609 zend_string *str;
4610 zend_string *allow_str = NULL;
4611 HashTable *allow_ht = NULL;
4612 const char *allowed_tags=NULL;
4613 size_t allowed_tags_len=0;
4614 smart_str tags_ss = {0};
4615
4616 ZEND_PARSE_PARAMETERS_START(1, 2)
4617 Z_PARAM_STR(str)
4618 Z_PARAM_OPTIONAL
4619 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4620 ZEND_PARSE_PARAMETERS_END();
4621
4622 if (allow_ht) {
4623 zval *tmp;
4624 zend_string *tag;
4625
4626 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4627 tag = zval_get_string(tmp);
4628 smart_str_appendc(&tags_ss, '<');
4629 smart_str_append(&tags_ss, tag);
4630 smart_str_appendc(&tags_ss, '>');
4631 zend_string_release(tag);
4632 } ZEND_HASH_FOREACH_END();
4633 if (tags_ss.s) {
4634 smart_str_0(&tags_ss);
4635 allowed_tags = ZSTR_VAL(tags_ss.s);
4636 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4637 }
4638 } else if (allow_str) {
4639 allowed_tags = ZSTR_VAL(allow_str);
4640 allowed_tags_len = ZSTR_LEN(allow_str);
4641 }
4642
4643 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4644 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4645 smart_str_free(&tags_ss);
4646 RETURN_NEW_STR(buf);
4647 }
4648 /* }}} */
4649
4650 static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
4651 const char *retval;
4652
4653 if (zend_string_equals_literal(loc, "0")) {
4654 loc = NULL;
4655 } else {
4656 if (ZSTR_LEN(loc) >= 255) {
4657 php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
4658 return NULL;
4659 }
4660 }
4661
4662 # ifndef PHP_WIN32
4663 retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
4664 # else
4665 if (loc) {
4666 /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
4667 char *locp = ZSTR_VAL(loc);
4668 if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
4669 && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
4670 && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
4671 && (locp[5] == '\0' || locp[5] == '.')
4672 && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
4673 && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
4674 && locp[5] == '\0')
4675 ) {
4676 retval = NULL;
4677 } else {
4678 retval = setlocale(cat, ZSTR_VAL(loc));
4679 }
4680 } else {
4681 retval = setlocale(cat, NULL);
4682 }
4683 # endif
4684 if (!retval) {
4685 return NULL;
4686 }
4687
4688 if (loc) {
4689 /* Remember if locale was changed */
4690 size_t len = strlen(retval);
4691
4692 BG(locale_changed) = 1;
4693 if (cat == LC_CTYPE || cat == LC_ALL) {
4694 zend_update_current_locale();
4695 if (BG(ctype_string)) {
4696 zend_string_release_ex(BG(ctype_string), 0);
4697 }
4698 if (len == 1 && *retval == 'C') {
4699 /* C locale is represented as NULL. */
4700 BG(ctype_string) = NULL;
4701 return ZSTR_CHAR('C');
4702 } else if (zend_string_equals_cstr(loc, retval, len)) {
4703 BG(ctype_string) = zend_string_copy(loc);
4704 return zend_string_copy(BG(ctype_string));
4705 } else {
4706 BG(ctype_string) = zend_string_init(retval, len, 0);
4707 return zend_string_copy(BG(ctype_string));
4708 }
4709 } else if (zend_string_equals_cstr(loc, retval, len)) {
4710 return zend_string_copy(loc);
4711 }
4712 }
4713 return zend_string_init(retval, strlen(retval), 0);
4714 }
4715
4716 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4717 zend_string *tmp_loc_str;
4718 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4719 if (UNEXPECTED(loc_str == NULL)) {
4720 return NULL;
4721 }
4722 zend_string *result = try_setlocale_str(cat, loc_str);
4723 zend_tmp_string_release(tmp_loc_str);
4724 return result;
4725 }
4726
4727 /* {{{ Set locale information */
4728 PHP_FUNCTION(setlocale)
4729 {
4730 zend_long cat;
4731 zval *args = NULL;
4732 int num_args;
4733
4734 ZEND_PARSE_PARAMETERS_START(2, -1)
4735 Z_PARAM_LONG(cat)
4736 Z_PARAM_VARIADIC('+', args, num_args)
4737 ZEND_PARSE_PARAMETERS_END();
4738
4739 for (uint32_t i = 0; i < num_args; i++) {
4740 if (Z_TYPE(args[i]) == IS_ARRAY) {
4741 zval *elem;
4742 ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
4743 zend_string *result = try_setlocale_zval(cat, elem);
4744 if (EG(exception)) {
4745 RETURN_THROWS();
4746 }
4747 if (result) {
4748 RETURN_STR(result);
4749 }
4750 } ZEND_HASH_FOREACH_END();
4751 } else {
4752 zend_string *result = try_setlocale_zval(cat, &args[i]);
4753 if (EG(exception)) {
4754 RETURN_THROWS();
4755 }
4756 if (result) {
4757 RETURN_STR(result);
4758 }
4759 }
4760 }
4761
4762 RETURN_FALSE;
4763 }
4764 /* }}} */
4765
4766 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
4767 PHP_FUNCTION(parse_str)
4768 {
4769 char *arg;
4770 zval *arrayArg = NULL;
4771 char *res = NULL;
4772 size_t arglen;
4773
4774 ZEND_PARSE_PARAMETERS_START(2, 2)
4775 Z_PARAM_STRING(arg, arglen)
4776 Z_PARAM_ZVAL(arrayArg)
4777 ZEND_PARSE_PARAMETERS_END();
4778
4779 arrayArg = zend_try_array_init(arrayArg);
4780 if (!arrayArg) {
4781 RETURN_THROWS();
4782 }
4783
4784 res = estrndup(arg, arglen);
4785 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
4786 }
4787 /* }}} */
4788
4789 #define PHP_TAG_BUF_SIZE 1023
4790
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags
 *
 * tag - raw tag text; the scan stops at the first '>' or at whitespace that
 *       follows the tag name, so the caller must supply text containing one
 *       of those (php_strip_tags_ex() always terminates it with '>')
 * len - length of tag; only used to size the scratch buffer
 * set - allow list searched with strstr(), e.g. "<a><b>"
 *
 * Returns true when the normalized, lower-cased "<name>" occurs in set,
 * false otherwise (including when len is 0).
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
static bool php_tag_find(char *tag, size_t len, const char *set) {
	char c, *n;
	const char *t;
	int state = 0;
	bool done = 0;
	bool found;
	char *norm;

	if (len == 0) {
		return 0;
	}

	norm = emalloc(len+1);

	n = norm;
	t = tag;
	c = zend_tolower_ascii(*t);
	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	while (!done) {
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = 1;
				break;
			default:
				/* <ctype.h> functions require a value representable as
				 * unsigned char; passing a negative plain char is undefined. */
				if (!isspace((unsigned char)c)) {
					if (state == 0) {
						state = 1;
					}
					/* Drop the '/' of "</tag" and of "tag/>". */
					if (c != '/' || (*(t-1) != '<' && *(t+1) != '>')) {
						*(n++) = c;
					}
				} else {
					/* Whitespace after the name ends the tag name. */
					if (state == 1) {
						done = 1;
					}
				}
				break;
		}
		c = zend_tolower_ascii(*(++t));
	}
	*(n++) = '>';
	*n = '\0';

	found = strstr(set, norm) != NULL;

	efree(norm);
	return found;
}
4855 /* }}} */
4856
4857 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
4858 {
4859 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
4860 }
4861 /* }}} */
4862
4863 /* {{{ php_strip_tags
4864
4865 A simple little state-machine to strip out html and php tags
4866
4867 State 0 is the output state, State 1 means we are inside a
4868 normal html tag and state 2 means we are inside a php tag.
4869
4870 The state variable is passed in to allow a function like fgetss
4871 to maintain state across calls to the function.
4872
4873 lc holds the last significant character read and br is a bracket
4874 counter.
4875
4876 When an allow string is passed in we keep track of the string
4877 in state 1 and when the tag is closed check it against the
4878 allow string to see if we should allow it.
4879
4880 swm: Added ability to strip <?xml tags without assuming it PHP
4881 code.
4882 */
4883 PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
4884 {
4885 char *tbuf, *tp, *rp, c, lc;
4886 const char *buf, *p, *end;
4887 int br, depth=0, in_q = 0;
4888 uint8_t state = 0;
4889 size_t pos;
4890 char *allow_free = NULL;
4891 char is_xml = 0;
4892
4893 buf = estrndup(rbuf, len);
4894 end = buf + len;
4895 lc = '\0';
4896 p = buf;
4897 rp = rbuf;
4898 br = 0;
4899 if (allow) {
4900 allow_free = zend_str_tolower_dup_ex(allow, allow_len);
4901 allow = allow_free ? allow_free : allow;
4902 tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
4903 tp = tbuf;
4904 } else {
4905 tbuf = tp = NULL;
4906 }
4907
4908 state_0:
4909 if (p >= end) {
4910 goto finish;
4911 }
4912 c = *p;
4913 switch (c) {
4914 case '\0':
4915 break;
4916 case '<':
4917 if (in_q) {
4918 break;
4919 }
4920 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4921 *(rp++) = c;
4922 break;
4923 }
4924 lc = '<';
4925 state = 1;
4926 if (allow) {
4927 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4928 pos = tp - tbuf;
4929 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4930 tp = tbuf + pos;
4931 }
4932 *(tp++) = '<';
4933 }
4934 p++;
4935 goto state_1;
4936 case '>':
4937 if (depth) {
4938 depth--;
4939 break;
4940 }
4941
4942 if (in_q) {
4943 break;
4944 }
4945
4946 *(rp++) = c;
4947 break;
4948 default:
4949 *(rp++) = c;
4950 break;
4951 }
4952 p++;
4953 goto state_0;
4954
4955 state_1:
4956 if (p >= end) {
4957 goto finish;
4958 }
4959 c = *p;
4960 switch (c) {
4961 case '\0':
4962 break;
4963 case '<':
4964 if (in_q) {
4965 break;
4966 }
4967 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4968 goto reg_char_1;
4969 }
4970 depth++;
4971 break;
4972 case '>':
4973 if (depth) {
4974 depth--;
4975 break;
4976 }
4977 if (in_q) {
4978 break;
4979 }
4980
4981 lc = '>';
4982 if (is_xml && p >= buf + 1 && *(p -1) == '-') {
4983 break;
4984 }
4985 in_q = state = is_xml = 0;
4986 if (allow) {
4987 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4988 pos = tp - tbuf;
4989 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4990 tp = tbuf + pos;
4991 }
4992 *(tp++) = '>';
4993 *tp='\0';
4994 if (php_tag_find(tbuf, tp-tbuf, allow)) {
4995 memcpy(rp, tbuf, tp-tbuf);
4996 rp += tp-tbuf;
4997 }
4998 tp = tbuf;
4999 }
5000 p++;
5001 goto state_0;
5002 case '"':
5003 case '\'':
5004 if (p != buf && (!in_q || *p == in_q)) {
5005 if (in_q) {
5006 in_q = 0;
5007 } else {
5008 in_q = *p;
5009 }
5010 }
5011 goto reg_char_1;
5012 case '!':
5013 /* JavaScript & Other HTML scripting languages */
5014 if (p >= buf + 1 && *(p-1) == '<') {
5015 state = 3;
5016 lc = c;
5017 p++;
5018 goto state_3;
5019 } else {
5020 goto reg_char_1;
5021 }
5022 break;
5023 case '?':
5024 if (p >= buf + 1 && *(p-1) == '<') {
5025 br=0;
5026 state = 2;
5027 p++;
5028 goto state_2;
5029 } else {
5030 goto reg_char_1;
5031 }
5032 break;
5033 default:
5034 reg_char_1:
5035 if (allow) {
5036 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
5037 pos = tp - tbuf;
5038 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
5039 tp = tbuf + pos;
5040 }
5041 *(tp++) = c;
5042 }
5043 break;
5044 }
5045 p++;
5046 goto state_1;
5047
5048 state_2:
5049 if (p >= end) {
5050 goto finish;
5051 }
5052 c = *p;
5053 switch (c) {
5054 case '(':
5055 if (lc != '"' && lc != '\'') {
5056 lc = '(';
5057 br++;
5058 }
5059 break;
5060 case ')':
5061 if (lc != '"' && lc != '\'') {
5062 lc = ')';
5063 br--;
5064 }
5065 break;
5066 case '>':
5067 if (depth) {
5068 depth--;
5069 break;
5070 }
5071 if (in_q) {
5072 break;
5073 }
5074
5075 if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
5076 in_q = state = 0;
5077 tp = tbuf;
5078 p++;
5079 goto state_0;
5080 }
5081 break;
5082 case '"':
5083 case '\'':
5084 if (p >= buf + 1 && *(p-1) != '\\') {
5085 if (lc == c) {
5086 lc = '\0';
5087 } else if (lc != '\\') {
5088 lc = c;
5089 }
5090 if (p != buf && (!in_q || *p == in_q)) {
5091 if (in_q) {
5092 in_q = 0;
5093 } else {
5094 in_q = *p;
5095 }
5096 }
5097 }
5098 break;
5099 case 'l':
5100 case 'L':
5101 /* swm: If we encounter '<?xml' then we shouldn't be in
5102 * state == 2 (PHP). Switch back to HTML.
5103 */
5104 if (state == 2 && p > buf+4
5105 && (*(p-1) == 'm' || *(p-1) == 'M')
5106 && (*(p-2) == 'x' || *(p-2) == 'X')
5107 && *(p-3) == '?'
5108 && *(p-4) == '<') {
5109 state = 1; is_xml=1;
5110 p++;
5111 goto state_1;
5112 }
5113 break;
5114 default:
5115 break;
5116 }
5117 p++;
5118 goto state_2;
5119
5120 state_3:
5121 if (p >= end) {
5122 goto finish;
5123 }
5124 c = *p;
5125 switch (c) {
5126 case '>':
5127 if (depth) {
5128 depth--;
5129 break;
5130 }
5131 if (in_q) {
5132 break;
5133 }
5134 in_q = state = 0;
5135 tp = tbuf;
5136 p++;
5137 goto state_0;
5138 case '"':
5139 case '\'':
5140 if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
5141 if (in_q) {
5142 in_q = 0;
5143 } else {
5144 in_q = *p;
5145 }
5146 }
5147 break;
5148 case '-':
5149 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
5150 state = 4;
5151 p++;
5152 goto state_4;
5153 }
5154 break;
5155 case 'E':
5156 case 'e':
5157 /* !DOCTYPE exception */
5158 if (p > buf+6
5159 && (*(p-1) == 'p' || *(p-1) == 'P')
5160 && (*(p-2) == 'y' || *(p-2) == 'Y')
5161 && (*(p-3) == 't' || *(p-3) == 'T')
5162 && (*(p-4) == 'c' || *(p-4) == 'C')
5163 && (*(p-5) == 'o' || *(p-5) == 'O')
5164 && (*(p-6) == 'd' || *(p-6) == 'D')) {
5165 state = 1;
5166 p++;
5167 goto state_1;
5168 }
5169 break;
5170 default:
5171 break;
5172 }
5173 p++;
5174 goto state_3;
5175
5176 state_4:
5177 while (p < end) {
5178 c = *p;
5179 if (c == '>' && !in_q) {
5180 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
5181 in_q = state = 0;
5182 tp = tbuf;
5183 p++;
5184 goto state_0;
5185 }
5186 }
5187 p++;
5188 }
5189
5190 finish:
5191 if (rp < rbuf + len) {
5192 *rp = '\0';
5193 }
5194 efree((void *)buf);
5195 if (tbuf) {
5196 efree(tbuf);
5197 }
5198 if (allow_free) {
5199 efree(allow_free);
5200 }
5201
5202 return (size_t)(rp - rbuf);
5203 }
5204 /* }}} */
5205
5206 /* {{{ Parse a CSV string into an array */
5207 PHP_FUNCTION(str_getcsv)
5208 {
5209 zend_string *str;
5210 char delim = ',', enc = '"';
5211 int esc = (unsigned char) '\\';
5212 char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
5213 size_t delim_len = 0, enc_len = 0, esc_len = 0;
5214
5215 ZEND_PARSE_PARAMETERS_START(1, 4)
5216 Z_PARAM_STR(str)
5217 Z_PARAM_OPTIONAL
5218 Z_PARAM_STRING(delim_str, delim_len)
5219 Z_PARAM_STRING(enc_str, enc_len)
5220 Z_PARAM_STRING(esc_str, esc_len)
5221 ZEND_PARSE_PARAMETERS_END();
5222
5223 delim = delim_len ? delim_str[0] : delim;
5224 enc = enc_len ? enc_str[0] : enc;
5225 if (esc_str != NULL) {
5226 esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
5227 }
5228
5229 HashTable *values = php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str));
5230 if (values == NULL) {
5231 values = php_bc_fgetcsv_empty_line();
5232 }
5233 RETURN_ARR(values);
5234 }
5235 /* }}} */
5236
5237 /* {{{ Returns the input string repeat mult times */
5238 PHP_FUNCTION(str_repeat)
5239 {
5240 zend_string *input_str; /* Input string */
5241 zend_long mult; /* Multiplier */
5242 zend_string *result; /* Resulting string */
5243 size_t result_len; /* Length of the resulting string */
5244
5245 ZEND_PARSE_PARAMETERS_START(2, 2)
5246 Z_PARAM_STR(input_str)
5247 Z_PARAM_LONG(mult)
5248 ZEND_PARSE_PARAMETERS_END();
5249
5250 if (mult < 0) {
5251 zend_argument_value_error(2, "must be greater than or equal to 0");
5252 RETURN_THROWS();
5253 }
5254
5255 /* Don't waste our time if it's empty */
5256 /* ... or if the multiplier is zero */
5257 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5258 RETURN_EMPTY_STRING();
5259
5260 /* Initialize the result string */
5261 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5262 result_len = ZSTR_LEN(input_str) * mult;
5263
5264 /* Heavy optimization for situations where input string is 1 byte long */
5265 if (ZSTR_LEN(input_str) == 1) {
5266 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5267 } else {
5268 const char *s, *ee;
5269 char *e;
5270 ptrdiff_t l=0;
5271 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5272 s = ZSTR_VAL(result);
5273 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5274 ee = ZSTR_VAL(result) + result_len;
5275
5276 while (e<ee) {
5277 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5278 memmove(e, s, l);
5279 e += l;
5280 }
5281 }
5282
5283 ZSTR_VAL(result)[result_len] = '\0';
5284
5285 RETURN_NEW_STR(result);
5286 }
5287 /* }}} */
5288
5289 /* {{{ Returns info about what characters are used in input */
5290 PHP_FUNCTION(count_chars)
5291 {
5292 zend_string *input;
5293 int chars[256];
5294 zend_long mymode=0;
5295 const unsigned char *buf;
5296 int inx;
5297 char retstr[256];
5298 size_t retlen=0;
5299 size_t tmp = 0;
5300
5301 ZEND_PARSE_PARAMETERS_START(1, 2)
5302 Z_PARAM_STR(input)
5303 Z_PARAM_OPTIONAL
5304 Z_PARAM_LONG(mymode)
5305 ZEND_PARSE_PARAMETERS_END();
5306
5307 if (mymode < 0 || mymode > 4) {
5308 zend_argument_value_error(2, "must be between 0 and 4 (inclusive)");
5309 RETURN_THROWS();
5310 }
5311
5312 buf = (const unsigned char *) ZSTR_VAL(input);
5313 memset((void*) chars, 0, sizeof(chars));
5314
5315 while (tmp < ZSTR_LEN(input)) {
5316 chars[*buf]++;
5317 buf++;
5318 tmp++;
5319 }
5320
5321 if (mymode < 3) {
5322 array_init(return_value);
5323 }
5324
5325 for (inx = 0; inx < 256; inx++) {
5326 switch (mymode) {
5327 case 0:
5328 add_index_long(return_value, inx, chars[inx]);
5329 break;
5330 case 1:
5331 if (chars[inx] != 0) {
5332 add_index_long(return_value, inx, chars[inx]);
5333 }
5334 break;
5335 case 2:
5336 if (chars[inx] == 0) {
5337 add_index_long(return_value, inx, chars[inx]);
5338 }
5339 break;
5340 case 3:
5341 if (chars[inx] != 0) {
5342 retstr[retlen++] = inx;
5343 }
5344 break;
5345 case 4:
5346 if (chars[inx] == 0) {
5347 retstr[retlen++] = inx;
5348 }
5349 break;
5350 }
5351 }
5352
5353 if (mymode == 3 || mymode == 4) {
5354 RETURN_STRINGL(retstr, retlen);
5355 }
5356 }
5357 /* }}} */
5358
5359 /* {{{ php_strnatcmp */
5360 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, bool is_case_insensitive)
5361 {
5362 zend_string *s1, *s2;
5363
5364 ZEND_PARSE_PARAMETERS_START(2, 2)
5365 Z_PARAM_STR(s1)
5366 Z_PARAM_STR(s2)
5367 ZEND_PARSE_PARAMETERS_END();
5368
5369 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5370 ZSTR_VAL(s2), ZSTR_LEN(s2),
5371 is_case_insensitive));
5372 }
5373 /* }}} */
5374
5375 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5376 PHP_FUNCTION(strnatcmp)
5377 {
5378 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5379 }
5380 /* }}} */
5381
5382 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5383 PHP_FUNCTION(strnatcasecmp)
5384 {
5385 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5386 }
5387 /* }}} */
5388
5389 /* {{{ Returns numeric formatting information based on the current locale */
5390 PHP_FUNCTION(localeconv)
5391 {
5392 zval grouping, mon_grouping;
5393 size_t len, i;
5394
5395 ZEND_PARSE_PARAMETERS_NONE();
5396
5397 array_init(return_value);
5398 array_init(&grouping);
5399 array_init(&mon_grouping);
5400
5401 {
5402 struct lconv currlocdata;
5403
5404 localeconv_r( &currlocdata );
5405
5406 /* Grab the grouping data out of the array */
5407 len = strlen(currlocdata.grouping);
5408
5409 for (i = 0; i < len; i++) {
5410 add_index_long(&grouping, i, currlocdata.grouping[i]);
5411 }
5412
5413 /* Grab the monetary grouping data out of the array */
5414 len = strlen(currlocdata.mon_grouping);
5415
5416 for (i = 0; i < len; i++) {
5417 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5418 }
5419
5420 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5421 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5422 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5423 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5424 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5425 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5426 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5427 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5428 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5429 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5430 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5431 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5432 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5433 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5434 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5435 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5436 }
5437
5438 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5439 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5440 }
5441 /* }}} */
5442
5443 /* {{{ Returns the number of times a substring occurs in the string */
5444 PHP_FUNCTION(substr_count)
5445 {
5446 char *haystack, *needle;
5447 zend_long offset = 0, length = 0;
5448 bool length_is_null = 1;
5449 zend_long count;
5450 size_t haystack_len, needle_len;
5451 const char *p, *endp;
5452
5453 ZEND_PARSE_PARAMETERS_START(2, 4)
5454 Z_PARAM_STRING(haystack, haystack_len)
5455 Z_PARAM_STRING(needle, needle_len)
5456 Z_PARAM_OPTIONAL
5457 Z_PARAM_LONG(offset)
5458 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5459 ZEND_PARSE_PARAMETERS_END();
5460
5461 if (needle_len == 0) {
5462 zend_argument_value_error(2, "cannot be empty");
5463 RETURN_THROWS();
5464 }
5465
5466 p = haystack;
5467
5468 if (offset) {
5469 if (offset < 0) {
5470 offset += (zend_long)haystack_len;
5471 }
5472 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5473 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5474 RETURN_THROWS();
5475 }
5476 p += offset;
5477 haystack_len -= offset;
5478 }
5479
5480 if (!length_is_null) {
5481 if (length < 0) {
5482 length += haystack_len;
5483 }
5484 if (length < 0 || ((size_t)length > haystack_len)) {
5485 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5486 RETURN_THROWS();
5487 }
5488 } else {
5489 length = haystack_len;
5490 }
5491
5492 if (needle_len == 1) {
5493 count = count_chars(p, length, needle[0]);
5494 } else {
5495 count = 0;
5496 endp = p + length;
5497 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5498 p += needle_len;
5499 count++;
5500 }
5501 }
5502
5503 RETURN_LONG(count);
5504 }
5505 /* }}} */
5506
5507 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5508 PHP_FUNCTION(str_pad)
5509 {
5510 /* Input arguments */
5511 zend_string *input; /* Input string */
5512 zend_long pad_length; /* Length to pad to */
5513
5514 /* Helper variables */
5515 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5516 char *pad_str = " "; /* Pointer to padding string */
5517 size_t pad_str_len = 1;
5518 zend_long pad_type_val = PHP_STR_PAD_RIGHT; /* The padding type value */
5519 size_t i, left_pad=0, right_pad=0;
5520 zend_string *result = NULL; /* Resulting string */
5521
5522 ZEND_PARSE_PARAMETERS_START(2, 4)
5523 Z_PARAM_STR(input)
5524 Z_PARAM_LONG(pad_length)
5525 Z_PARAM_OPTIONAL
5526 Z_PARAM_STRING(pad_str, pad_str_len)
5527 Z_PARAM_LONG(pad_type_val)
5528 ZEND_PARSE_PARAMETERS_END();
5529
5530 /* If resulting string turns out to be shorter than input string,
5531 we simply copy the input and return. */
5532 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5533 RETURN_STR_COPY(input);
5534 }
5535
5536 if (pad_str_len == 0) {
5537 zend_argument_value_error(3, "must be a non-empty string");
5538 RETURN_THROWS();
5539 }
5540
5541 if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
5542 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5543 RETURN_THROWS();
5544 }
5545
5546 num_pad_chars = pad_length - ZSTR_LEN(input);
5547 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5548 ZSTR_LEN(result) = 0;
5549
5550 /* We need to figure out the left/right padding lengths. */
5551 switch (pad_type_val) {
5552 case PHP_STR_PAD_RIGHT:
5553 left_pad = 0;
5554 right_pad = num_pad_chars;
5555 break;
5556
5557 case PHP_STR_PAD_LEFT:
5558 left_pad = num_pad_chars;
5559 right_pad = 0;
5560 break;
5561
5562 case PHP_STR_PAD_BOTH:
5563 left_pad = num_pad_chars / 2;
5564 right_pad = num_pad_chars - left_pad;
5565 break;
5566 }
5567
5568 /* First we pad on the left. */
5569 for (i = 0; i < left_pad; i++)
5570 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5571
5572 /* Then we copy the input string. */
5573 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5574 ZSTR_LEN(result) += ZSTR_LEN(input);
5575
5576 /* Finally, we pad on the right. */
5577 for (i = 0; i < right_pad; i++)
5578 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5579
5580 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5581
5582 RETURN_NEW_STR(result);
5583 }
5584 /* }}} */
5585
5586 /* {{{ Implements an ANSI C compatible sscanf */
5587 PHP_FUNCTION(sscanf)
5588 {
5589 zval *args = NULL;
5590 char *str, *format;
5591 size_t str_len, format_len;
5592 int result, num_args = 0;
5593
5594 ZEND_PARSE_PARAMETERS_START(2, -1)
5595 Z_PARAM_STRING(str, str_len)
5596 Z_PARAM_STRING(format, format_len)
5597 Z_PARAM_VARIADIC('*', args, num_args)
5598 ZEND_PARSE_PARAMETERS_END();
5599
5600 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5601
5602 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5603 WRONG_PARAM_COUNT;
5604 }
5605 }
5606 /* }}} */
5607
5608 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5609 static zend_string *php_str_rot13(zend_string *str)
5610 {
5611 zend_string *ret;
5612 const char *p, *e;
5613 char *target;
5614
5615 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5616 return ZSTR_EMPTY_ALLOC();
5617 }
5618
5619 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5620
5621 p = ZSTR_VAL(str);
5622 e = p + ZSTR_LEN(str);
5623 target = ZSTR_VAL(ret);
5624
5625 #ifdef __SSE2__
5626 if (e - p > 15) {
5627 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5628 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5629 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5630 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5631 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5632 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5633 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5634 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5635 const __m128i add = _mm_set1_epi8(13);
5636 const __m128i sub = _mm_set1_epi8(-13);
5637
5638 do {
5639 __m128i in, gt, lt, cmp, delta;
5640
5641 delta = _mm_setzero_si128();
5642 in = _mm_loadu_si128((__m128i *)p);
5643
5644 gt = _mm_cmpgt_epi8(in, a_minus_1);
5645 lt = _mm_cmplt_epi8(in, m_plus_1);
5646 cmp = _mm_and_si128(lt, gt);
5647 if (_mm_movemask_epi8(cmp)) {
5648 cmp = _mm_and_si128(cmp, add);
5649 delta = _mm_or_si128(delta, cmp);
5650 }
5651
5652 gt = _mm_cmpgt_epi8(in, n_minus_1);
5653 lt = _mm_cmplt_epi8(in, z_plus_1);
5654 cmp = _mm_and_si128(lt, gt);
5655 if (_mm_movemask_epi8(cmp)) {
5656 cmp = _mm_and_si128(cmp, sub);
5657 delta = _mm_or_si128(delta, cmp);
5658 }
5659
5660 gt = _mm_cmpgt_epi8(in, A_minus_1);
5661 lt = _mm_cmplt_epi8(in, M_plus_1);
5662 cmp = _mm_and_si128(lt, gt);
5663 if (_mm_movemask_epi8(cmp)) {
5664 cmp = _mm_and_si128(cmp, add);
5665 delta = _mm_or_si128(delta, cmp);
5666 }
5667
5668 gt = _mm_cmpgt_epi8(in, N_minus_1);
5669 lt = _mm_cmplt_epi8(in, Z_plus_1);
5670 cmp = _mm_and_si128(lt, gt);
5671 if (_mm_movemask_epi8(cmp)) {
5672 cmp = _mm_and_si128(cmp, sub);
5673 delta = _mm_or_si128(delta, cmp);
5674 }
5675
5676 in = _mm_add_epi8(in, delta);
5677 _mm_storeu_si128((__m128i *)target, in);
5678
5679 p += 16;
5680 target += 16;
5681 } while (e - p > 15);
5682 }
5683 #endif
5684
5685 while (p < e) {
5686 if (*p >= 'a' && *p <= 'z') {
5687 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5688 } else if (*p >= 'A' && *p <= 'Z') {
5689 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5690 } else {
5691 *target++ = *p++;
5692 }
5693 }
5694
5695 *target = '\0';
5696
5697 return ret;
5698 }
5699 /* }}} */
5700
5701 /* {{{ Perform the rot13 transform on a string */
5702 PHP_FUNCTION(str_rot13)
5703 {
5704 zend_string *arg;
5705
5706 ZEND_PARSE_PARAMETERS_START(1, 1)
5707 Z_PARAM_STR(arg)
5708 ZEND_PARSE_PARAMETERS_END();
5709
5710 RETURN_STR(php_str_rot13(arg));
5711 }
5712 /* }}} */
5713
5714 /* {{{ php_binary_string_shuffle */
5715 PHPAPI bool php_binary_string_shuffle(const php_random_algo *algo, php_random_status *status, char *str, zend_long len) /* {{{ */
5716 {
5717 int64_t n_elems, rnd_idx, n_left;
5718 char temp;
5719
5720 /* The implementation is stolen from array_data_shuffle */
5721 /* Thus the characteristics of the randomization are the same */
5722 n_elems = len;
5723
5724 if (n_elems <= 1) {
5725 return true;
5726 }
5727
5728 n_left = n_elems;
5729
5730 while (--n_left) {
5731 rnd_idx = algo->range(status, 0, n_left);
5732 if (EG(exception)) {
5733 return false;
5734 }
5735 if (rnd_idx != n_left) {
5736 temp = str[n_left];
5737 str[n_left] = str[rnd_idx];
5738 str[rnd_idx] = temp;
5739 }
5740 }
5741
5742 return true;
5743 }
5744 /* }}} */
5745
5746 /* {{{ Shuffles string. One permutation of all possible is created */
5747 PHP_FUNCTION(str_shuffle)
5748 {
5749 zend_string *arg;
5750
5751 ZEND_PARSE_PARAMETERS_START(1, 1)
5752 Z_PARAM_STR(arg)
5753 ZEND_PARSE_PARAMETERS_END();
5754
5755 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
5756 if (Z_STRLEN_P(return_value) > 1) {
5757 php_binary_string_shuffle(
5758 php_random_default_algo(),
5759 php_random_default_status(),
5760 Z_STRVAL_P(return_value),
5761 Z_STRLEN_P(return_value)
5762 );
5763 }
5764 }
5765 /* }}} */
5766
5767 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
5768 then the function will return an array containing all the words
5769 found inside the string. If format of 2 is specified, then the function
5770 will return an associated array where the position of the word is the key
5771 and the word itself is the value.
5772 For the purpose of this function, 'word' is defined as a locale dependent
5773 string containing alphabetic characters, which also may contain, but not start
5774 with "'" and "-" characters.
5775 */
5776 PHP_FUNCTION(str_word_count)
5777 {
5778 zend_string *str;
5779 char *char_list = NULL, ch[256];
5780 const char *p, *e, *s;
5781 size_t char_list_len = 0, word_count = 0;
5782 zend_long type = 0;
5783
5784 ZEND_PARSE_PARAMETERS_START(1, 3)
5785 Z_PARAM_STR(str)
5786 Z_PARAM_OPTIONAL
5787 Z_PARAM_LONG(type)
5788 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
5789 ZEND_PARSE_PARAMETERS_END();
5790
5791 switch(type) {
5792 case 1:
5793 case 2:
5794 array_init(return_value);
5795 if (!ZSTR_LEN(str)) {
5796 return;
5797 }
5798 break;
5799 case 0:
5800 if (!ZSTR_LEN(str)) {
5801 RETURN_LONG(0);
5802 }
5803 /* nothing to be done */
5804 break;
5805 default:
5806 zend_argument_value_error(2, "must be a valid format value");
5807 RETURN_THROWS();
5808 }
5809
5810 if (char_list) {
5811 php_charmask((const unsigned char *) char_list, char_list_len, ch);
5812 }
5813
5814 p = ZSTR_VAL(str);
5815 e = ZSTR_VAL(str) + ZSTR_LEN(str);
5816
5817 /* first character cannot be ' or -, unless explicitly allowed by the user */
5818 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
5819 p++;
5820 }
5821 /* last character cannot be -, unless explicitly allowed by the user */
5822 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
5823 e--;
5824 }
5825
5826 while (p < e) {
5827 s = p;
5828 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
5829 p++;
5830 }
5831 if (p > s) {
5832 switch (type)
5833 {
5834 case 1:
5835 add_next_index_stringl(return_value, s, p - s);
5836 break;
5837 case 2:
5838 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
5839 break;
5840 default:
5841 word_count++;
5842 break;
5843 }
5844 }
5845 p++;
5846 }
5847
5848 if (!type) {
5849 RETURN_LONG(word_count);
5850 }
5851 }
5852
5853 /* }}} */
5854
5855 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
5856 PHP_FUNCTION(str_split)
5857 {
5858 zend_string *str;
5859 zend_long split_length = 1;
5860 const char *p;
5861 size_t n_reg_segments;
5862
5863 ZEND_PARSE_PARAMETERS_START(1, 2)
5864 Z_PARAM_STR(str)
5865 Z_PARAM_OPTIONAL
5866 Z_PARAM_LONG(split_length)
5867 ZEND_PARSE_PARAMETERS_END();
5868
5869 if (split_length <= 0) {
5870 zend_argument_value_error(2, "must be greater than 0");
5871 RETURN_THROWS();
5872 }
5873
5874 if ((size_t)split_length >= ZSTR_LEN(str)) {
5875 if (0 == ZSTR_LEN(str)) {
5876 RETURN_EMPTY_ARRAY();
5877 }
5878
5879 array_init_size(return_value, 1);
5880 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
5881 return;
5882 }
5883
5884 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
5885
5886 n_reg_segments = ZSTR_LEN(str) / split_length;
5887 p = ZSTR_VAL(str);
5888
5889 while (n_reg_segments-- > 0) {
5890 add_next_index_stringl(return_value, p, split_length);
5891 p += split_length;
5892 }
5893
5894 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
5895 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
5896 }
5897 }
5898 /* }}} */
5899
5900 /* {{{ Search a string for any of a set of characters */
5901 PHP_FUNCTION(strpbrk)
5902 {
5903 zend_string *haystack, *char_list;
5904 const char *haystack_ptr, *cl_ptr;
5905
5906 ZEND_PARSE_PARAMETERS_START(2, 2)
5907 Z_PARAM_STR(haystack)
5908 Z_PARAM_STR(char_list)
5909 ZEND_PARSE_PARAMETERS_END();
5910
5911 if (!ZSTR_LEN(char_list)) {
5912 zend_argument_value_error(2, "must be a non-empty string");
5913 RETURN_THROWS();
5914 }
5915
5916 for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
5917 for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
5918 if (*cl_ptr == *haystack_ptr) {
5919 RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
5920 }
5921 }
5922 }
5923
5924 RETURN_FALSE;
5925 }
5926 /* }}} */
5927
5928 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
5929 PHP_FUNCTION(substr_compare)
5930 {
5931 zend_string *s1, *s2;
5932 zend_long offset, len=0;
5933 bool len_is_default=1;
5934 bool cs=0;
5935 size_t cmp_len;
5936
5937 ZEND_PARSE_PARAMETERS_START(3, 5)
5938 Z_PARAM_STR(s1)
5939 Z_PARAM_STR(s2)
5940 Z_PARAM_LONG(offset)
5941 Z_PARAM_OPTIONAL
5942 Z_PARAM_LONG_OR_NULL(len, len_is_default)
5943 Z_PARAM_BOOL(cs)
5944 ZEND_PARSE_PARAMETERS_END();
5945
5946 if (!len_is_default && len <= 0) {
5947 if (len == 0) {
5948 RETURN_LONG(0L);
5949 } else {
5950 zend_argument_value_error(4, "must be greater than or equal to 0");
5951 RETURN_THROWS();
5952 }
5953 }
5954
5955 if (offset < 0) {
5956 offset = ZSTR_LEN(s1) + offset;
5957 offset = (offset < 0) ? 0 : offset;
5958 }
5959
5960 if ((size_t)offset > ZSTR_LEN(s1)) {
5961 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5962 RETURN_THROWS();
5963 }
5964
5965 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
5966
5967 if (!cs) {
5968 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5969 } else {
5970 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5971 }
5972 }
5973 /* }}} */
5974
5975 /* {{{ */
5976 static zend_string *php_utf8_encode(const char *s, size_t len)
5977 {
5978 size_t pos = len;
5979 zend_string *str;
5980 unsigned char c;
5981
5982 str = zend_string_safe_alloc(len, 2, 0, 0);
5983 ZSTR_LEN(str) = 0;
5984 while (pos > 0) {
5985 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5986 * so we don't need to do any mapping here. */
5987 c = (unsigned char)(*s);
5988 if (c < 0x80) {
5989 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
5990 /* We only account for the single-byte and two-byte cases because
5991 * we're only dealing with the first 256 Unicode codepoints. */
5992 } else {
5993 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
5994 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
5995 }
5996 pos--;
5997 s++;
5998 }
5999 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6000 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6001 return str;
6002 }
6003 /* }}} */
6004
6005 /* {{{ */
6006 static zend_string *php_utf8_decode(const char *s, size_t len)
6007 {
6008 size_t pos = 0;
6009 unsigned int c;
6010 zend_string *str;
6011
6012 str = zend_string_alloc(len, 0);
6013 ZSTR_LEN(str) = 0;
6014 while (pos < len) {
6015 zend_result status = FAILURE;
6016 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
6017
6018 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6019 * so we don't need to do any mapping here beyond replacing non-Latin-1
6020 * characters. */
6021 if (status == FAILURE || c > 0xFFU) {
6022 c = '?';
6023 }
6024
6025 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
6026 }
6027 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6028 if (ZSTR_LEN(str) < len) {
6029 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6030 }
6031
6032 return str;
6033 }
6034 /* }}} */
6035
6036 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
6037 PHP_FUNCTION(utf8_encode)
6038 {
6039 char *arg;
6040 size_t arg_len;
6041
6042 ZEND_PARSE_PARAMETERS_START(1, 1)
6043 Z_PARAM_STRING(arg, arg_len)
6044 ZEND_PARSE_PARAMETERS_END();
6045
6046 RETURN_STR(php_utf8_encode(arg, arg_len));
6047 }
6048 /* }}} */
6049
6050 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
6051 PHP_FUNCTION(utf8_decode)
6052 {
6053 char *arg;
6054 size_t arg_len;
6055
6056 ZEND_PARSE_PARAMETERS_START(1, 1)
6057 Z_PARAM_STRING(arg, arg_len)
6058 ZEND_PARSE_PARAMETERS_END();
6059
6060 RETURN_STR(php_utf8_decode(arg, arg_len));
6061 }
6062 /* }}} */
6063