1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_rand.h"
22 #include "php_string.h"
23 #include "php_variables.h"
24 #include <locale.h>
25 #ifdef HAVE_LANGINFO_H
26 # include <langinfo.h>
27 #endif
28
29 #ifdef HAVE_LIBINTL
30 # include <libintl.h> /* For LC_MESSAGES */
31 #endif
32
33 #include "scanf.h"
34 #include "zend_API.h"
35 #include "zend_execute.h"
36 #include "php_globals.h"
37 #include "basic_functions.h"
38 #include "zend_smart_str.h"
39 #include <Zend/zend_exceptions.h>
40 #ifdef ZTS
41 #include "TSRM.h"
42 #endif
43
44 /* For str_getcsv() support */
45 #include "ext/standard/file.h"
46 /* For php_next_utf8_char() */
47 #include "ext/standard/html.h"
48
/* Padding-side selectors; register_string_constants() exposes them as the
 * PHP constants STR_PAD_LEFT, STR_PAD_RIGHT and STR_PAD_BOTH. */
#define STR_PAD_LEFT			0
#define STR_PAD_RIGHT			1
#define STR_PAD_BOTH			2
/* Bit flags (1, 2, 4, 8) so they can be OR-ed together; exposed to PHP as
 * PATHINFO_* by register_string_constants(). */
#define PHP_PATHINFO_DIRNAME 	1
#define PHP_PATHINFO_BASENAME 	2
#define PHP_PATHINFO_EXTENSION 	4
#define PHP_PATHINFO_FILENAME 	8
/* Union of all four PHP_PATHINFO_* flags. */
#define PHP_PATHINFO_ALL	(PHP_PATHINFO_DIRNAME | PHP_PATHINFO_BASENAME | PHP_PATHINFO_EXTENSION | PHP_PATHINFO_FILENAME)

/* Values of the `behavior` argument of php_spn_common_handler(): they select
 * between php_strspn() and php_strcspn(). */
#define STR_STRSPN				0
#define STR_STRCSPN				1
60
61 /* {{{ register_string_constants */
register_string_constants(INIT_FUNC_ARGS)62 void register_string_constants(INIT_FUNC_ARGS)
63 {
64 REGISTER_LONG_CONSTANT("STR_PAD_LEFT", STR_PAD_LEFT, CONST_CS | CONST_PERSISTENT);
65 REGISTER_LONG_CONSTANT("STR_PAD_RIGHT", STR_PAD_RIGHT, CONST_CS | CONST_PERSISTENT);
66 REGISTER_LONG_CONSTANT("STR_PAD_BOTH", STR_PAD_BOTH, CONST_CS | CONST_PERSISTENT);
67 REGISTER_LONG_CONSTANT("PATHINFO_DIRNAME", PHP_PATHINFO_DIRNAME, CONST_CS | CONST_PERSISTENT);
68 REGISTER_LONG_CONSTANT("PATHINFO_BASENAME", PHP_PATHINFO_BASENAME, CONST_CS | CONST_PERSISTENT);
69 REGISTER_LONG_CONSTANT("PATHINFO_EXTENSION", PHP_PATHINFO_EXTENSION, CONST_CS | CONST_PERSISTENT);
70 REGISTER_LONG_CONSTANT("PATHINFO_FILENAME", PHP_PATHINFO_FILENAME, CONST_CS | CONST_PERSISTENT);
71 REGISTER_LONG_CONSTANT("PATHINFO_ALL", PHP_PATHINFO_ALL, CONST_CS | CONST_PERSISTENT);
72
73 /* If last members of struct lconv equal CHAR_MAX, no grouping is done */
74 REGISTER_LONG_CONSTANT("CHAR_MAX", CHAR_MAX, CONST_CS | CONST_PERSISTENT);
75 REGISTER_LONG_CONSTANT("LC_CTYPE", LC_CTYPE, CONST_CS | CONST_PERSISTENT);
76 REGISTER_LONG_CONSTANT("LC_NUMERIC", LC_NUMERIC, CONST_CS | CONST_PERSISTENT);
77 REGISTER_LONG_CONSTANT("LC_TIME", LC_TIME, CONST_CS | CONST_PERSISTENT);
78 REGISTER_LONG_CONSTANT("LC_COLLATE", LC_COLLATE, CONST_CS | CONST_PERSISTENT);
79 REGISTER_LONG_CONSTANT("LC_MONETARY", LC_MONETARY, CONST_CS | CONST_PERSISTENT);
80 REGISTER_LONG_CONSTANT("LC_ALL", LC_ALL, CONST_CS | CONST_PERSISTENT);
81 # ifdef LC_MESSAGES
82 REGISTER_LONG_CONSTANT("LC_MESSAGES", LC_MESSAGES, CONST_CS | CONST_PERSISTENT);
83 # endif
84
85 }
86 /* }}} */
87
/* Forward declaration; the definition is not in this part of the file. */
int php_tag_find(char *tag, size_t len, const char *set);

/* Lowercase hexadecimal digits indexed by nibble value; php_bin2hex() reads
 * from it. this is read-only, so it's ok */
ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";

/* localeconv mutex: held by localeconv_r() around the localeconv() call,
 * allocated in PHP_MINIT_FUNCTION(localeconv) and freed in
 * PHP_MSHUTDOWN_FUNCTION(localeconv). Exists in ZTS builds only. */
#ifdef ZTS
static MUTEX_T locale_mutex = NULL;
#endif
97
98 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)99 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
100 {
101 zend_string *result;
102 size_t i, j;
103
104 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
105
106 for (i = j = 0; i < oldlen; i++) {
107 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
108 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
109 }
110 ZSTR_VAL(result)[j] = '\0';
111
112 return result;
113 }
114 /* }}} */
115
116 /* {{{ php_hex2bin */
php_hex2bin(const unsigned char * old,const size_t oldlen)117 static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
118 {
119 size_t target_length = oldlen >> 1;
120 zend_string *str = zend_string_alloc(target_length, 0);
121 unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
122 size_t i, j;
123
124 for (i = j = 0; i < target_length; i++) {
125 unsigned char c = old[j++];
126 unsigned char l = c & ~0x20;
127 int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
128 unsigned char d;
129
130 /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
131 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
132 d = (l - 0x10 - 0x27 * is_letter) << 4;
133 } else {
134 zend_string_efree(str);
135 return NULL;
136 }
137 c = old[j++];
138 l = c & ~0x20;
139 is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
140 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
141 d |= l - 0x10 - 0x27 * is_letter;
142 } else {
143 zend_string_efree(str);
144 return NULL;
145 }
146 ret[i] = d;
147 }
148 ret[i] = '\0';
149
150 return str;
151 }
152 /* }}} */
153
154 /* {{{ localeconv_r
155 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
localeconv_r(struct lconv * out)156 PHPAPI struct lconv *localeconv_r(struct lconv *out)
157 {
158
159 #ifdef ZTS
160 tsrm_mutex_lock( locale_mutex );
161 #endif
162
163 /* cur->locinfo is struct __crt_locale_info which implementation is
164 hidden in vc14. TODO revisit this and check if a workaround available
165 and needed. */
166 #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
167 {
168 /* Even with the enabled per thread locale, localeconv
169 won't check any locale change in the master thread. */
170 _locale_t cur = _get_current_locale();
171 *out = *cur->locinfo->lconv;
172 _free_locale(cur);
173 }
174 #else
175 /* localeconv doesn't return an error condition */
176 *out = *localeconv();
177 #endif
178
179 #ifdef ZTS
180 tsrm_mutex_unlock( locale_mutex );
181 #endif
182
183 return out;
184 }
185 /* }}} */
186
187 #ifdef ZTS
188 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(localeconv)189 PHP_MINIT_FUNCTION(localeconv)
190 {
191 locale_mutex = tsrm_mutex_alloc();
192 return SUCCESS;
193 }
194 /* }}} */
195
196 /* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(localeconv)197 PHP_MSHUTDOWN_FUNCTION(localeconv)
198 {
199 tsrm_mutex_free( locale_mutex );
200 locale_mutex = NULL;
201 return SUCCESS;
202 }
203 /* }}} */
204 #endif
205
206 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)207 PHP_FUNCTION(bin2hex)
208 {
209 zend_string *result;
210 zend_string *data;
211
212 ZEND_PARSE_PARAMETERS_START(1, 1)
213 Z_PARAM_STR(data)
214 ZEND_PARSE_PARAMETERS_END();
215
216 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
217
218 RETURN_STR(result);
219 }
220 /* }}} */
221
222 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)223 PHP_FUNCTION(hex2bin)
224 {
225 zend_string *result, *data;
226
227 ZEND_PARSE_PARAMETERS_START(1, 1)
228 Z_PARAM_STR(data)
229 ZEND_PARSE_PARAMETERS_END();
230
231 if (ZSTR_LEN(data) % 2 != 0) {
232 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
233 RETURN_FALSE;
234 }
235
236 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
237
238 if (!result) {
239 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
240 RETURN_FALSE;
241 }
242
243 RETVAL_STR(result);
244 }
245 /* }}} */
246
/* Shared implementation of strspn() and strcspn().
 * Parses (string, mask[, offset[, length]]), clamps offset and length to the
 * subject string (negative values count from the end of the string,
 * respectively of the remainder after the offset), and returns the result of
 * php_strspn() or php_strcspn() on that window, as selected by behavior
 * (STR_STRSPN or STR_STRCSPN). An empty window yields 0. */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
{
	zend_string *subject, *char_list;
	zend_long offset = 0, length = 0;
	bool length_is_null = 1;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(subject)
		Z_PARAM_STR(char_list)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(offset)
		Z_PARAM_LONG_OR_NULL(length, length_is_null)
	ZEND_PARSE_PARAMETERS_END();

	size_t available = ZSTR_LEN(subject);

	/* Clamp the offset into [0, strlen] */
	if (offset < 0) {
		offset += available;
		if (offset < 0) {
			offset = 0;
		}
	} else if ((size_t) offset > available) {
		offset = available;
	}
	available -= offset;

	/* Clamp the length into [0, bytes left after the offset] */
	if (length_is_null) {
		length = available;
	} else if (length < 0) {
		length += available;
		if (length < 0) {
			length = 0;
		}
	} else if ((size_t) length > available) {
		length = available;
	}

	if (length == 0) {
		RETURN_LONG(0);
	}

	const char *window_start = ZSTR_VAL(subject) + offset;
	const char *window_end = window_start + length;
	const char *list_start = ZSTR_VAL(char_list);
	const char *list_end = list_start + ZSTR_LEN(char_list);

	if (behavior == STR_STRSPN) {
		RETURN_LONG(php_strspn(window_start, list_start, window_end, list_end));
	}

	ZEND_ASSERT(behavior == STR_STRCSPN);
	RETURN_LONG(php_strcspn(window_start, list_start, window_end, list_end));
}
302 /* }}} */
303
304 /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
PHP_FUNCTION(strspn)305 PHP_FUNCTION(strspn)
306 {
307 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRSPN);
308 }
309 /* }}} */
310
311 /* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provide works like strcspn(substr($s,$start,$len),$bad_chars) */
PHP_FUNCTION(strcspn)312 PHP_FUNCTION(strcspn)
313 {
314 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRCSPN);
315 }
316 /* }}} */
317
318 /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
319 #if HAVE_NL_LANGINFO
/* Registers, as PHP integer constants of the same name, every nl_langinfo
 * item macro that the platform headers define. Each group is guarded by
 * #ifdef, so items missing on a platform are simply not registered.
 * Always reports SUCCESS. */
PHP_MINIT_FUNCTION(nl_langinfo)
{
/* Registers constant x under its own name (#x) with the file's usual flags */
#define REGISTER_NL_LANGINFO_CONSTANT(x)	REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
#ifdef ABDAY_1
	/* ABDAY_1 being defined is taken to mean ABDAY_2..ABDAY_7 are too */
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
	REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
#endif
#ifdef DAY_1
	REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
	REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
#endif
#ifdef ABMON_1
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
	REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
#endif
#ifdef MON_1
	REGISTER_NL_LANGINFO_CONSTANT(MON_1);
	REGISTER_NL_LANGINFO_CONSTANT(MON_2);
	REGISTER_NL_LANGINFO_CONSTANT(MON_3);
	REGISTER_NL_LANGINFO_CONSTANT(MON_4);
	REGISTER_NL_LANGINFO_CONSTANT(MON_5);
	REGISTER_NL_LANGINFO_CONSTANT(MON_6);
	REGISTER_NL_LANGINFO_CONSTANT(MON_7);
	REGISTER_NL_LANGINFO_CONSTANT(MON_8);
	REGISTER_NL_LANGINFO_CONSTANT(MON_9);
	REGISTER_NL_LANGINFO_CONSTANT(MON_10);
	REGISTER_NL_LANGINFO_CONSTANT(MON_11);
	REGISTER_NL_LANGINFO_CONSTANT(MON_12);
#endif
#ifdef AM_STR
	REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
#endif
#ifdef PM_STR
	REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
#endif
#ifdef D_T_FMT
	REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
#endif
#ifdef D_FMT
	REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
#endif
#ifdef T_FMT
	REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
#endif
#ifdef T_FMT_AMPM
	REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
#endif
#ifdef ERA
	REGISTER_NL_LANGINFO_CONSTANT(ERA);
#endif
#ifdef ERA_YEAR
	REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
#endif
#ifdef ERA_D_T_FMT
	REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
#endif
#ifdef ERA_D_FMT
	REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
#endif
#ifdef ERA_T_FMT
	REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
#endif
#ifdef ALT_DIGITS
	REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
#endif
#ifdef INT_CURR_SYMBOL
	REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
#endif
#ifdef CURRENCY_SYMBOL
	REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
#endif
#ifdef CRNCYSTR
	REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
#endif
#ifdef MON_DECIMAL_POINT
	REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
#endif
#ifdef MON_THOUSANDS_SEP
	REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
#endif
#ifdef MON_GROUPING
	REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
#endif
#ifdef POSITIVE_SIGN
	REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
#endif
#ifdef NEGATIVE_SIGN
	REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
#endif
#ifdef INT_FRAC_DIGITS
	REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
#endif
#ifdef FRAC_DIGITS
	REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
#endif
#ifdef P_CS_PRECEDES
	REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
#endif
#ifdef P_SEP_BY_SPACE
	REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
#endif
#ifdef N_CS_PRECEDES
	REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
#endif
#ifdef N_SEP_BY_SPACE
	REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
#endif
#ifdef P_SIGN_POSN
	REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
#endif
#ifdef N_SIGN_POSN
	REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
#endif
	/* Unlike the switch in PHP_FUNCTION(nl_langinfo), both spellings of the
	 * radix and thousands-separator items are registered when both exist */
#ifdef DECIMAL_POINT
	REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
#endif
#ifdef RADIXCHAR
	REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
#endif
#ifdef THOUSANDS_SEP
	REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
#endif
#ifdef THOUSEP
	REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
#endif
#ifdef GROUPING
	REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
#endif
#ifdef YESEXPR
	REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
#endif
#ifdef NOEXPR
	REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
#endif
#ifdef YESSTR
	REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
#endif
#ifdef NOSTR
	REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
#endif
#ifdef CODESET
	REGISTER_NL_LANGINFO_CONSTANT(CODESET);
#endif
/* The helper macro is local to this function */
#undef REGISTER_NL_LANGINFO_CONSTANT
	return SUCCESS;
}
486 /* }}} */
487
/* {{{ Query language and locale information
 * Takes one integer item. The item is first checked against the list of item
 * constants the platform defines; an unknown item produces an E_WARNING and
 * a false return value. Otherwise the C library's nl_langinfo() is called
 * and its result returned as a string (or false if it returned NULL). */
PHP_FUNCTION(nl_langinfo)
{
	zend_long item;
	char *value;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_LONG(item)
	ZEND_PARSE_PARAMETERS_END();

	/* Whitelist: every known item falls through to the single `break` below */
	switch(item) { /* {{{ */
#ifdef ABDAY_1
		case ABDAY_1:
		case ABDAY_2:
		case ABDAY_3:
		case ABDAY_4:
		case ABDAY_5:
		case ABDAY_6:
		case ABDAY_7:
#endif
#ifdef DAY_1
		case DAY_1:
		case DAY_2:
		case DAY_3:
		case DAY_4:
		case DAY_5:
		case DAY_6:
		case DAY_7:
#endif
#ifdef ABMON_1
		case ABMON_1:
		case ABMON_2:
		case ABMON_3:
		case ABMON_4:
		case ABMON_5:
		case ABMON_6:
		case ABMON_7:
		case ABMON_8:
		case ABMON_9:
		case ABMON_10:
		case ABMON_11:
		case ABMON_12:
#endif
#ifdef MON_1
		case MON_1:
		case MON_2:
		case MON_3:
		case MON_4:
		case MON_5:
		case MON_6:
		case MON_7:
		case MON_8:
		case MON_9:
		case MON_10:
		case MON_11:
		case MON_12:
#endif
#ifdef AM_STR
		case AM_STR:
#endif
#ifdef PM_STR
		case PM_STR:
#endif
#ifdef D_T_FMT
		case D_T_FMT:
#endif
#ifdef D_FMT
		case D_FMT:
#endif
#ifdef T_FMT
		case T_FMT:
#endif
#ifdef T_FMT_AMPM
		case T_FMT_AMPM:
#endif
#ifdef ERA
		case ERA:
#endif
#ifdef ERA_YEAR
		case ERA_YEAR:
#endif
#ifdef ERA_D_T_FMT
		case ERA_D_T_FMT:
#endif
#ifdef ERA_D_FMT
		case ERA_D_FMT:
#endif
#ifdef ERA_T_FMT
		case ERA_T_FMT:
#endif
#ifdef ALT_DIGITS
		case ALT_DIGITS:
#endif
#ifdef INT_CURR_SYMBOL
		case INT_CURR_SYMBOL:
#endif
#ifdef CURRENCY_SYMBOL
		case CURRENCY_SYMBOL:
#endif
#ifdef CRNCYSTR
		case CRNCYSTR:
#endif
#ifdef MON_DECIMAL_POINT
		case MON_DECIMAL_POINT:
#endif
#ifdef MON_THOUSANDS_SEP
		case MON_THOUSANDS_SEP:
#endif
#ifdef MON_GROUPING
		case MON_GROUPING:
#endif
#ifdef POSITIVE_SIGN
		case POSITIVE_SIGN:
#endif
#ifdef NEGATIVE_SIGN
		case NEGATIVE_SIGN:
#endif
#ifdef INT_FRAC_DIGITS
		case INT_FRAC_DIGITS:
#endif
#ifdef FRAC_DIGITS
		case FRAC_DIGITS:
#endif
#ifdef P_CS_PRECEDES
		case P_CS_PRECEDES:
#endif
#ifdef P_SEP_BY_SPACE
		case P_SEP_BY_SPACE:
#endif
#ifdef N_CS_PRECEDES
		case N_CS_PRECEDES:
#endif
#ifdef N_SEP_BY_SPACE
		case N_SEP_BY_SPACE:
#endif
#ifdef P_SIGN_POSN
		case P_SIGN_POSN:
#endif
#ifdef N_SIGN_POSN
		case N_SIGN_POSN:
#endif
		/* Only one label of each pair is emitted (#elif) - presumably because
		 * both names may carry the same value, which would be a duplicate
		 * case label; TODO confirm against the platform headers */
#ifdef DECIMAL_POINT
		case DECIMAL_POINT:
#elif defined(RADIXCHAR)
		case RADIXCHAR:
#endif
#ifdef THOUSANDS_SEP
		case THOUSANDS_SEP:
#elif defined(THOUSEP)
		case THOUSEP:
#endif
#ifdef GROUPING
		case GROUPING:
#endif
#ifdef YESEXPR
		case YESEXPR:
#endif
#ifdef NOEXPR
		case NOEXPR:
#endif
#ifdef YESSTR
		case YESSTR:
#endif
#ifdef NOSTR
		case NOSTR:
#endif
#ifdef CODESET
		case CODESET:
#endif
			break;
		default:
			/* Not one of the items known on this platform */
			php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
			RETURN_FALSE;
	}
	/* }}} */

	/* item has been validated above; query the C library */
	value = nl_langinfo(item);
	if (value == NULL) {
		RETURN_FALSE;
	} else {
		RETURN_STRING(value);
	}
}
671 #endif
672 /* }}} */
673
674 /* {{{ Compares two strings using the current locale */
PHP_FUNCTION(strcoll)675 PHP_FUNCTION(strcoll)
676 {
677 zend_string *s1, *s2;
678
679 ZEND_PARSE_PARAMETERS_START(2, 2)
680 Z_PARAM_STR(s1)
681 Z_PARAM_STR(s2)
682 ZEND_PARSE_PARAMETERS_END();
683
684 RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
685 (const char *) ZSTR_VAL(s2)));
686 }
687 /* }}} */
688
689 /* {{{ php_charmask
690 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
691 * it needs to be incrementing.
692 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
693 */
php_charmask(const unsigned char * input,size_t len,char * mask)694 static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
695 {
696 const unsigned char *end;
697 unsigned char c;
698 int result = SUCCESS;
699
700 memset(mask, 0, 256);
701 for (end = input+len; input < end; input++) {
702 c=*input;
703 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
704 && input[3] >= c) {
705 memset(mask+c, 1, input[3] - c + 1);
706 input+=3;
707 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
708 /* Error, try to be as helpful as possible:
709 (a range ending/starting with '.' won't be captured here) */
710 if (end-len >= input) { /* there was no 'left' char */
711 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
712 result = FAILURE;
713 continue;
714 }
715 if (input+2 >= end) { /* there is no 'right' char */
716 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
717 result = FAILURE;
718 continue;
719 }
720 if (input[-1] > input[2]) { /* wrong order */
721 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
722 result = FAILURE;
723 continue;
724 }
725 /* FIXME: better error (a..b..c is the only left possibility?) */
726 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
727 result = FAILURE;
728 continue;
729 } else {
730 mask[c]=1;
731 }
732 }
733 return result;
734 }
735 /* }}} */
736
737 /* {{{ php_trim_int()
738 * mode 1 : trim left
739 * mode 2 : trim right
740 * mode 3 : trim left and right
741 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
742 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)743 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
744 {
745 const char *start = ZSTR_VAL(str);
746 const char *end = start + ZSTR_LEN(str);
747 char mask[256];
748
749 if (what) {
750 if (what_len == 1) {
751 char p = *what;
752 if (mode & 1) {
753 while (start != end) {
754 if (*start == p) {
755 start++;
756 } else {
757 break;
758 }
759 }
760 }
761 if (mode & 2) {
762 while (start != end) {
763 if (*(end-1) == p) {
764 end--;
765 } else {
766 break;
767 }
768 }
769 }
770 } else {
771 php_charmask((const unsigned char *) what, what_len, mask);
772
773 if (mode & 1) {
774 while (start != end) {
775 if (mask[(unsigned char)*start]) {
776 start++;
777 } else {
778 break;
779 }
780 }
781 }
782 if (mode & 2) {
783 while (start != end) {
784 if (mask[(unsigned char)*(end-1)]) {
785 end--;
786 } else {
787 break;
788 }
789 }
790 }
791 }
792 } else {
793 if (mode & 1) {
794 while (start != end) {
795 unsigned char c = (unsigned char)*start;
796
797 if (c <= ' ' &&
798 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
799 start++;
800 } else {
801 break;
802 }
803 }
804 }
805 if (mode & 2) {
806 while (start != end) {
807 unsigned char c = (unsigned char)*(end-1);
808
809 if (c <= ' ' &&
810 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
811 end--;
812 } else {
813 break;
814 }
815 }
816 }
817 }
818
819 if (ZSTR_LEN(str) == end - start) {
820 return zend_string_copy(str);
821 } else if (end - start == 0) {
822 return ZSTR_EMPTY_ALLOC();
823 } else {
824 return zend_string_init(start, end - start, 0);
825 }
826 }
827 /* }}} */
828
829 /* {{{ php_trim_int()
830 * mode 1 : trim left
831 * mode 2 : trim right
832 * mode 3 : trim left and right
833 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
834 */
php_trim(zend_string * str,const char * what,size_t what_len,int mode)835 PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
836 {
837 return php_trim_int(str, what, what_len, mode);
838 }
839 /* }}} */
840
841 /* {{{ php_do_trim
842 * Base for trim(), rtrim() and ltrim() functions.
843 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)844 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
845 {
846 zend_string *str;
847 zend_string *what = NULL;
848
849 ZEND_PARSE_PARAMETERS_START(1, 2)
850 Z_PARAM_STR(str)
851 Z_PARAM_OPTIONAL
852 Z_PARAM_STR(what)
853 ZEND_PARSE_PARAMETERS_END();
854
855 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
856 }
857 /* }}} */
858
859 /* {{{ Strips whitespace from the beginning and end of a string */
PHP_FUNCTION(trim)860 PHP_FUNCTION(trim)
861 {
862 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
863 }
864 /* }}} */
865
866 /* {{{ Removes trailing whitespace */
PHP_FUNCTION(rtrim)867 PHP_FUNCTION(rtrim)
868 {
869 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
870 }
871 /* }}} */
872
873 /* {{{ Strips whitespace from the beginning of a string */
PHP_FUNCTION(ltrim)874 PHP_FUNCTION(ltrim)
875 {
876 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
877 }
878 /* }}} */
879
/* {{{ Wraps buffer to selected number of characters using string break char
 * Arguments, in order: the text, the line width (default 75), the break
 * string (default "\n") and a flag that forces words longer than the width
 * to be cut (default false).
 * Returns an empty string for empty text. Raises an argument value error
 * when the break string is empty, or when cutting is requested with a width
 * of 0. Otherwise returns a new string with breaks inserted.
 */
PHP_FUNCTION(wordwrap)
{
	zend_string *text;
	char *breakchar = "\n";
	/* newtextlen: bytes written to newtext so far;
	 * chk: break insertions still covered by the current allocation */
	size_t newtextlen, chk, breakchar_len = 1;
	/* Size bookkeeping used when the output buffer has to be extended */
	size_t alloced;
	/* current: scan position; laststart: start of the line being built;
	 * lastspace: position of the last space seen */
	zend_long current = 0, laststart = 0, lastspace = 0;
	zend_long linelength = 75;
	bool docut = 0;
	zend_string *newtext;

	ZEND_PARSE_PARAMETERS_START(1, 4)
		Z_PARAM_STR(text)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(linelength)
		Z_PARAM_STRING(breakchar, breakchar_len)
		Z_PARAM_BOOL(docut)
	ZEND_PARSE_PARAMETERS_END();

	/* Empty input is answered before the other arguments are validated */
	if (ZSTR_LEN(text) == 0) {
		RETURN_EMPTY_STRING();
	}

	if (breakchar_len == 0) {
		zend_argument_value_error(3, "cannot be empty");
		RETURN_THROWS();
	}

	if (linelength == 0 && docut) {
		zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
		RETURN_THROWS();
	}

	/* Special case for a single-character break as it needs no
	   additional storage space */
	if (breakchar_len == 1 && !docut) {
		/* Work on a copy of the text; spaces are overwritten in place */
		newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			if (ZSTR_VAL(text)[current] == breakchar[0]) {
				/* An existing break starts a new line */
				laststart = lastspace = current + 1;
			} else if (ZSTR_VAL(text)[current] == ' ') {
				/* A space at or past the width becomes the break */
				if (current - laststart >= linelength) {
					ZSTR_VAL(newtext)[current] = breakchar[0];
					laststart = current + 1;
				}
				lastspace = current;
			} else if (current - laststart >= linelength && laststart != lastspace) {
				/* Line too long inside a word: break at the last space seen */
				ZSTR_VAL(newtext)[lastspace] = breakchar[0];
				laststart = lastspace + 1;
			}
		}

		RETURN_NEW_STR(newtext);
	} else {
		/* Multiple character line break or forced cut */
		if (linelength > 0) {
			/* Initial estimate: one break per linelength bytes, plus one */
			chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
			newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
			alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
		} else {
			/* Width <= 0: reserve room for one break per input byte */
			chk = ZSTR_LEN(text);
			alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
			newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
		}

		/* now keep track of the actual new text length */
		newtextlen = 0;

		laststart = lastspace = 0;
		for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
			/* The break budget is used up: grow the buffer and recompute
			 * the budget for the text that is still to be scanned */
			if (chk == 0) {
				alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
				newtext = zend_string_extend(newtext, alloced, 0);
				chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
			}
			/* when we hit an existing break, copy to new buffer, and
			 * fix up laststart and lastspace */
			if (ZSTR_VAL(text)[current] == breakchar[0]
				&& current + breakchar_len < ZSTR_LEN(text)
				&& !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
				newtextlen += current - laststart + breakchar_len;
				/* Skip the rest of the break; the loop's current++ adds the last step */
				current += breakchar_len - 1;
				laststart = lastspace = current + 1;
				chk--;
			}
			/* if it is a space, check if it is at the line boundary,
			 * copy and insert a break, or just keep track of it */
			else if (ZSTR_VAL(text)[current] == ' ') {
				if (current - laststart >= linelength) {
					memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
					newtextlen += current - laststart;
					memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
					newtextlen += breakchar_len;
					/* The space itself is dropped from the output */
					laststart = current + 1;
					chk--;
				}
				lastspace = current;
			}
			/* if we are cutting, and we've accumulated enough
			 * characters, and we haven't see a space for this line,
			 * copy and insert a break. */
			else if (current - laststart >= linelength
					&& docut && laststart >= lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
				newtextlen += current - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				/* The current byte opens the next line; nothing is dropped */
				laststart = lastspace = current;
				chk--;
			}
			/* if the current word puts us over the linelength, copy
			 * back up until the last space, insert a break, and move
			 * up the laststart */
			else if (current - laststart >= linelength
					&& laststart < lastspace) {
				memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
				newtextlen += lastspace - laststart;
				memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
				newtextlen += breakchar_len;
				laststart = lastspace = lastspace + 1;
				chk--;
			}
		}

		/* copy over any stragglers */
		if (laststart != current) {
			memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
			newtextlen += current - laststart;
		}

		ZSTR_VAL(newtext)[newtextlen] = '\0';
		/* free unused memory */
		newtext = zend_string_truncate(newtext, newtextlen, 0);

		RETURN_NEW_STR(newtext);
	}
}
1021 /* }}} */
1022
1023 /* {{{ php_explode */
php_explode(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)1024 PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
1025 {
1026 const char *p1 = ZSTR_VAL(str);
1027 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
1028 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1029 zval tmp;
1030
1031 if (p2 == NULL) {
1032 ZVAL_STR_COPY(&tmp, str);
1033 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1034 } else {
1035 zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
1036 ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
1037 do {
1038 ZEND_HASH_FILL_GROW();
1039 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
1040 ZEND_HASH_FILL_NEXT();
1041 p1 = p2 + ZSTR_LEN(delim);
1042 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1043 } while (p2 != NULL && --limit > 1);
1044
1045 if (p1 <= endp) {
1046 ZEND_HASH_FILL_GROW();
1047 ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
1048 ZEND_HASH_FILL_NEXT();
1049 }
1050 } ZEND_HASH_FILL_END();
1051 }
1052 }
1053 /* }}} */
1054
1055 /* {{{ php_explode_negative_limit */
php_explode_negative_limit(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)1056 PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
1057 {
1058 #define EXPLODE_ALLOC_STEP 64
1059 const char *p1 = ZSTR_VAL(str);
1060 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
1061 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1062 zval tmp;
1063
1064 if (p2 == NULL) {
1065 /*
1066 do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
1067 by doing nothing we return empty array
1068 */
1069 } else {
1070 size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
1071 zend_long i, to_return;
1072 const char **positions = emalloc(allocated * sizeof(char *));
1073
1074 positions[found++] = p1;
1075 do {
1076 if (found >= allocated) {
1077 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
1078 positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
1079 }
1080 positions[found++] = p1 = p2 + ZSTR_LEN(delim);
1081 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1082 } while (p2 != NULL);
1083
1084 to_return = limit + found;
1085 /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
1086 for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
1087 ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
1088 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1089 }
1090 efree((void *)positions);
1091 }
1092 #undef EXPLODE_ALLOC_STEP
1093 }
1094 /* }}} */
1095
1096 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)1097 PHP_FUNCTION(explode)
1098 {
1099 zend_string *str, *delim;
1100 zend_long limit = ZEND_LONG_MAX; /* No limit */
1101 zval tmp;
1102
1103 ZEND_PARSE_PARAMETERS_START(2, 3)
1104 Z_PARAM_STR(delim)
1105 Z_PARAM_STR(str)
1106 Z_PARAM_OPTIONAL
1107 Z_PARAM_LONG(limit)
1108 ZEND_PARSE_PARAMETERS_END();
1109
1110 if (ZSTR_LEN(delim) == 0) {
1111 zend_argument_value_error(1, "cannot be empty");
1112 RETURN_THROWS();
1113 }
1114
1115 array_init(return_value);
1116
1117 if (ZSTR_LEN(str) == 0) {
1118 if (limit >= 0) {
1119 ZVAL_EMPTY_STRING(&tmp);
1120 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1121 }
1122 return;
1123 }
1124
1125 if (limit > 1) {
1126 php_explode(delim, str, return_value, limit);
1127 } else if (limit < 0) {
1128 php_explode_negative_limit(delim, str, return_value, limit);
1129 } else {
1130 ZVAL_STR_COPY(&tmp, str);
1131 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1132 }
1133 }
1134 /* }}} */
1135
1136 /* {{{ An alias for implode */
1137 /* }}} */
1138
1139 /* {{{ php_implode */
/* Joins the string forms of all elements of `pieces`, separated by `glue`,
 * and stores the resulting string in `return_value`.
 *
 * Works in two passes: the first collects every element and sums up the
 * required length, the second fills the output buffer from the end towards
 * the beginning. */
PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
{
	zval         *tmp;
	int           numelems;
	zend_string  *str;
	char         *cptr;
	size_t        len = 0;
	/* One entry per element. For string entries `str` is set and `lval`
	 * is a flag (non-zero: the string was created here and must be
	 * released). For integer entries `str` is NULL and `lval` holds the
	 * value. */
	struct {
		zend_string *str;
		zend_long    lval;
	} *strings, *ptr;
	ALLOCA_FLAG(use_heap)

	numelems = zend_hash_num_elements(pieces);

	if (numelems == 0) {
		RETURN_EMPTY_STRING();
	} else if (numelems == 1) {
		/* loop to search the first not undefined element... */
		ZEND_HASH_FOREACH_VAL(pieces, tmp) {
			RETURN_STR(zval_get_string(tmp));
		} ZEND_HASH_FOREACH_END();
	}

	ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);

	/* Pass 1: record each element and accumulate the total payload length. */
	ZEND_HASH_FOREACH_VAL(pieces, tmp) {
		if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
			/* Existing string: borrowed, not released later (lval == 0). */
			ptr->str = Z_STR_P(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 0;
			ptr++;
		} else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
			zend_long val = Z_LVAL_P(tmp);

			ptr->str = NULL;
			ptr->lval = val;
			ptr++;
			/* One extra character: the '-' sign for negatives, or the
			 * single digit of 0 (which the digit loop below skips). */
			if (val <= 0) {
				len++;
			}
			/* Count decimal digits. */
			while (val) {
				val /= 10;
				len++;
			}
		} else {
			/* Any other type: convert now; lval == 1 marks the string as
			 * owned by this function. */
			ptr->str = zval_get_string_func(tmp);
			len += ZSTR_LEN(ptr->str);
			ptr->lval = 1;
			ptr++;
		}
	} ZEND_HASH_FOREACH_END();

	/* numelems can not be 0, we checked above */
	str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
	cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
	*cptr = 0;

	/* Pass 2: walk the entries backwards, writing each piece (and the glue
	 * before it) right-to-left into the buffer. */
	while (1) {
		ptr--;
		if (EXPECTED(ptr->str)) {
			cptr -= ZSTR_LEN(ptr->str);
			memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
			if (ptr->lval) {
				zend_string_release_ex(ptr->str, 0);
			}
		} else {
			/* The byte at cptr is saved and restored around the call;
			 * presumably zend_print_long_to_buf() overwrites it (e.g. with
			 * a terminator) -- confirm against its definition. */
			char *oldPtr = cptr;
			char oldVal = *cptr;
			cptr = zend_print_long_to_buf(cptr, ptr->lval);
			*oldPtr = oldVal;
		}

		/* The first element has no glue in front of it. */
		if (ptr == strings) {
			break;
		}

		cptr -= ZSTR_LEN(glue);
		memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
	}

	free_alloca(strings, use_heap);
	RETURN_NEW_STR(str);
}
1224 /* }}} */
1225
1226 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1227 PHP_FUNCTION(implode)
1228 {
1229 zend_string *arg1_str = NULL;
1230 HashTable *arg1_array = NULL;
1231 zend_array *pieces = NULL;
1232
1233 ZEND_PARSE_PARAMETERS_START(1, 2)
1234 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1235 Z_PARAM_OPTIONAL
1236 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1237 ZEND_PARSE_PARAMETERS_END();
1238
1239 if (pieces == NULL) {
1240 if (arg1_array == NULL) {
1241 zend_type_error("%s(): Argument #1 ($array) must be of type array, string given", get_active_function_name());
1242 RETURN_THROWS();
1243 }
1244
1245 arg1_str = ZSTR_EMPTY_ALLOC();
1246 pieces = arg1_array;
1247 } else {
1248 if (arg1_str == NULL) {
1249 zend_argument_type_error(1, "must be of type string, array given");
1250 RETURN_THROWS();
1251 }
1252 }
1253
1254 php_implode(arg1_str, pieces, return_value);
1255 }
1256 /* }}} */
1257
1258 #define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]
1259
1260 /* {{{ Tokenize a string */
/* Returns the next token of the string being tokenized, or false when no
 * token is left. The string and the current position are kept in the
 * BG(strtok_string), BG(strtok_last) and BG(strtok_len) globals between
 * calls. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* Single-argument form: the argument is the delimiter set and the
		 * previously stored string continues to be tokenized. */
		tok = str;
	} else {
		/* Two-argument form: start tokenizing a new string. */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		// TODO: Should this warn?
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Flag every delimiter byte in the lookup table. */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* The token spans from the first non-delimiter up to (excluding) p;
		 * the next search starts one byte after p. */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		BG(strtok_last) = p + 1;
	} else {
return_false:
		/* Nothing left: drop the stored string. */
		RETVAL_FALSE;
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster than memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
1340 /* }}} */
1341
1342 /* {{{ php_strtoupper */
/* Uppercases the first `len` bytes of `s` in place using toupper() and
 * returns `s`. */
PHPAPI char *php_strtoupper(char *s, size_t len)
{
	unsigned char *bytes = (unsigned char *)s;
	size_t i;

	for (i = 0; i < len; i++) {
		bytes[i] = toupper(bytes[i]);
	}

	return s;
}
1357 /* }}} */
1358
1359 /* {{{ php_string_toupper */
php_string_toupper(zend_string * s)1360 PHPAPI zend_string *php_string_toupper(zend_string *s)
1361 {
1362 unsigned char *c;
1363 const unsigned char *e;
1364
1365 c = (unsigned char *)ZSTR_VAL(s);
1366 e = c + ZSTR_LEN(s);
1367
1368 while (c < e) {
1369 if (islower(*c)) {
1370 unsigned char *r;
1371 zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
1372
1373 if (c != (unsigned char*)ZSTR_VAL(s)) {
1374 memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
1375 }
1376 r = (unsigned char*) ZSTR_VAL(res) + (c - (unsigned char*) ZSTR_VAL(s));
1377 while (c < e) {
1378 *r = toupper(*c);
1379 r++;
1380 c++;
1381 }
1382 *r = '\0';
1383 return res;
1384 }
1385 c++;
1386 }
1387 return zend_string_copy(s);
1388 }
1389 /* }}} */
1390
1391 /* {{{ Makes a string uppercase */
PHP_FUNCTION(strtoupper)1392 PHP_FUNCTION(strtoupper)
1393 {
1394 zend_string *arg;
1395
1396 ZEND_PARSE_PARAMETERS_START(1, 1)
1397 Z_PARAM_STR(arg)
1398 ZEND_PARSE_PARAMETERS_END();
1399
1400 RETURN_STR(php_string_toupper(arg));
1401 }
1402 /* }}} */
1403
1404 /* {{{ php_strtolower */
/* Lowercases the first `len` bytes of `s` in place using tolower() and
 * returns `s`. */
PHPAPI char *php_strtolower(char *s, size_t len)
{
	unsigned char *bytes = (unsigned char *)s;
	size_t i;

	for (i = 0; i < len; i++) {
		bytes[i] = tolower(bytes[i]);
	}

	return s;
}
1419 /* }}} */
1420
1421 /* {{{ php_string_tolower */
php_string_tolower(zend_string * s)1422 PHPAPI zend_string *php_string_tolower(zend_string *s)
1423 {
1424 unsigned char *c;
1425 const unsigned char *e;
1426
1427 if (EXPECTED(!BG(ctype_string))) {
1428 return zend_string_tolower(s);
1429 } else {
1430 c = (unsigned char *)ZSTR_VAL(s);
1431 e = c + ZSTR_LEN(s);
1432
1433 while (c < e) {
1434 if (isupper(*c)) {
1435 unsigned char *r;
1436 zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
1437
1438 if (c != (unsigned char*)ZSTR_VAL(s)) {
1439 memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
1440 }
1441 r = (unsigned char*) ZSTR_VAL(res) + (c - (unsigned char*) ZSTR_VAL(s));
1442 while (c < e) {
1443 *r = tolower(*c);
1444 r++;
1445 c++;
1446 }
1447 *r = '\0';
1448 return res;
1449 }
1450 c++;
1451 }
1452 return zend_string_copy(s);
1453 }
1454 }
1455 /* }}} */
1456
1457 /* {{{ Makes a string lowercase */
PHP_FUNCTION(strtolower)1458 PHP_FUNCTION(strtolower)
1459 {
1460 zend_string *str;
1461
1462 ZEND_PARSE_PARAMETERS_START(1, 1)
1463 Z_PARAM_STR(str)
1464 ZEND_PARSE_PARAMETERS_END();
1465
1466 RETURN_STR(php_string_tolower(str));
1467 }
1468 /* }}} */
1469
#if defined(PHP_WIN32)
/* Tells whether the single character right before `pos` starts a path
 * component: it is not a slash itself and is either the first character of
 * the string, preceded by a slash, or preceded by a ':' that in turn
 * passes the same check. */
static bool _is_basename_start(const char *start, const char *pos)
{
	char prev, before_prev;

	if (pos - start < 1) {
		return 0;
	}

	prev = pos[-1];
	if (prev == '/' || prev == '\\') {
		return 0;
	}

	/* A lone leading character qualifies. */
	if (pos - start == 1) {
		return 1;
	}

	before_prev = pos[-2];
	if (before_prev == '/' || before_prev == '\\') {
		return 1;
	}

	return before_prev == ':' && _is_basename_start(start, pos - 2);
}
#endif
1488
1489 /* {{{ php_basename */
/* Returns a new string holding the trailing name component of the path
 * `s` (length `len`). When `suffix` is given and the component ends with
 * it (and is strictly longer than it), the suffix is cut off as well.
 *
 * Two strategies are used: a backward byte scan when
 * CG(ascii_compatible_locale) is set, otherwise a forward scan that steps
 * over characters with php_mblen(). */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	const char *basename_start;
	const char *basename_end;

	if (CG(ascii_compatible_locale)) {
		basename_end = s + len - 1;

		/* Strip trailing slashes */
		while (basename_end >= s
#if defined(PHP_WIN32)
			&& (*basename_end == '/'
				|| *basename_end == '\\'
				|| (*basename_end == ':'
					&& _is_basename_start(s, basename_end)))) {
#else
			&& *basename_end == '/') {
#endif
			basename_end--;
		}
		/* Nothing but separators (or an empty path). */
		if (basename_end < s) {
			return ZSTR_EMPTY_ALLOC();
		}

		/* Extract filename */
		basename_start = basename_end;
		basename_end++; /* make the end exclusive */
		while (basename_start > s
#if defined(PHP_WIN32)
			&& *(basename_start-1) != '/'
			&& *(basename_start-1) != '\\') {

			if (*(basename_start-1) == ':' &&
				_is_basename_start(s, basename_start - 1)) {
				break;
			}
#else
			&& *(basename_start-1) != '/') {
#endif
			basename_start--;
		}
	} else {
		/* State 0 is directly after a directory separator (or at the start of the string).
		 * State 1 is everything else. */
		int state = 0;

		basename_start = s;
		basename_end = s;
		while (len > 0) {
			/* A NUL byte is stepped over as a one-byte character without
			 * consulting php_mblen(). */
			int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

			switch (inc_len) {
				case 0:
					goto quit_loop;
				case 1:
#if defined(PHP_WIN32)
					if (*s == '/' || *s == '\\') {
#else
					if (*s == '/') {
#endif
						/* A separator right after a name closes that name. */
						if (state == 1) {
							state = 0;
							basename_end = s;
						}
#if defined(PHP_WIN32)
					/* Catch relative paths in c:file.txt style. They're not to confuse
					   with the NTFS streams. This part ensures also, that no drive
					   letter traversing happens. */
					} else if ((*s == ':' && (s - basename_start == 1))) {
						if (state == 0) {
							basename_start = s;
							state = 1;
						} else {
							basename_end = s;
							state = 0;
						}
#endif
					} else {
						/* First byte of a new name. */
						if (state == 0) {
							basename_start = s;
							state = 1;
						}
					}
					break;
				default:
					if (inc_len < 0) {
						/* If character is invalid, treat it like other non-significant characters. */
						inc_len = 1;
						php_mb_reset();
					}
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
					break;
			}
			s += inc_len;
			len -= inc_len;
		}

quit_loop:
		/* The path ended inside a name: it runs up to the current position. */
		if (state == 1) {
			basename_end = s;
		}
	}

	/* Strip the suffix only when the name is strictly longer than it. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
1603 /* }}} */
1604
1605 /* {{{ Returns the filename component of the path */
1606 PHP_FUNCTION(basename)
1607 {
1608 char *string, *suffix = NULL;
1609 size_t string_len, suffix_len = 0;
1610
1611 ZEND_PARSE_PARAMETERS_START(1, 2)
1612 Z_PARAM_STRING(string, string_len)
1613 Z_PARAM_OPTIONAL
1614 Z_PARAM_STRING(suffix, suffix_len)
1615 ZEND_PARSE_PARAMETERS_END();
1616
1617 RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
1618 }
1619 /* }}} */
1620
1621 /* {{{ php_dirname
1622 Returns directory name component of path */
1623 PHPAPI size_t php_dirname(char *path, size_t len)
1624 {
1625 return zend_dirname(path, len);
1626 }
1627 /* }}} */
1628
1629 /* {{{ Returns the directory name component of the path */
1630 PHP_FUNCTION(dirname)
1631 {
1632 char *str;
1633 size_t str_len;
1634 zend_string *ret;
1635 zend_long levels = 1;
1636
1637 ZEND_PARSE_PARAMETERS_START(1, 2)
1638 Z_PARAM_STRING(str, str_len)
1639 Z_PARAM_OPTIONAL
1640 Z_PARAM_LONG(levels)
1641 ZEND_PARSE_PARAMETERS_END();
1642
1643 ret = zend_string_init(str, str_len, 0);
1644
1645 if (levels == 1) {
1646 /* Default case */
1647 #ifdef PHP_WIN32
1648 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
1649 #else
1650 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
1651 #endif
1652 } else if (levels < 1) {
1653 zend_argument_value_error(2, "must be greater than or equal to 1");
1654 zend_string_efree(ret);
1655 RETURN_THROWS();
1656 } else {
1657 /* Some levels up */
1658 do {
1659 #ifdef PHP_WIN32
1660 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1661 #else
1662 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1663 #endif
1664 } while (ZSTR_LEN(ret) < str_len && --levels);
1665 }
1666
1667 RETURN_NEW_STR(ret);
1668 }
1669 /* }}} */
1670
/* {{{ Returns information about a file path (dirname, basename, extension, filename) */
1672 PHP_FUNCTION(pathinfo)
1673 {
1674 zval tmp;
1675 char *path, *dirname;
1676 size_t path_len;
1677 int have_basename;
1678 zend_long opt = PHP_PATHINFO_ALL;
1679 zend_string *ret = NULL;
1680
1681 ZEND_PARSE_PARAMETERS_START(1, 2)
1682 Z_PARAM_STRING(path, path_len)
1683 Z_PARAM_OPTIONAL
1684 Z_PARAM_LONG(opt)
1685 ZEND_PARSE_PARAMETERS_END();
1686
1687 have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
1688
1689 array_init(&tmp);
1690
1691 if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
1692 dirname = estrndup(path, path_len);
1693 php_dirname(dirname, path_len);
1694 if (*dirname) {
1695 add_assoc_string(&tmp, "dirname", dirname);
1696 }
1697 efree(dirname);
1698 }
1699
1700 if (have_basename) {
1701 ret = php_basename(path, path_len, NULL, 0);
1702 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1703 }
1704
1705 if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
1706 const char *p;
1707 ptrdiff_t idx;
1708
1709 if (!have_basename) {
1710 ret = php_basename(path, path_len, NULL, 0);
1711 }
1712
1713 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1714
1715 if (p) {
1716 idx = p - ZSTR_VAL(ret);
1717 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1718 }
1719 }
1720
1721 if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
1722 const char *p;
1723 ptrdiff_t idx;
1724
1725 /* Have we already looked up the basename? */
1726 if (!have_basename && !ret) {
1727 ret = php_basename(path, path_len, NULL, 0);
1728 }
1729
1730 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1731
1732 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1733 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1734 }
1735
1736 if (ret) {
1737 zend_string_release_ex(ret, 0);
1738 }
1739
1740 if (opt == PHP_PATHINFO_ALL) {
1741 RETURN_COPY_VALUE(&tmp);
1742 } else {
1743 zval *element;
1744 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1745 RETVAL_COPY_DEREF(element);
1746 } else {
1747 RETVAL_EMPTY_STRING();
1748 }
1749 zval_ptr_dtor(&tmp);
1750 }
1751 }
1752 /* }}} */
1753
1754 /* {{{ php_stristr
1755 case insensitive strstr */
1756 PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
1757 {
1758 php_strtolower(s, s_len);
1759 php_strtolower(t, t_len);
1760 return (char*)php_memnstr(s, t, t_len, s + s_len);
1761 }
1762 /* }}} */
1763
1764 /* {{{ php_strspn */
/* Returns the length of the initial segment of [s1, s1_end) that consists
 * only of bytes contained in the mask [s2, s2_end).
 *
 * Both ranges are half-open; neither *s1_end nor *s2_end is ever read, so
 * the buffers need not be NUL-terminated and may be empty. */
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *p, *spanp;

	for (p = s1; p != s1_end; p++) {
		/* Look the current byte up in the mask. */
		for (spanp = s2; spanp != s2_end; spanp++) {
			if (*spanp == *p) {
				break;
			}
		}
		if (spanp == s2_end) {
			/* Not part of the mask: the segment ends here. */
			break;
		}
	}

	return (p - s1);
}
1779 /* }}} */
1780
1781 /* {{{ php_strcspn */
/* Returns the length of the initial segment of [s1, s1_end) that contains
 * none of the bytes in the mask [s2, s2_end).
 *
 * Both ranges are half-open; neither *s1_end nor *s2_end is ever read. An
 * empty mask therefore matches nothing and the full length of the subject
 * is returned, even when the subject contains NUL bytes. */
PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *p, *spanp;

	for (p = s1; p != s1_end; p++) {
		for (spanp = s2; spanp != s2_end; spanp++) {
			if (*spanp == *p) {
				/* First byte that belongs to the mask. */
				return p - s1;
			}
		}
	}

	return p - s1;
}
1798 /* }}} */
1799
1800 /* {{{ Finds first occurrence of a string within another, case insensitive */
1801 PHP_FUNCTION(stristr)
1802 {
1803 zend_string *haystack, *needle;
1804 const char *found = NULL;
1805 size_t found_offset;
1806 char *haystack_dup;
1807 char *orig_needle;
1808 bool part = 0;
1809
1810 ZEND_PARSE_PARAMETERS_START(2, 3)
1811 Z_PARAM_STR(haystack)
1812 Z_PARAM_STR(needle)
1813 Z_PARAM_OPTIONAL
1814 Z_PARAM_BOOL(part)
1815 ZEND_PARSE_PARAMETERS_END();
1816
1817 haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
1818 orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
1819 found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
1820 efree(orig_needle);
1821
1822 if (found) {
1823 found_offset = found - haystack_dup;
1824 if (part) {
1825 RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
1826 } else {
1827 RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
1828 }
1829 } else {
1830 RETVAL_FALSE;
1831 }
1832
1833 efree(haystack_dup);
1834 }
1835 /* }}} */
1836
1837 /* {{{ Finds first occurrence of a string within another */
1838 PHP_FUNCTION(strstr)
1839 {
1840 zend_string *haystack, *needle;
1841 const char *found = NULL;
1842 zend_long found_offset;
1843 bool part = 0;
1844
1845 ZEND_PARSE_PARAMETERS_START(2, 3)
1846 Z_PARAM_STR(haystack)
1847 Z_PARAM_STR(needle)
1848 Z_PARAM_OPTIONAL
1849 Z_PARAM_BOOL(part)
1850 ZEND_PARSE_PARAMETERS_END();
1851
1852 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1853
1854 if (found) {
1855 found_offset = found - ZSTR_VAL(haystack);
1856 if (part) {
1857 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1858 } else {
1859 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1860 }
1861 }
1862 RETURN_FALSE;
1863 }
1864 /* }}} */
1865
1866 /* {{{ Checks if a string contains another */
1867 PHP_FUNCTION(str_contains)
1868 {
1869 zend_string *haystack, *needle;
1870
1871 ZEND_PARSE_PARAMETERS_START(2, 2)
1872 Z_PARAM_STR(haystack)
1873 Z_PARAM_STR(needle)
1874 ZEND_PARSE_PARAMETERS_END();
1875
1876 RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1877 }
1878 /* }}} */
1879
1880 /* {{{ Checks if haystack starts with needle */
1881 PHP_FUNCTION(str_starts_with)
1882 {
1883 zend_string *haystack, *needle;
1884
1885 ZEND_PARSE_PARAMETERS_START(2, 2)
1886 Z_PARAM_STR(haystack)
1887 Z_PARAM_STR(needle)
1888 ZEND_PARSE_PARAMETERS_END();
1889
1890 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1891 RETURN_FALSE;
1892 }
1893
1894 RETURN_BOOL(memcmp(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1895 }
1896 /* }}} */
1897
1898 /* {{{ Checks if haystack ends with needle */
1899 PHP_FUNCTION(str_ends_with)
1900 {
1901 zend_string *haystack, *needle;
1902
1903 ZEND_PARSE_PARAMETERS_START(2, 2)
1904 Z_PARAM_STR(haystack)
1905 Z_PARAM_STR(needle)
1906 ZEND_PARSE_PARAMETERS_END();
1907
1908 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1909 RETURN_FALSE;
1910 }
1911
1912 RETURN_BOOL(memcmp(
1913 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1914 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1915 }
1916 /* }}} */
1917
1918 /* {{{ An alias for strstr */
1919 /* }}} */
1920
1921 /* {{{ Finds position of first occurrence of a string within another */
1922 PHP_FUNCTION(strpos)
1923 {
1924 zend_string *haystack, *needle;
1925 const char *found = NULL;
1926 zend_long offset = 0;
1927
1928 ZEND_PARSE_PARAMETERS_START(2, 3)
1929 Z_PARAM_STR(haystack)
1930 Z_PARAM_STR(needle)
1931 Z_PARAM_OPTIONAL
1932 Z_PARAM_LONG(offset)
1933 ZEND_PARSE_PARAMETERS_END();
1934
1935 if (offset < 0) {
1936 offset += (zend_long)ZSTR_LEN(haystack);
1937 }
1938 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1939 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1940 RETURN_THROWS();
1941 }
1942
1943 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1944 ZSTR_VAL(needle), ZSTR_LEN(needle),
1945 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1946
1947 if (found) {
1948 RETURN_LONG(found - ZSTR_VAL(haystack));
1949 } else {
1950 RETURN_FALSE;
1951 }
1952 }
1953 /* }}} */
1954
1955 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
1956 PHP_FUNCTION(stripos)
1957 {
1958 const char *found = NULL;
1959 zend_string *haystack, *needle;
1960 zend_long offset = 0;
1961 zend_string *needle_dup = NULL, *haystack_dup;
1962
1963 ZEND_PARSE_PARAMETERS_START(2, 3)
1964 Z_PARAM_STR(haystack)
1965 Z_PARAM_STR(needle)
1966 Z_PARAM_OPTIONAL
1967 Z_PARAM_LONG(offset)
1968 ZEND_PARSE_PARAMETERS_END();
1969
1970 if (offset < 0) {
1971 offset += (zend_long)ZSTR_LEN(haystack);
1972 }
1973 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1974 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1975 RETURN_THROWS();
1976 }
1977
1978 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1979 RETURN_FALSE;
1980 }
1981
1982 haystack_dup = php_string_tolower(haystack);
1983 needle_dup = php_string_tolower(needle);
1984 found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
1985 ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
1986
1987 if (found) {
1988 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
1989 } else {
1990 RETVAL_FALSE;
1991 }
1992
1993 zend_string_release_ex(haystack_dup, 0);
1994 zend_string_release_ex(needle_dup, 0);
1995 }
1996 /* }}} */
1997
1998 /* {{{ Finds position of last occurrence of a string within another string */
1999 PHP_FUNCTION(strrpos)
2000 {
2001 zend_string *needle;
2002 zend_string *haystack;
2003 zend_long offset = 0;
2004 const char *p, *e, *found;
2005
2006 ZEND_PARSE_PARAMETERS_START(2, 3)
2007 Z_PARAM_STR(haystack)
2008 Z_PARAM_STR(needle)
2009 Z_PARAM_OPTIONAL
2010 Z_PARAM_LONG(offset)
2011 ZEND_PARSE_PARAMETERS_END();
2012
2013 if (offset >= 0) {
2014 if ((size_t)offset > ZSTR_LEN(haystack)) {
2015 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2016 RETURN_THROWS();
2017 }
2018 p = ZSTR_VAL(haystack) + (size_t)offset;
2019 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2020 } else {
2021 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2022 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2023 RETURN_THROWS();
2024 }
2025
2026 p = ZSTR_VAL(haystack);
2027 if ((size_t)-offset < ZSTR_LEN(needle)) {
2028 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2029 } else {
2030 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2031 }
2032 }
2033
2034 if ((found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e))) {
2035 RETURN_LONG(found - ZSTR_VAL(haystack));
2036 }
2037
2038 RETURN_FALSE;
2039 }
2040 /* }}} */
2041
/* {{{ Finds position of last occurrence of a string within another string, case insensitive */
2043 PHP_FUNCTION(strripos)
2044 {
2045 zend_string *needle;
2046 zend_string *haystack;
2047 zend_long offset = 0;
2048 const char *p, *e, *found;
2049 zend_string *needle_dup, *haystack_dup;
2050
2051 ZEND_PARSE_PARAMETERS_START(2, 3)
2052 Z_PARAM_STR(haystack)
2053 Z_PARAM_STR(needle)
2054 Z_PARAM_OPTIONAL
2055 Z_PARAM_LONG(offset)
2056 ZEND_PARSE_PARAMETERS_END();
2057
2058 if (ZSTR_LEN(needle) == 1) {
2059 /* Single character search can shortcut memcmps
2060 Can also avoid tolower emallocs */
2061 char lowered;
2062 if (offset >= 0) {
2063 if ((size_t)offset > ZSTR_LEN(haystack)) {
2064 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2065 RETURN_THROWS();
2066 }
2067 p = ZSTR_VAL(haystack) + (size_t)offset;
2068 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
2069 } else {
2070 p = ZSTR_VAL(haystack);
2071 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2072 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2073 RETURN_THROWS();
2074 }
2075 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
2076 }
2077 /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
2078 lowered = tolower(*ZSTR_VAL(needle));
2079 while (e >= p) {
2080 if (tolower(*e) == lowered) {
2081 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
2082 }
2083 e--;
2084 }
2085 RETURN_FALSE;
2086 }
2087
2088 haystack_dup = php_string_tolower(haystack);
2089 if (offset >= 0) {
2090 if ((size_t)offset > ZSTR_LEN(haystack)) {
2091 zend_string_release_ex(haystack_dup, 0);
2092 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2093 RETURN_THROWS();
2094 }
2095 p = ZSTR_VAL(haystack_dup) + offset;
2096 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2097 } else {
2098 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2099 zend_string_release_ex(haystack_dup, 0);
2100 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2101 RETURN_THROWS();
2102 }
2103
2104 p = ZSTR_VAL(haystack_dup);
2105 if ((size_t)-offset < ZSTR_LEN(needle)) {
2106 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2107 } else {
2108 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2109 }
2110 }
2111
2112 needle_dup = php_string_tolower(needle);
2113 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
2114 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
2115 zend_string_release_ex(needle_dup, 0);
2116 zend_string_release_ex(haystack_dup, 0);
2117 } else {
2118 zend_string_release_ex(needle_dup, 0);
2119 zend_string_release_ex(haystack_dup, 0);
2120 RETURN_FALSE;
2121 }
2122 }
2123 /* }}} */
2124
2125 /* {{{ Finds the last occurrence of a character in a string within another */
2126 PHP_FUNCTION(strrchr)
2127 {
2128 zend_string *haystack, *needle;
2129 const char *found = NULL;
2130 zend_long found_offset;
2131
2132 ZEND_PARSE_PARAMETERS_START(2, 2)
2133 Z_PARAM_STR(haystack)
2134 Z_PARAM_STR(needle)
2135 ZEND_PARSE_PARAMETERS_END();
2136
2137 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2138 if (found) {
2139 found_offset = found - ZSTR_VAL(haystack);
2140 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2141 } else {
2142 RETURN_FALSE;
2143 }
2144 }
2145 /* }}} */
2146
2147 /* {{{ php_chunk_split */
2148 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
2149 {
2150 char *q;
2151 const char *p;
2152 size_t chunks;
2153 size_t restlen;
2154 zend_string *dest;
2155
2156 chunks = srclen / chunklen;
2157 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
2158 if (restlen) {
2159 /* We want chunks to be rounded up rather than rounded down.
2160 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
2161 chunks++;
2162 }
2163
2164 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
2165
2166 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
2167 memcpy(q, p, chunklen);
2168 q += chunklen;
2169 memcpy(q, end, endlen);
2170 q += endlen;
2171 p += chunklen;
2172 }
2173
2174 if (restlen) {
2175 memcpy(q, p, restlen);
2176 q += restlen;
2177 memcpy(q, end, endlen);
2178 q += endlen;
2179 }
2180
2181 *q = '\0';
2182 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
2183
2184 return dest;
2185 }
2186 /* }}} */
2187
/* {{{ Splits a string into chunks of the given length and returns them joined, each chunk followed by the separator */
2189 PHP_FUNCTION(chunk_split)
2190 {
2191 zend_string *str;
2192 char *end = "\r\n";
2193 size_t endlen = 2;
2194 zend_long chunklen = 76;
2195 zend_string *result;
2196
2197 ZEND_PARSE_PARAMETERS_START(1, 3)
2198 Z_PARAM_STR(str)
2199 Z_PARAM_OPTIONAL
2200 Z_PARAM_LONG(chunklen)
2201 Z_PARAM_STRING(end, endlen)
2202 ZEND_PARSE_PARAMETERS_END();
2203
2204 if (chunklen <= 0) {
2205 zend_argument_value_error(2, "must be greater than 0");
2206 RETURN_THROWS();
2207 }
2208
2209 if ((size_t)chunklen > ZSTR_LEN(str)) {
2210 /* to maintain BC, we must return original string + ending */
2211 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2212 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2213 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2214 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2215 RETURN_NEW_STR(result);
2216 }
2217
2218 if (!ZSTR_LEN(str)) {
2219 RETURN_EMPTY_STRING();
2220 }
2221
2222 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2223
2224 RETURN_STR(result);
2225 }
2226 /* }}} */
2227
2228 /* {{{ Returns part of a string */
2229 PHP_FUNCTION(substr)
2230 {
2231 zend_string *str;
2232 zend_long l = 0, f;
2233 bool len_is_null = 1;
2234
2235 ZEND_PARSE_PARAMETERS_START(2, 3)
2236 Z_PARAM_STR(str)
2237 Z_PARAM_LONG(f)
2238 Z_PARAM_OPTIONAL
2239 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2240 ZEND_PARSE_PARAMETERS_END();
2241
2242 if (f < 0) {
2243 /* if "from" position is negative, count start position from the end
2244 * of the string
2245 */
2246 if (-(size_t)f > ZSTR_LEN(str)) {
2247 f = 0;
2248 } else {
2249 f = (zend_long)ZSTR_LEN(str) + f;
2250 }
2251 } else if ((size_t)f > ZSTR_LEN(str)) {
2252 RETURN_EMPTY_STRING();
2253 }
2254
2255 if (!len_is_null) {
2256 if (l < 0) {
2257 /* if "length" position is negative, set it to the length
2258 * needed to stop that many chars from the end of the string
2259 */
2260 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2261 l = 0;
2262 } else {
2263 l = (zend_long)ZSTR_LEN(str) - f + l;
2264 }
2265 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2266 l = (zend_long)ZSTR_LEN(str) - f;
2267 }
2268 } else {
2269 l = (zend_long)ZSTR_LEN(str) - f;
2270 }
2271
2272 if (l == ZSTR_LEN(str)) {
2273 RETURN_STR_COPY(str);
2274 } else {
2275 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2276 }
2277 }
2278 /* }}} */
2279
/* {{{ Replaces part of a string with another string
 *
 * Arguments, as parsed below:
 *   1. subject      - a string, or an array whose elements are converted to strings
 *   2. replacement  - a string, or an array
 *   3. "from"       - an integer, or an array
 *   4. "length"     - optional; an integer, an array, or null
 *
 * String subject: arguments 3 and 4 must not be arrays (a type error is thrown
 * otherwise) and a single new string is returned. If the replacement is an
 * array, only its first defined element is used (or the empty string when it
 * has none).
 *
 * Array subject: an array with the same keys is returned. For every subject
 * element the next not-yet-consumed element of the "from", "length" and
 * replacement arrays is used; when one of those arrays runs out, "from" falls
 * back to 0, "length" to the length of the current element and the replacement
 * to nothing.
 *
 * In both modes a negative "from" counts from the end of the string and a
 * negative "length" means "stop that many bytes before the end".
 */
PHP_FUNCTION(substr_replace)
{
	zend_string *str, *repl_str;
	HashTable *str_ht, *repl_ht;
	HashTable *from_ht;
	zend_long from_long;
	HashTable *len_ht = NULL;
	zend_long len_long;
	bool len_is_null = 1;
	zend_long l = 0;
	zend_long f;
	zend_string *result;
	/* cursors into the raw bucket arrays of from_ht / repl_ht / len_ht */
	HashPosition from_idx, repl_idx, len_idx;
	zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;

	ZEND_PARSE_PARAMETERS_START(3, 4)
		Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
		Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
		Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
		Z_PARAM_OPTIONAL
		Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
	ZEND_PARSE_PARAMETERS_END();

	/* Default length for the single-string case: the whole subject. The array
	 * case recomputes l per element further down. */
	if (len_is_null) {
		if (str) {
			l = ZSTR_LEN(str);
		}
	} else if (!len_ht) {
		l = len_long;
	}

	if (str) {
		/* ---- subject is a single string ---- */
		if (from_ht) {
			zend_argument_type_error(3, "cannot be an array when working on a single string");
			RETURN_THROWS();
		}
		if (len_ht) {
			zend_argument_type_error(4, "cannot be an array when working on a single string");
			RETURN_THROWS();
		}

		f = from_long;

		/* if "from" position is negative, count start position from the end
		 * of the string
		 */
		if (f < 0) {
			f = (zend_long)ZSTR_LEN(str) + f;
			if (f < 0) {
				f = 0;
			}
		} else if ((size_t)f > ZSTR_LEN(str)) {
			f = ZSTR_LEN(str);
		}
		/* if "length" position is negative, set it to the length
		 * needed to stop that many chars from the end of the string
		 */
		if (l < 0) {
			l = ((zend_long)ZSTR_LEN(str) - f) + l;
			if (l < 0) {
				l = 0;
			}
		}

		/* clamp l to the subject length ... */
		if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
			l = ZSTR_LEN(str);
		}

		/* ... and make sure the replaced range [f, f + l) ends inside the subject */
		if ((f + l) > (zend_long)ZSTR_LEN(str)) {
			l = ZSTR_LEN(str) - f;
		}

		zend_string *tmp_repl_str = NULL;
		if (repl_ht) {
			/* replacement is an array: use its first defined element only */
			repl_idx = 0;
			while (repl_idx < repl_ht->nNumUsed) {
				tmp_repl = &repl_ht->arData[repl_idx].val;
				if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
					break;
				}
				repl_idx++;
			}
			if (repl_idx < repl_ht->nNumUsed) {
				repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
			} else {
				repl_str = STR_EMPTY_ALLOC();
			}
		}

		/* result = subject[0, f) . replacement . subject[f + l, end) */
		result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);

		memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
		if (ZSTR_LEN(repl_str)) {
			memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
		}
		memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
		ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
		zend_tmp_string_release(tmp_repl_str);
		RETURN_NEW_STR(result);
	} else { /* str is array of strings */
		zend_string *str_index = NULL;
		size_t result_len;
		zend_ulong num_index;

		/* TODO
		if (!len_is_null && from_ht) {
			if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
				php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
				RETURN_STR_COPY(str);
			}
		}
		*/

		array_init(return_value);

		/* The three cursors advance independently, one step per subject
		 * element that actually consumed a value from the respective array. */
		from_idx = len_idx = repl_idx = 0;

		ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
			zend_string *tmp_orig_str;
			zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);

			/* --- determine f for this element --- */
			if (from_ht) {
				/* skip over unused (IS_UNDEF) buckets */
				while (from_idx < from_ht->nNumUsed) {
					tmp_from = &from_ht->arData[from_idx].val;
					if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
						break;
					}
					from_idx++;
				}
				if (from_idx < from_ht->nNumUsed) {
					f = zval_get_long(tmp_from);

					if (f < 0) {
						f = (zend_long)ZSTR_LEN(orig_str) + f;
						if (f < 0) {
							f = 0;
						}
					} else if (f > (zend_long)ZSTR_LEN(orig_str)) {
						f = ZSTR_LEN(orig_str);
					}
					from_idx++;
				} else {
					/* "from" array exhausted */
					f = 0;
				}
			} else {
				/* scalar "from": same normalisation, applied to every element */
				f = from_long;
				if (f < 0) {
					f = (zend_long)ZSTR_LEN(orig_str) + f;
					if (f < 0) {
						f = 0;
					}
				} else if (f > (zend_long)ZSTR_LEN(orig_str)) {
					f = ZSTR_LEN(orig_str);
				}
			}

			/* --- determine l for this element --- */
			if (len_ht) {
				while (len_idx < len_ht->nNumUsed) {
					tmp_len = &len_ht->arData[len_idx].val;
					if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
						break;
					}
					len_idx++;
				}
				if (len_idx < len_ht->nNumUsed) {
					l = zval_get_long(tmp_len);
					len_idx++;
				} else {
					/* "length" array exhausted */
					l = ZSTR_LEN(orig_str);
				}
			} else if (!len_is_null) {
				l = len_long;
			} else {
				l = ZSTR_LEN(orig_str);
			}

			/* negative length: stop that many bytes before the end */
			if (l < 0) {
				l = (ZSTR_LEN(orig_str) - f) + l;
				if (l < 0) {
					l = 0;
				}
			}

			ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
			ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
			/* make sure the replaced range [f, f + l) ends inside the element */
			if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
				l = ZSTR_LEN(orig_str) - f;
			}

			/* length of the element without the replaced range; the
			 * replacement length is added below */
			result_len = ZSTR_LEN(orig_str) - l;

			if (repl_ht) {
				while (repl_idx < repl_ht->nNumUsed) {
					tmp_repl = &repl_ht->arData[repl_idx].val;
					if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
						break;
					}
					repl_idx++;
				}
				if (repl_idx < repl_ht->nNumUsed) {
					zend_string *tmp_repl_str;
					zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);

					result_len += ZSTR_LEN(repl_str);
					repl_idx++;
					result = zend_string_safe_alloc(1, result_len, 0, 0);

					memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
					memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
					memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
					zend_tmp_string_release(tmp_repl_str);
				} else {
					/* replacement array exhausted: just cut the range out */
					result = zend_string_safe_alloc(1, result_len, 0, 0);

					memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
					memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
				}
			} else {
				/* scalar replacement, shared by all elements */
				result_len += ZSTR_LEN(repl_str);

				result = zend_string_safe_alloc(1, result_len, 0, 0);

				memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
				memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
				memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
			}

			ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';

			/* store the result under the same key as the subject element */
			if (str_index) {
				zval tmp;

				ZVAL_NEW_STR(&tmp, result);
				zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
			} else {
				add_index_str(return_value, num_index, result);
			}

			zend_tmp_string_release(tmp_orig_str);
		} ZEND_HASH_FOREACH_END();
	} /* if */
}
2523 /* }}} */
2524
2525 /* {{{ Quotes meta characters */
2526 PHP_FUNCTION(quotemeta)
2527 {
2528 zend_string *old;
2529 const char *old_end, *p;
2530 char *q;
2531 char c;
2532 zend_string *str;
2533
2534 ZEND_PARSE_PARAMETERS_START(1, 1)
2535 Z_PARAM_STR(old)
2536 ZEND_PARSE_PARAMETERS_END();
2537
2538 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2539
2540 if (ZSTR_LEN(old) == 0) {
2541 RETURN_EMPTY_STRING();
2542 }
2543
2544 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2545
2546 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2547 c = *p;
2548 switch (c) {
2549 case '.':
2550 case '\\':
2551 case '+':
2552 case '*':
2553 case '?':
2554 case '[':
2555 case '^':
2556 case ']':
2557 case '$':
2558 case '(':
2559 case ')':
2560 *q++ = '\\';
2561 ZEND_FALLTHROUGH;
2562 default:
2563 *q++ = c;
2564 }
2565 }
2566
2567 *q = '\0';
2568
2569 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2570 }
2571 /* }}} */
2572
2573 /* {{{ Returns ASCII value of character
2574 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2575 PHP_FUNCTION(ord)
2576 {
2577 zend_string *str;
2578
2579 ZEND_PARSE_PARAMETERS_START(1, 1)
2580 Z_PARAM_STR(str)
2581 ZEND_PARSE_PARAMETERS_END();
2582
2583 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2584 }
2585 /* }}} */
2586
2587 /* {{{ Converts ASCII code to a character
2588 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2589 PHP_FUNCTION(chr)
2590 {
2591 zend_long c;
2592
2593 ZEND_PARSE_PARAMETERS_START(1, 1)
2594 Z_PARAM_LONG(c)
2595 ZEND_PARSE_PARAMETERS_END();
2596
2597 c &= 0xff;
2598 RETURN_CHAR(c);
2599 }
2600 /* }}} */
2601
2602 /* {{{ php_ucfirst
2603 Uppercase the first character of the word in a native string */
2604 static zend_string* php_ucfirst(zend_string *str)
2605 {
2606 const unsigned char ch = ZSTR_VAL(str)[0];
2607 unsigned char r = toupper(ch);
2608 if (r == ch) {
2609 return zend_string_copy(str);
2610 } else {
2611 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2612 ZSTR_VAL(s)[0] = r;
2613 return s;
2614 }
2615 }
2616 /* }}} */
2617
2618 /* {{{ Makes a string's first character uppercase */
2619 PHP_FUNCTION(ucfirst)
2620 {
2621 zend_string *str;
2622
2623 ZEND_PARSE_PARAMETERS_START(1, 1)
2624 Z_PARAM_STR(str)
2625 ZEND_PARSE_PARAMETERS_END();
2626
2627 if (!ZSTR_LEN(str)) {
2628 RETURN_EMPTY_STRING();
2629 }
2630
2631 RETURN_STR(php_ucfirst(str));
2632 }
2633 /* }}} */
2634
2635 /* {{{
2636 Lowercase the first character of the word in a native string */
2637 static zend_string* php_lcfirst(zend_string *str)
2638 {
2639 unsigned char r = tolower(ZSTR_VAL(str)[0]);
2640 if (r == ZSTR_VAL(str)[0]) {
2641 return zend_string_copy(str);
2642 } else {
2643 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2644 ZSTR_VAL(s)[0] = r;
2645 return s;
2646 }
2647 }
2648 /* }}} */
2649
2650 /* {{{ Make a string's first character lowercase */
2651 PHP_FUNCTION(lcfirst)
2652 {
2653 zend_string *str;
2654
2655 ZEND_PARSE_PARAMETERS_START(1, 1)
2656 Z_PARAM_STR(str)
2657 ZEND_PARSE_PARAMETERS_END();
2658
2659 if (!ZSTR_LEN(str)) {
2660 RETURN_EMPTY_STRING();
2661 }
2662
2663 RETURN_STR(php_lcfirst(str));
2664 }
2665 /* }}} */
2666
2667 /* {{{ Uppercase the first character of every word in a string */
2668 PHP_FUNCTION(ucwords)
2669 {
2670 zend_string *str;
2671 char *delims = " \t\r\n\f\v";
2672 char *r;
2673 const char *r_end;
2674 size_t delims_len = 6;
2675 char mask[256];
2676
2677 ZEND_PARSE_PARAMETERS_START(1, 2)
2678 Z_PARAM_STR(str)
2679 Z_PARAM_OPTIONAL
2680 Z_PARAM_STRING(delims, delims_len)
2681 ZEND_PARSE_PARAMETERS_END();
2682
2683 if (!ZSTR_LEN(str)) {
2684 RETURN_EMPTY_STRING();
2685 }
2686
2687 php_charmask((const unsigned char *) delims, delims_len, mask);
2688
2689 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2690 r = Z_STRVAL_P(return_value);
2691
2692 *r = toupper((unsigned char) *r);
2693 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2694 if (mask[(unsigned char)*r++]) {
2695 *r = toupper((unsigned char) *r);
2696 }
2697 }
2698 }
2699 /* }}} */
2700
2701 /* {{{ php_strtr */
2702 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2703 {
2704 size_t i;
2705
2706 if (UNEXPECTED(trlen < 1)) {
2707 return str;
2708 } else if (trlen == 1) {
2709 char ch_from = *str_from;
2710 char ch_to = *str_to;
2711
2712 for (i = 0; i < len; i++) {
2713 if (str[i] == ch_from) {
2714 str[i] = ch_to;
2715 }
2716 }
2717 } else {
2718 unsigned char xlat[256];
2719
2720 memset(xlat, 0, sizeof(xlat));
2721
2722 for (i = 0; i < trlen; i++) {
2723 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2724 }
2725
2726 for (i = 0; i < len; i++) {
2727 str[i] += xlat[(size_t)(unsigned char) str[i]];
2728 }
2729 }
2730
2731 return str;
2732 }
2733 /* }}} */
2734
2735 /* {{{ php_strtr_ex */
2736 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2737 {
2738 zend_string *new_str = NULL;
2739 size_t i;
2740
2741 if (UNEXPECTED(trlen < 1)) {
2742 return zend_string_copy(str);
2743 } else if (trlen == 1) {
2744 char ch_from = *str_from;
2745 char ch_to = *str_to;
2746
2747 for (i = 0; i < ZSTR_LEN(str); i++) {
2748 if (ZSTR_VAL(str)[i] == ch_from) {
2749 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2750 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2751 ZSTR_VAL(new_str)[i] = ch_to;
2752 i++;
2753 for (; i < ZSTR_LEN(str); i++) {
2754 ZSTR_VAL(new_str)[i] = (ZSTR_VAL(str)[i] != ch_from) ? ZSTR_VAL(str)[i] : ch_to;
2755 }
2756 ZSTR_VAL(new_str)[i] = 0;
2757 return new_str;
2758 }
2759 }
2760 } else {
2761 unsigned char xlat[256];
2762
2763 memset(xlat, 0, sizeof(xlat));;
2764
2765 for (i = 0; i < trlen; i++) {
2766 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
2767 }
2768
2769 for (i = 0; i < ZSTR_LEN(str); i++) {
2770 if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2771 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2772 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2773 do {
2774 ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2775 i++;
2776 } while (i < ZSTR_LEN(str));
2777 ZSTR_VAL(new_str)[i] = 0;
2778 return new_str;
2779 }
2780 }
2781 }
2782
2783 return zend_string_copy(str);
2784 }
2785 /* }}} */
2786
/* {{{ php_strtr_array
 * Replaces, in a single left-to-right pass over input, every occurrence of a
 * key of pats with the (string-converted) value stored under that key. At each
 * position the longest matching key wins, and replaced text is not scanned
 * again. The result is written to return_value; if nothing was replaced,
 * return_value receives input itself (with an added reference).
 *
 * Integer keys of pats are treated as their decimal string representation.
 * Empty string keys are skipped with a warning; keys longer than input are
 * ignored.
 */
static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
{
	const char *str = ZSTR_VAL(input);
	size_t slen = ZSTR_LEN(input);
	zend_ulong num_key;
	zend_string *str_key;
	size_t len, pos, old_pos;
	int num_keys = 0;          /* set to 1 when pats contains at least one integer key */
	size_t minlen = 128*1024;  /* shortest usable key length seen so far */
	size_t maxlen = 0;         /* longest usable key length seen so far */
	HashTable str_hash;        /* string-keyed rebuild of pats, only used when num_keys is set */
	zval *entry;
	const char *key;
	smart_str result = {0};
	/* bitset: marks which byte values occur as the first byte of some key.
	 * For a value v the word index is v / sizeof(zend_ulong) and the bit index
	 * is v % sizeof(zend_ulong). */
	zend_ulong bitset[256/sizeof(zend_ulong)];
	/* num_bitset: marks which key lengths occur, indexed the same way */
	zend_ulong *num_bitset;

	/* we will collect all possible key lengths */
	num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
	memset(bitset, 0, sizeof(bitset));

	/* check if original array has numeric keys */
	ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
		if (UNEXPECTED(!str_key)) {
			num_keys = 1;
		} else {
			len = ZSTR_LEN(str_key);
			if (UNEXPECTED(len < 1)) {
				php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
				continue;
			} else if (UNEXPECTED(len > slen)) {
				/* skip long patterns */
				continue;
			}
			if (len > maxlen) {
				maxlen = len;
			}
			if (len < minlen) {
				minlen = len;
			}
			/* remember possible key length */
			num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
			/* remember possible first byte */
			bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
		}
	} ZEND_HASH_FOREACH_END();

	if (UNEXPECTED(num_keys)) {
		zend_string *key_used;
		/* we have to rebuild HashTable with numeric keys */
		zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
		ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
			if (UNEXPECTED(!str_key)) {
				/* integer key: use its decimal string form and register its
				 * length / first byte, which the first pass could not do */
				key_used = zend_long_to_str(num_key);
				len = ZSTR_LEN(key_used);
				if (UNEXPECTED(len > slen)) {
					/* skip long patterns */
					zend_string_release(key_used);
					continue;
				}
				if (len > maxlen) {
					maxlen = len;
				}
				if (len < minlen) {
					minlen = len;
				}
				/* remember possible key length */
				num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
				bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
			} else {
				/* string key: already accounted for in the first pass */
				key_used = str_key;
				len = ZSTR_LEN(key_used);
				if (UNEXPECTED(len > slen)) {
					/* skip long patterns */
					continue;
				}
			}
			zend_hash_add(&str_hash, key_used, entry);
			if (UNEXPECTED(!str_key)) {
				/* drop our reference to the temporary key string */
				zend_string_release_ex(key_used, 0);
			}
		} ZEND_HASH_FOREACH_END();
		pats = &str_hash;
	}

	/* minlen still above maxlen means no usable key was registered */
	if (UNEXPECTED(minlen > maxlen)) {
		/* return the original string */
		if (pats == &str_hash) {
			zend_hash_destroy(&str_hash);
		}
		efree(num_bitset);
		RETURN_STR_COPY(input);
	}

	/* old_pos: start of the input that has not been copied to result yet;
	 * pos: position currently being tested for a match */
	old_pos = pos = 0;
	while (pos <= slen - minlen) {
		key = str + pos;
		/* cheap reject: no key starts with this byte */
		if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
			/* try candidate lengths from longest to shortest */
			len = maxlen;
			if (len > slen - pos) {
				len = slen - pos;
			}
			while (len >= minlen) {
				/* only look up lengths that some key actually has */
				if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
					entry = zend_hash_str_find(pats, key, len);
					if (entry != NULL) {
						zend_string *tmp;
						zend_string *s = zval_get_tmp_string(entry, &tmp);
						/* flush the unmatched text, then the replacement */
						smart_str_appendl(&result, str + old_pos, pos - old_pos);
						smart_str_append(&result, s);
						old_pos = pos + len;
						/* the pos++ below moves pos to old_pos, i.e. just past the match */
						pos = old_pos - 1;
						zend_tmp_string_release(tmp);
						break;
					}
				}
				len--;
			}
		}
		pos++;
	}

	if (result.s) {
		/* at least one replacement happened: append the remaining tail */
		smart_str_appendl(&result, str + old_pos, slen - old_pos);
		smart_str_0(&result);
		RETVAL_NEW_STR(result.s);
	} else {
		smart_str_free(&result);
		RETVAL_STR_COPY(input);
	}

	if (pats == &str_hash) {
		zend_hash_destroy(&str_hash);
	}
	efree(num_bitset);
}
2923 /* }}} */
2924
2925 /* {{{ php_char_to_str_ex */
2926 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
2927 {
2928 zend_string *result;
2929 size_t char_count = 0;
2930 int lc_from = 0;
2931 const char *source, *source_end= ZSTR_VAL(str) + ZSTR_LEN(str);
2932 char *target;
2933
2934 if (case_sensitivity) {
2935 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str);
2936 while ((p = memchr(p, from, (e - p)))) {
2937 char_count++;
2938 p++;
2939 }
2940 } else {
2941 lc_from = tolower(from);
2942 for (source = ZSTR_VAL(str); source < source_end; source++) {
2943 if (tolower(*source) == lc_from) {
2944 char_count++;
2945 }
2946 }
2947 }
2948
2949 if (char_count == 0) {
2950 return zend_string_copy(str);
2951 }
2952
2953 if (to_len > 0) {
2954 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
2955 } else {
2956 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
2957 }
2958 target = ZSTR_VAL(result);
2959
2960 if (case_sensitivity) {
2961 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
2962 while ((p = memchr(p, from, (e - p)))) {
2963 memcpy(target, s, (p - s));
2964 target += p - s;
2965 memcpy(target, to, to_len);
2966 target += to_len;
2967 p++;
2968 s = p;
2969 if (replace_count) {
2970 *replace_count += 1;
2971 }
2972 }
2973 if (s < e) {
2974 memcpy(target, s, (e - s));
2975 target += e - s;
2976 }
2977 } else {
2978 for (source = ZSTR_VAL(str); source < source_end; source++) {
2979 if (tolower(*source) == lc_from) {
2980 if (replace_count) {
2981 *replace_count += 1;
2982 }
2983 memcpy(target, to, to_len);
2984 target += to_len;
2985 } else {
2986 *target = *source;
2987 target++;
2988 }
2989 }
2990 }
2991 *target = 0;
2992 return result;
2993 }
2994 /* }}} */
2995
2996 /* {{{ php_str_to_str_ex */
2997 static zend_string *php_str_to_str_ex(zend_string *haystack,
2998 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
2999 {
3000
3001 if (needle_len < ZSTR_LEN(haystack)) {
3002 zend_string *new_str;
3003 const char *end;
3004 const char *p, *r;
3005 char *e;
3006
3007 if (needle_len == str_len) {
3008 new_str = NULL;
3009 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3010 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3011 if (!new_str) {
3012 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3013 }
3014 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
3015 (*replace_count)++;
3016 }
3017 if (!new_str) {
3018 goto nothing_todo;
3019 }
3020 return new_str;
3021 } else {
3022 size_t count = 0;
3023 const char *o = ZSTR_VAL(haystack);
3024 const char *n = needle;
3025 const char *endp = o + ZSTR_LEN(haystack);
3026
3027 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3028 o += needle_len;
3029 count++;
3030 }
3031 if (count == 0) {
3032 /* Needle doesn't occur, shortcircuit the actual replacement. */
3033 goto nothing_todo;
3034 }
3035 if (str_len > needle_len) {
3036 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
3037 } else {
3038 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
3039 }
3040
3041 e = ZSTR_VAL(new_str);
3042 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
3043 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3044 memcpy(e, p, r - p);
3045 e += r - p;
3046 memcpy(e, str, str_len);
3047 e += str_len;
3048 (*replace_count)++;
3049 }
3050
3051 if (p < end) {
3052 memcpy(e, p, end - p);
3053 e += end - p;
3054 }
3055
3056 *e = '\0';
3057 return new_str;
3058 }
3059 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
3060 nothing_todo:
3061 return zend_string_copy(haystack);
3062 } else {
3063 (*replace_count)++;
3064 return zend_string_init_fast(str, str_len);
3065 }
3066 }
3067 /* }}} */
3068
3069 /* {{{ php_str_to_str_i_ex */
3070 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3071 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3072 {
3073 zend_string *new_str = NULL;
3074 zend_string *lc_needle;
3075
3076 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3077 const char *end;
3078 const char *p, *r;
3079 char *e;
3080
3081 if (ZSTR_LEN(needle) == str_len) {
3082 lc_needle = php_string_tolower(needle);
3083 end = lc_haystack + ZSTR_LEN(haystack);
3084 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3085 if (!new_str) {
3086 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3087 }
3088 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3089 (*replace_count)++;
3090 }
3091 zend_string_release_ex(lc_needle, 0);
3092
3093 if (!new_str) {
3094 goto nothing_todo;
3095 }
3096 return new_str;
3097 } else {
3098 size_t count = 0;
3099 const char *o = lc_haystack;
3100 const char *n;
3101 const char *endp = o + ZSTR_LEN(haystack);
3102
3103 lc_needle = php_string_tolower(needle);
3104 n = ZSTR_VAL(lc_needle);
3105
3106 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3107 o += ZSTR_LEN(lc_needle);
3108 count++;
3109 }
3110 if (count == 0) {
3111 /* Needle doesn't occur, shortcircuit the actual replacement. */
3112 zend_string_release_ex(lc_needle, 0);
3113 goto nothing_todo;
3114 }
3115
3116 if (str_len > ZSTR_LEN(lc_needle)) {
3117 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3118 } else {
3119 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3120 }
3121
3122 e = ZSTR_VAL(new_str);
3123 end = lc_haystack + ZSTR_LEN(haystack);
3124
3125 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3126 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3127 e += r - p;
3128 memcpy(e, str, str_len);
3129 e += str_len;
3130 (*replace_count)++;
3131 }
3132
3133 if (p < end) {
3134 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3135 e += end - p;
3136 }
3137 *e = '\0';
3138
3139 zend_string_release_ex(lc_needle, 0);
3140
3141 return new_str;
3142 }
3143 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3144 nothing_todo:
3145 return zend_string_copy(haystack);
3146 } else {
3147 lc_needle = php_string_tolower(needle);
3148
3149 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3150 zend_string_release_ex(lc_needle, 0);
3151 goto nothing_todo;
3152 }
3153 zend_string_release_ex(lc_needle, 0);
3154
3155 new_str = zend_string_init(str, str_len, 0);
3156
3157 (*replace_count)++;
3158 return new_str;
3159 }
3160 }
3161 /* }}} */
3162
3163 /* {{{ php_str_to_str */
3164 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3165 {
3166 zend_string *new_str;
3167
3168 if (needle_len < length) {
3169 const char *end;
3170 const char *s, *p;
3171 char *e, *r;
3172
3173 if (needle_len == str_len) {
3174 new_str = zend_string_init(haystack, length, 0);
3175 end = ZSTR_VAL(new_str) + length;
3176 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3177 memcpy(r, str, str_len);
3178 }
3179 return new_str;
3180 } else {
3181 if (str_len < needle_len) {
3182 new_str = zend_string_alloc(length, 0);
3183 } else {
3184 size_t count = 0;
3185 const char *o = haystack;
3186 const char *n = needle;
3187 const char *endp = o + length;
3188
3189 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3190 o += needle_len;
3191 count++;
3192 }
3193 if (count == 0) {
3194 /* Needle doesn't occur, shortcircuit the actual replacement. */
3195 new_str = zend_string_init(haystack, length, 0);
3196 return new_str;
3197 } else {
3198 if (str_len > needle_len) {
3199 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3200 } else {
3201 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3202 }
3203 }
3204 }
3205
3206 s = e = ZSTR_VAL(new_str);
3207 end = haystack + length;
3208 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3209 memcpy(e, p, r - p);
3210 e += r - p;
3211 memcpy(e, str, str_len);
3212 e += str_len;
3213 }
3214
3215 if (p < end) {
3216 memcpy(e, p, end - p);
3217 e += end - p;
3218 }
3219
3220 *e = '\0';
3221 new_str = zend_string_truncate(new_str, e - s, 0);
3222 return new_str;
3223 }
3224 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3225 new_str = zend_string_init(haystack, length, 0);
3226 return new_str;
3227 } else {
3228 new_str = zend_string_init(str, str_len, 0);
3229
3230 return new_str;
3231 }
3232 }
3233 /* }}} */
3234
3235 /* {{{ Translates characters in str using given translation tables */
3236 PHP_FUNCTION(strtr)
3237 {
3238 zend_string *str, *from_str = NULL;
3239 HashTable *from_ht = NULL;
3240 char *to = NULL;
3241 size_t to_len = 0;
3242
3243 ZEND_PARSE_PARAMETERS_START(2, 3)
3244 Z_PARAM_STR(str)
3245 Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
3246 Z_PARAM_OPTIONAL
3247 Z_PARAM_STRING_OR_NULL(to, to_len)
3248 ZEND_PARSE_PARAMETERS_END();
3249
3250 if (!to && from_ht == NULL) {
3251 zend_argument_type_error(2, "must be of type array, string given");
3252 RETURN_THROWS();
3253 } else if (to && from_str == NULL) {
3254 zend_argument_type_error(2, "must be of type string, array given");
3255 RETURN_THROWS();
3256 }
3257
3258 /* shortcut for empty string */
3259 if (ZSTR_LEN(str) == 0) {
3260 RETURN_EMPTY_STRING();
3261 }
3262
3263 if (!to) {
3264 if (zend_hash_num_elements(from_ht) < 1) {
3265 RETURN_STR_COPY(str);
3266 } else if (zend_hash_num_elements(from_ht) == 1) {
3267 zend_long num_key;
3268 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3269 zval *entry;
3270
3271 ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
3272 tmp_str = NULL;
3273 if (UNEXPECTED(!str_key)) {
3274 str_key = tmp_str = zend_long_to_str(num_key);
3275 }
3276 replace = zval_get_tmp_string(entry, &tmp_replace);
3277 if (ZSTR_LEN(str_key) < 1) {
3278 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3279 RETVAL_STR_COPY(str);
3280 } else if (ZSTR_LEN(str_key) == 1) {
3281 RETVAL_STR(php_char_to_str_ex(str,
3282 ZSTR_VAL(str_key)[0],
3283 ZSTR_VAL(replace),
3284 ZSTR_LEN(replace),
3285 1,
3286 NULL));
3287 } else {
3288 zend_long dummy;
3289 RETVAL_STR(php_str_to_str_ex(str,
3290 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3291 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3292 }
3293 zend_tmp_string_release(tmp_str);
3294 zend_tmp_string_release(tmp_replace);
3295 return;
3296 } ZEND_HASH_FOREACH_END();
3297 } else {
3298 php_strtr_array(return_value, str, from_ht);
3299 }
3300 } else {
3301 RETURN_STR(php_strtr_ex(str,
3302 ZSTR_VAL(from_str),
3303 to,
3304 MIN(ZSTR_LEN(from_str), to_len)));
3305 }
3306 }
3307 /* }}} */
3308
/* {{{ Reverse a string
 *
 * Returns a new string holding the bytes of the argument in reverse order.
 * When SSSE3 is natively available, or on AArch64, the bulk of the string is
 * reversed 16 bytes at a time; the remaining bytes (and all bytes on other
 * targets) are handled by the scalar loop at the end.
 */
#if ZEND_INTRIN_SSSE3_NATIVE
#include <tmmintrin.h>
#elif defined(__aarch64__)
#include <arm_neon.h>
#endif
PHP_FUNCTION(strrev)
{
	zend_string *str;
	const char *s, *e;   /* s: first input byte; e: last input byte not yet copied */
	char *p;             /* next output position */
	zend_string *n;      /* result string */

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(str)
	ZEND_PARSE_PARAMETERS_END();

	n = zend_string_alloc(ZSTR_LEN(str), 0);
	p = ZSTR_VAL(n);

	s = ZSTR_VAL(str);
	e = s + ZSTR_LEN(str);
	/* step back from one-past-the-end to the last byte */
	--e;
#if ZEND_INTRIN_SSSE3_NATIVE
	/* e - s > 15 guarantees that the 16-byte load at e - 15 stays inside the input */
	if (e - s > 15) {
		/* shuffle mask that reverses the byte order of a 16-byte vector */
		const __m128i map = _mm_set_epi8(
				0, 1, 2, 3,
				4, 5, 6, 7,
				8, 9, 10, 11,
				12, 13, 14, 15);
		do {
			/* load the last 16 unprocessed bytes, store them reversed */
			const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
			_mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
			p += 16;
			e -= 16;
		} while (e - s > 15);
	}
#elif defined(__aarch64__)
	/* e - s > 15 guarantees that the 16-byte load at e - 15 stays inside the input */
	if (e - s > 15) {
		do {
			const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
			/* Synthesize rev128 with a rev64 + ext. */
			const uint8x16_t rev = vrev64q_u8(str);
			const uint8x16_t ext = (uint8x16_t)
				vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
			vst1q_u8((uint8_t *)p, ext);
			p += 16;
			e -= 16;
		} while (e - s > 15);
	}
#endif
	/* scalar tail: copy the remaining bytes back to front */
	while (e >= s) {
		*p++ = *e--;
	}

	*p = '\0';

	RETVAL_NEW_STR(n);
}
3368 /* }}} */
3369
/* {{{ php_similar_str
 * Finds the longest run of bytes common to txt1 and txt2.
 *
 * On return *max holds the length of that run (0 if the buffers share no
 * byte) and *pos1 / *pos2 its start offsets in txt1 / txt2. *count is the
 * number of times a longer run than the previous best was found during the
 * scan. *pos1 and *pos2 are left untouched when *max is 0.
 */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t i, j;

	*max = 0;
	*count = 0;

	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* length of the common run starting at txt1[i] / txt2[j] */
			size_t run = 0;

			while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
				run++;
			}

			/* strictly longer only: the first best run found wins ties */
			if (run > *max) {
				*max = run;
				*count += 1;
				*pos1 = i;
				*pos2 = j;
			}
		}
	}
}
3392 /* }}} */
3393
/* {{{ php_similar_char
 * Returns the number of matching bytes between txt1 and txt2: the length of
 * their longest common run plus, recursively, the result for the parts to the
 * left and to the right of that run.
 */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t pos1 = 0, pos2 = 0, max, count;
	size_t total;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
	if (max == 0) {
		/* nothing in common */
		return 0;
	}

	total = max;

	/* left of the common run; skipped when either side is empty or the run
	 * was the first candidate found by the scan */
	if (pos1 && pos2 && count > 1) {
		total += php_similar_char(txt1, pos1, txt2, pos2);
	}

	/* right of the common run; skipped when either side is empty */
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		total += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
				txt2 + pos2 + max, len2 - pos2 - max);
	}

	return total;
}
3414 /* }}} */
3415
3416 /* {{{ Calculates the similarity between two strings */
3417 PHP_FUNCTION(similar_text)
3418 {
3419 zend_string *t1, *t2;
3420 zval *percent = NULL;
3421 int ac = ZEND_NUM_ARGS();
3422 size_t sim;
3423
3424 ZEND_PARSE_PARAMETERS_START(2, 3)
3425 Z_PARAM_STR(t1)
3426 Z_PARAM_STR(t2)
3427 Z_PARAM_OPTIONAL
3428 Z_PARAM_ZVAL(percent)
3429 ZEND_PARSE_PARAMETERS_END();
3430
3431 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3432 if (ac > 2) {
3433 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3434 }
3435
3436 RETURN_LONG(0);
3437 }
3438
3439 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3440
3441 if (ac > 2) {
3442 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3443 }
3444
3445 RETURN_LONG(sim);
3446 }
3447 /* }}} */
3448
3449 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3450 PHP_FUNCTION(addcslashes)
3451 {
3452 zend_string *str, *what;
3453
3454 ZEND_PARSE_PARAMETERS_START(2, 2)
3455 Z_PARAM_STR(str)
3456 Z_PARAM_STR(what)
3457 ZEND_PARSE_PARAMETERS_END();
3458
3459 if (ZSTR_LEN(str) == 0) {
3460 RETURN_EMPTY_STRING();
3461 }
3462
3463 if (ZSTR_LEN(what) == 0) {
3464 RETURN_STR_COPY(str);
3465 }
3466
3467 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3468 }
3469 /* }}} */
3470
3471 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3472 PHP_FUNCTION(addslashes)
3473 {
3474 zend_string *str;
3475
3476 ZEND_PARSE_PARAMETERS_START(1, 1)
3477 Z_PARAM_STR(str)
3478 ZEND_PARSE_PARAMETERS_END();
3479
3480 if (ZSTR_LEN(str) == 0) {
3481 RETURN_EMPTY_STRING();
3482 }
3483
3484 RETURN_STR(php_addslashes(str));
3485 }
3486 /* }}} */
3487
3488 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3489 PHP_FUNCTION(stripcslashes)
3490 {
3491 zend_string *str;
3492
3493 ZEND_PARSE_PARAMETERS_START(1, 1)
3494 Z_PARAM_STR(str)
3495 ZEND_PARSE_PARAMETERS_END();
3496
3497 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3498 php_stripcslashes(Z_STR_P(return_value));
3499 }
3500 /* }}} */
3501
3502 /* {{{ Strips backslashes from a string */
3503 PHP_FUNCTION(stripslashes)
3504 {
3505 zend_string *str;
3506
3507 ZEND_PARSE_PARAMETERS_START(1, 1)
3508 Z_PARAM_STR(str)
3509 ZEND_PARSE_PARAMETERS_END();
3510
3511 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3512 php_stripslashes(Z_STR_P(return_value));
3513 }
3514 /* }}} */
3515
3516 /* {{{ php_stripcslashes */
3517 PHPAPI void php_stripcslashes(zend_string *str)
3518 {
3519 const char *source, *end;
3520 char *target;
3521 size_t nlen = ZSTR_LEN(str), i;
3522 char numtmp[4];
3523
3524 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3525 if (*source == '\\' && source + 1 < end) {
3526 source++;
3527 switch (*source) {
3528 case 'n': *target++='\n'; nlen--; break;
3529 case 'r': *target++='\r'; nlen--; break;
3530 case 'a': *target++='\a'; nlen--; break;
3531 case 't': *target++='\t'; nlen--; break;
3532 case 'v': *target++='\v'; nlen--; break;
3533 case 'b': *target++='\b'; nlen--; break;
3534 case 'f': *target++='\f'; nlen--; break;
3535 case '\\': *target++='\\'; nlen--; break;
3536 case 'x':
3537 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3538 numtmp[0] = *++source;
3539 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3540 numtmp[1] = *++source;
3541 numtmp[2] = '\0';
3542 nlen-=3;
3543 } else {
3544 numtmp[1] = '\0';
3545 nlen-=2;
3546 }
3547 *target++=(char)strtol(numtmp, NULL, 16);
3548 break;
3549 }
3550 ZEND_FALLTHROUGH;
3551 default:
3552 i=0;
3553 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3554 numtmp[i++] = *source++;
3555 }
3556 if (i) {
3557 numtmp[i]='\0';
3558 *target++=(char)strtol(numtmp, NULL, 8);
3559 nlen-=i;
3560 source--;
3561 } else {
3562 *target++=*source;
3563 nlen--;
3564 }
3565 }
3566 } else {
3567 *target++=*source;
3568 }
3569 }
3570
3571 if (nlen != 0) {
3572 *target='\0';
3573 }
3574
3575 ZSTR_LEN(str) = nlen;
3576 }
3577 /* }}} */
3578
3579 /* {{{ php_addcslashes_str */
3580 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3581 {
3582 char flags[256];
3583 char *target;
3584 const char *source, *end;
3585 char c;
3586 size_t newlen;
3587 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3588
3589 php_charmask((const unsigned char *) what, wlength, flags);
3590
3591 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3592 c = *source;
3593 if (flags[(unsigned char)c]) {
3594 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3595 *target++ = '\\';
3596 switch (c) {
3597 case '\n': *target++ = 'n'; break;
3598 case '\t': *target++ = 't'; break;
3599 case '\r': *target++ = 'r'; break;
3600 case '\a': *target++ = 'a'; break;
3601 case '\v': *target++ = 'v'; break;
3602 case '\b': *target++ = 'b'; break;
3603 case '\f': *target++ = 'f'; break;
3604 default: target += sprintf(target, "%03o", (unsigned char) c);
3605 }
3606 continue;
3607 }
3608 *target++ = '\\';
3609 }
3610 *target++ = c;
3611 }
3612 *target = 0;
3613 newlen = target - ZSTR_VAL(new_str);
3614 if (newlen < len * 4) {
3615 new_str = zend_string_truncate(new_str, newlen, 0);
3616 }
3617 return new_str;
3618 }
3619 /* }}} */
3620
3621 /* {{{ php_addcslashes */
3622 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3623 {
3624 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3625 }
3626 /* }}} */
3627
3628 /* {{{ php_addslashes */
3629
3630 #if ZEND_INTRIN_SSE4_2_NATIVE
3631 # include <nmmintrin.h>
3632 # include "Zend/zend_bitset.h"
3633 #elif ZEND_INTRIN_SSE4_2_RESOLVER
3634 # include <nmmintrin.h>
3635 # include "Zend/zend_bitset.h"
3636 # include "Zend/zend_cpuinfo.h"
3637
3638 ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
3639 zend_string *php_addslashes_default(zend_string *str);
3640
3641 ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
3642 void php_stripslashes_default(zend_string *str);
3643
3644 # if ZEND_INTRIN_SSE4_2_FUNC_PROTO
3645 PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
3646 PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));
3647
3648 typedef zend_string *(*php_addslashes_func_t)(zend_string *);
3649 typedef void (*php_stripslashes_func_t)(zend_string *);
3650
3651 ZEND_NO_SANITIZE_ADDRESS
3652 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3653 static php_addslashes_func_t resolve_addslashes(void) {
3654 if (zend_cpu_supports_sse42()) {
3655 return php_addslashes_sse42;
3656 }
3657 return php_addslashes_default;
3658 }
3659
3660 ZEND_NO_SANITIZE_ADDRESS
3661 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
3662 static php_stripslashes_func_t resolve_stripslashes(void) {
3663 if (zend_cpu_supports_sse42()) {
3664 return php_stripslashes_sse42;
3665 }
3666 return php_stripslashes_default;
3667 }
3668 # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
3669
3670 static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
3671 static void (*php_stripslashes_ptr)(zend_string *str) = NULL;
3672
3673 PHPAPI zend_string *php_addslashes(zend_string *str) {
3674 return php_addslashes_ptr(str);
3675 }
3676 PHPAPI void php_stripslashes(zend_string *str) {
3677 php_stripslashes_ptr(str);
3678 }
3679
3680 /* {{{ PHP_MINIT_FUNCTION */
3681 PHP_MINIT_FUNCTION(string_intrin)
3682 {
3683 if (zend_cpu_supports_sse42()) {
3684 php_addslashes_ptr = php_addslashes_sse42;
3685 php_stripslashes_ptr = php_stripslashes_sse42;
3686 } else {
3687 php_addslashes_ptr = php_addslashes_default;
3688 php_stripslashes_ptr = php_stripslashes_default;
3689 }
3690 return SUCCESS;
3691 }
3692 /* }}} */
3693 # endif
3694 #endif
3695
3696 #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
3697 # if ZEND_INTRIN_SSE4_2_NATIVE
3698 PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
3699 # elif ZEND_INTRIN_SSE4_2_RESOLVER
3700 zend_string *php_addslashes_sse42(zend_string *str)
3701 # endif
3702 {
3703 ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
3704 __m128i w128, s128;
3705 uint32_t res = 0;
3706 /* maximum string length, worst case situation */
3707 char *target;
3708 const char *source, *end;
3709 size_t offset;
3710 zend_string *new_str;
3711
3712 if (!str) {
3713 return ZSTR_EMPTY_ALLOC();
3714 }
3715
3716 source = ZSTR_VAL(str);
3717 end = source + ZSTR_LEN(str);
3718
3719 if (ZSTR_LEN(str) > 15) {
3720 w128 = _mm_load_si128((__m128i *)slashchars);
3721 do {
3722 s128 = _mm_loadu_si128((__m128i *)source);
3723 res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
3724 if (res) {
3725 goto do_escape;
3726 }
3727 source += 16;
3728 } while ((end - source) > 15);
3729 }
3730
3731 while (source < end) {
3732 switch (*source) {
3733 case '\0':
3734 case '\'':
3735 case '\"':
3736 case '\\':
3737 goto do_escape;
3738 default:
3739 source++;
3740 break;
3741 }
3742 }
3743
3744 return zend_string_copy(str);
3745
3746 do_escape:
3747 offset = source - (char *)ZSTR_VAL(str);
3748 new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
3749 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
3750 target = ZSTR_VAL(new_str) + offset;
3751
3752 if (res) {
3753 int pos = 0;
3754 do {
3755 int i, n = zend_ulong_ntz(res);
3756 for (i = 0; i < n; i++) {
3757 *target++ = source[pos + i];
3758 }
3759 pos += n;
3760 *target++ = '\\';
3761 if (source[pos] == '\0') {
3762 *target++ = '0';
3763 } else {
3764 *target++ = source[pos];
3765 }
3766 pos++;
3767 res = res >> (n + 1);
3768 } while (res);
3769
3770 for (; pos < 16; pos++) {
3771 *target++ = source[pos];
3772 }
3773 source += 16;
3774 } else if (end - source > 15) {
3775 w128 = _mm_load_si128((__m128i *)slashchars);
3776 }
3777
3778 for (; end - source > 15; source += 16) {
3779 int pos = 0;
3780 s128 = _mm_loadu_si128((__m128i *)source);
3781 res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
3782 if (res) {
3783 do {
3784 int i, n = zend_ulong_ntz(res);
3785 for (i = 0; i < n; i++) {
3786 *target++ = source[pos + i];
3787 }
3788 pos += n;
3789 *target++ = '\\';
3790 if (source[pos] == '\0') {
3791 *target++ = '0';
3792 } else {
3793 *target++ = source[pos];
3794 }
3795 pos++;
3796 res = res >> (n + 1);
3797 } while (res);
3798 for (; pos < 16; pos++) {
3799 *target++ = source[pos];
3800 }
3801 } else {
3802 _mm_storeu_si128((__m128i*)target, s128);
3803 target += 16;
3804 }
3805 }
3806
3807 while (source < end) {
3808 switch (*source) {
3809 case '\0':
3810 *target++ = '\\';
3811 *target++ = '0';
3812 break;
3813 case '\'':
3814 case '\"':
3815 case '\\':
3816 *target++ = '\\';
3817 ZEND_FALLTHROUGH;
3818 default:
3819 *target++ = *source;
3820 break;
3821 }
3822 source++;
3823 }
3824
3825 *target = '\0';
3826
3827 if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
3828 new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
3829 } else {
3830 ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
3831 }
3832
3833 return new_str;
3834 }
3835 /* }}} */
3836 #endif
3837
3838 #ifdef __aarch64__
/* 128 bits viewed either as sixteen bytes or as two 64-bit words, so that a
 * per-byte result can be tested for "any byte set" with two word reads. */
typedef union {
	uint8_t mem[16];  /* byte view */
	uint64_t dw[2];   /* word view of the same storage */
} quad_word;
3843
3844 static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
3845 uint8x16_t s0 = vceqq_u8(x, vdupq_n_u8('\0'));
3846 uint8x16_t s1 = vceqq_u8(x, vdupq_n_u8('\''));
3847 uint8x16_t s2 = vceqq_u8(x, vdupq_n_u8('\"'));
3848 uint8x16_t s3 = vceqq_u8(x, vdupq_n_u8('\\'));
3849 uint8x16_t s01 = vorrq_u8(s0, s1);
3850 uint8x16_t s23 = vorrq_u8(s2, s3);
3851 uint8x16_t s0123 = vorrq_u8(s01, s23);
3852 quad_word qw;
3853 vst1q_u8(qw.mem, s0123);
3854 return qw;
3855 }
3856
3857 static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
3858 {
3859 int i = 0;
3860 for (; i < 16; i++) {
3861 char s = source[i];
3862 if (res.mem[i] == 0)
3863 *target++ = s;
3864 else {
3865 *target++ = '\\';
3866 if (s == '\0')
3867 *target++ = '0';
3868 else
3869 *target++ = s;
3870 }
3871 }
3872 return target;
3873 }
3874 #endif /* __aarch64__ */
3875
3876 #if !ZEND_INTRIN_SSE4_2_NATIVE
3877 # if ZEND_INTRIN_SSE4_2_RESOLVER
3878 zend_string *php_addslashes_default(zend_string *str) /* {{{ */
3879 # else
3880 PHPAPI zend_string *php_addslashes(zend_string *str)
3881 # endif
3882 {
3883 /* maximum string length, worst case situation */
3884 char *target;
3885 const char *source, *end;
3886 size_t offset;
3887 zend_string *new_str;
3888
3889 if (!str) {
3890 return ZSTR_EMPTY_ALLOC();
3891 }
3892
3893 source = ZSTR_VAL(str);
3894 end = source + ZSTR_LEN(str);
3895
3896 # ifdef __aarch64__
3897 quad_word res = {0};
3898 if (ZSTR_LEN(str) > 15) {
3899 do {
3900 res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
3901 if (res.dw[0] | res.dw[1])
3902 goto do_escape;
3903 source += 16;
3904 } while ((end - source) > 15);
3905 }
3906 /* Finish the last 15 bytes or less with the scalar loop. */
3907 # endif /* __aarch64__ */
3908
3909 while (source < end) {
3910 switch (*source) {
3911 case '\0':
3912 case '\'':
3913 case '\"':
3914 case '\\':
3915 goto do_escape;
3916 default:
3917 source++;
3918 break;
3919 }
3920 }
3921
3922 return zend_string_copy(str);
3923
3924 do_escape:
3925 offset = source - (char *)ZSTR_VAL(str);
3926 new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
3927 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
3928 target = ZSTR_VAL(new_str) + offset;
3929
3930 # ifdef __aarch64__
3931 if (res.dw[0] | res.dw[1]) {
3932 target = aarch64_add_slashes(res, source, target);
3933 source += 16;
3934 }
3935 for (; end - source > 15; source += 16) {
3936 uint8x16_t x = vld1q_u8((uint8_t *)source);
3937 res = aarch64_contains_slash_chars(x);
3938 if (res.dw[0] | res.dw[1]) {
3939 target = aarch64_add_slashes(res, source, target);
3940 } else {
3941 vst1q_u8((uint8_t*)target, x);
3942 target += 16;
3943 }
3944 }
3945 /* Finish the last 15 bytes or less with the scalar loop. */
3946 # endif /* __aarch64__ */
3947
3948 while (source < end) {
3949 switch (*source) {
3950 case '\0':
3951 *target++ = '\\';
3952 *target++ = '0';
3953 break;
3954 case '\'':
3955 case '\"':
3956 case '\\':
3957 *target++ = '\\';
3958 ZEND_FALLTHROUGH;
3959 default:
3960 *target++ = *source;
3961 break;
3962 }
3963 source++;
3964 }
3965
3966 *target = '\0';
3967
3968 if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
3969 new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
3970 } else {
3971 ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
3972 }
3973
3974 return new_str;
3975 }
3976 #endif
3977 /* }}} */
3978 /* }}} */
3979
3980 /* {{{ php_stripslashes
3981 *
3982 * be careful, this edits the string in-place */
3983 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
3984 {
3985 #ifdef __aarch64__
3986 while (len > 15) {
3987 uint8x16_t x = vld1q_u8((uint8_t *)str);
3988 quad_word q;
3989 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
3990 if (q.dw[0] | q.dw[1]) {
3991 unsigned int i = 0;
3992 while (i < 16) {
3993 if (q.mem[i] == 0) {
3994 *out++ = str[i];
3995 i++;
3996 continue;
3997 }
3998
3999 i++; /* skip the slash */
4000 if (i < len) {
4001 char s = str[i];
4002 if (s == '0')
4003 *out++ = '\0';
4004 else
4005 *out++ = s; /* preserve the next character */
4006 i++;
4007 }
4008 }
4009 str += i;
4010 len -= i;
4011 } else {
4012 vst1q_u8((uint8_t*)out, x);
4013 out += 16;
4014 str += 16;
4015 len -= 16;
4016 }
4017 }
4018 /* Finish the last 15 bytes or less with the scalar loop. */
4019 #endif /* __aarch64__ */
4020 while (len > 0) {
4021 if (*str == '\\') {
4022 str++; /* skip the slash */
4023 len--;
4024 if (len > 0) {
4025 if (*str == '0') {
4026 *out++='\0';
4027 str++;
4028 } else {
4029 *out++ = *str++; /* preserve the next character */
4030 }
4031 len--;
4032 }
4033 } else {
4034 *out++ = *str++;
4035 len--;
4036 }
4037 }
4038
4039 return out;
4040 }
4041
4042 #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
4043 # if ZEND_INTRIN_SSE4_2_NATIVE
4044 PHPAPI void php_stripslashes(zend_string *str)
4045 # elif ZEND_INTRIN_SSE4_2_RESOLVER
4046 void php_stripslashes_sse42(zend_string *str)
4047 # endif
4048 {
4049 const char *s = ZSTR_VAL(str);
4050 char *t = ZSTR_VAL(str);
4051 size_t l = ZSTR_LEN(str);
4052
4053 if (l > 15) {
4054 const __m128i slash = _mm_set1_epi8('\\');
4055
4056 do {
4057 __m128i in = _mm_loadu_si128((__m128i *)s);
4058 __m128i any_slash = _mm_cmpeq_epi8(in, slash);
4059 uint32_t res = _mm_movemask_epi8(any_slash);
4060
4061 if (res) {
4062 int i, n = zend_ulong_ntz(res);
4063 const char *e = s + 15;
4064 l -= n;
4065 for (i = 0; i < n; i++) {
4066 *t++ = *s++;
4067 }
4068 for (; s < e; s++) {
4069 if (*s == '\\') {
4070 s++;
4071 l--;
4072 if (*s == '0') {
4073 *t = '\0';
4074 } else {
4075 *t = *s;
4076 }
4077 } else {
4078 *t = *s;
4079 }
4080 t++;
4081 l--;
4082 }
4083 } else {
4084 _mm_storeu_si128((__m128i *)t, in);
4085 s += 16;
4086 t += 16;
4087 l -= 16;
4088 }
4089 } while (l > 15);
4090 }
4091
4092 t = php_stripslashes_impl(s, t, l);
4093 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4094 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4095 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4096 }
4097 }
4098 #endif
4099
4100 #if !ZEND_INTRIN_SSE4_2_NATIVE
4101 # if ZEND_INTRIN_SSE4_2_RESOLVER
4102 void php_stripslashes_default(zend_string *str) /* {{{ */
4103 # else
4104 PHPAPI void php_stripslashes(zend_string *str)
4105 # endif
4106 {
4107 const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
4108 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4109 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4110 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4111 }
4112 }
4113 /* }}} */
4114 #endif
4115 /* }}} */
4116
/* Block kinds used while splitting text into runs. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/* Byte classifiers, each yielding 1 or 0. The argument is parenthesized
 * before the cast so that a compound expression is narrowed to unsigned char
 * as a whole, not just its first operand. The argument may be evaluated more
 * than once, so it must not have side effects. */
/* 1 when the byte value lies in 224..250. */
#define isheb(c) (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
/* 1 for a space or a horizontal tab. */
#define _isblank(c) (((((unsigned char) (c)) == ' ' || ((unsigned char) (c)) == '\t')) ? 1 : 0)
/* 1 for a line feed or a carriage return. */
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4122
4123 /* {{{ php_str_replace_in_subject */
4124 static zend_long php_str_replace_in_subject(
4125 zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
4126 zend_string *subject_str, zval *result, int case_sensitivity
4127 ) {
4128 zval *search_entry;
4129 zend_string *tmp_result;
4130 char *replace_value = NULL;
4131 size_t replace_len = 0;
4132 zend_long replace_count = 0;
4133 zend_string *lc_subject_str = NULL;
4134 uint32_t replace_idx;
4135
4136 if (ZSTR_LEN(subject_str) == 0) {
4137 ZVAL_EMPTY_STRING(result);
4138 return 0;
4139 }
4140
4141 /* If search is an array */
4142 if (search_ht) {
4143 /* Duplicate subject string for repeated replacement */
4144 zend_string_addref(subject_str);
4145
4146 if (replace_ht) {
4147 replace_idx = 0;
4148 } else {
4149 /* Set replacement value to the passed one */
4150 replace_value = ZSTR_VAL(replace_str);
4151 replace_len = ZSTR_LEN(replace_str);
4152 }
4153
4154 /* For each entry in the search array, get the entry */
4155 ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
4156 /* Make sure we're dealing with strings. */
4157 zend_string *tmp_search_str;
4158 zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
4159 zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
4160
4161 /* If replace is an array. */
4162 if (replace_ht) {
4163 /* Get current entry */
4164 zval *replace_entry = NULL;
4165 while (replace_idx < replace_ht->nNumUsed) {
4166 replace_entry = &replace_ht->arData[replace_idx].val;
4167 if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
4168 break;
4169 }
4170 replace_idx++;
4171 }
4172 if (replace_idx < replace_ht->nNumUsed) {
4173 /* Make sure we're dealing with strings. */
4174 replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
4175
4176 /* Set replacement value to the one we got from array */
4177 replace_value = ZSTR_VAL(replace_entry_str);
4178 replace_len = ZSTR_LEN(replace_entry_str);
4179
4180 replace_idx++;
4181 } else {
4182 /* We've run out of replacement strings, so use an empty one. */
4183 replace_value = "";
4184 replace_len = 0;
4185 }
4186 }
4187
4188 if (ZSTR_LEN(search_str) == 1) {
4189 zend_long old_replace_count = replace_count;
4190
4191 tmp_result = php_char_to_str_ex(subject_str,
4192 ZSTR_VAL(search_str)[0],
4193 replace_value,
4194 replace_len,
4195 case_sensitivity,
4196 &replace_count);
4197 if (lc_subject_str && replace_count != old_replace_count) {
4198 zend_string_release_ex(lc_subject_str, 0);
4199 lc_subject_str = NULL;
4200 }
4201 } else if (ZSTR_LEN(search_str) > 1) {
4202 if (case_sensitivity) {
4203 tmp_result = php_str_to_str_ex(subject_str,
4204 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4205 replace_value, replace_len, &replace_count);
4206 } else {
4207 zend_long old_replace_count = replace_count;
4208
4209 if (!lc_subject_str) {
4210 lc_subject_str = php_string_tolower(subject_str);
4211 }
4212 tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4213 search_str, replace_value, replace_len, &replace_count);
4214 if (replace_count != old_replace_count) {
4215 zend_string_release_ex(lc_subject_str, 0);
4216 lc_subject_str = NULL;
4217 }
4218 }
4219 } else {
4220 zend_tmp_string_release(tmp_search_str);
4221 zend_tmp_string_release(tmp_replace_entry_str);
4222 continue;
4223 }
4224
4225 zend_tmp_string_release(tmp_search_str);
4226 zend_tmp_string_release(tmp_replace_entry_str);
4227
4228 if (subject_str == tmp_result) {
4229 zend_string_delref(subject_str);
4230 } else {
4231 zend_string_release_ex(subject_str, 0);
4232 subject_str = tmp_result;
4233 if (ZSTR_LEN(subject_str) == 0) {
4234 zend_string_release_ex(subject_str, 0);
4235 ZVAL_EMPTY_STRING(result);
4236 if (lc_subject_str) {
4237 zend_string_release_ex(lc_subject_str, 0);
4238 }
4239 return replace_count;
4240 }
4241 }
4242 } ZEND_HASH_FOREACH_END();
4243 ZVAL_STR(result, subject_str);
4244 if (lc_subject_str) {
4245 zend_string_release_ex(lc_subject_str, 0);
4246 }
4247 } else {
4248 ZEND_ASSERT(search_str);
4249 if (ZSTR_LEN(search_str) == 1) {
4250 ZVAL_STR(result,
4251 php_char_to_str_ex(subject_str,
4252 ZSTR_VAL(search_str)[0],
4253 ZSTR_VAL(replace_str),
4254 ZSTR_LEN(replace_str),
4255 case_sensitivity,
4256 &replace_count));
4257 } else if (ZSTR_LEN(search_str) > 1) {
4258 if (case_sensitivity) {
4259 ZVAL_STR(result, php_str_to_str_ex(subject_str,
4260 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4261 ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4262 } else {
4263 lc_subject_str = php_string_tolower(subject_str);
4264 ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4265 search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4266 zend_string_release_ex(lc_subject_str, 0);
4267 }
4268 } else {
4269 ZVAL_STR_COPY(result, subject_str);
4270 }
4271 }
4272 return replace_count;
4273 }
4274 /* }}} */
4275
4276 /* {{{ php_str_replace_common */
4277 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
4278 {
4279 zend_string *search_str;
4280 HashTable *search_ht;
4281 zend_string *replace_str;
4282 HashTable *replace_ht;
4283 zend_string *subject_str;
4284 HashTable *subject_ht;
4285 zval *subject_entry, *zcount = NULL;
4286 zval result;
4287 zend_string *string_key;
4288 zend_ulong num_key;
4289 zend_long count = 0;
4290
4291 ZEND_PARSE_PARAMETERS_START(3, 4)
4292 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4293 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4294 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4295 Z_PARAM_OPTIONAL
4296 Z_PARAM_ZVAL(zcount)
4297 ZEND_PARSE_PARAMETERS_END();
4298
4299 /* Make sure we're dealing with strings and do the replacement. */
4300 if (search_str && replace_ht) {
4301 zend_argument_type_error(2, "must be of type %s when argument #1 ($search) is %s",
4302 search_str ? "string" : "array", search_str ? "a string" : "an array"
4303 );
4304 RETURN_THROWS();
4305 }
4306
4307 /* if subject is an array */
4308 if (subject_ht) {
4309 array_init(return_value);
4310
4311 /* For each subject entry, convert it to string, then perform replacement
4312 and add the result to the return_value array. */
4313 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
4314 zend_string *tmp_subject_str;
4315 ZVAL_DEREF(subject_entry);
4316 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4317 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4318 zend_tmp_string_release(tmp_subject_str);
4319
4320 /* Add to return array */
4321 if (string_key) {
4322 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4323 } else {
4324 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4325 }
4326 } ZEND_HASH_FOREACH_END();
4327 } else { /* if subject is not an array */
4328 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4329 }
4330 if (zcount) {
4331 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4332 }
4333 }
4334 /* }}} */
4335
4336 /* {{{ Replaces all occurrences of search in haystack with replace */
4337 PHP_FUNCTION(str_replace)
4338 {
4339 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4340 }
4341 /* }}} */
4342
4343 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4344 PHP_FUNCTION(str_ireplace)
4345 {
4346 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4347 }
4348 /* }}} */
4349
4350 /* {{{ Converts logical Hebrew text to visual text */
4351 PHP_FUNCTION(hebrev)
4352 {
4353 char *str, *heb_str, *target;
4354 const char *tmp;
4355 size_t block_start, block_end, block_type, block_length, i;
4356 zend_long max_chars=0, char_count;
4357 size_t begin, end, orig_begin;
4358 size_t str_len;
4359 zend_string *broken_str;
4360
4361 ZEND_PARSE_PARAMETERS_START(1, 2)
4362 Z_PARAM_STRING(str, str_len)
4363 Z_PARAM_OPTIONAL
4364 Z_PARAM_LONG(max_chars)
4365 ZEND_PARSE_PARAMETERS_END();
4366
4367 if (str_len == 0) {
4368 RETURN_EMPTY_STRING();
4369 }
4370
4371 tmp = str;
4372 block_start=block_end=0;
4373
4374 heb_str = (char *) emalloc(str_len+1);
4375 target = heb_str+str_len;
4376 *target = 0;
4377 target--;
4378
4379 block_length=0;
4380
4381 if (isheb(*tmp)) {
4382 block_type = _HEB_BLOCK_TYPE_HEB;
4383 } else {
4384 block_type = _HEB_BLOCK_TYPE_ENG;
4385 }
4386
4387 do {
4388 if (block_type == _HEB_BLOCK_TYPE_HEB) {
4389 while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
4390 tmp++;
4391 block_end++;
4392 block_length++;
4393 }
4394 for (i = block_start+1; i<= block_end+1; i++) {
4395 *target = str[i-1];
4396 switch (*target) {
4397 case '(':
4398 *target = ')';
4399 break;
4400 case ')':
4401 *target = '(';
4402 break;
4403 case '[':
4404 *target = ']';
4405 break;
4406 case ']':
4407 *target = '[';
4408 break;
4409 case '{':
4410 *target = '}';
4411 break;
4412 case '}':
4413 *target = '{';
4414 break;
4415 case '<':
4416 *target = '>';
4417 break;
4418 case '>':
4419 *target = '<';
4420 break;
4421 case '\\':
4422 *target = '/';
4423 break;
4424 case '/':
4425 *target = '\\';
4426 break;
4427 default:
4428 break;
4429 }
4430 target--;
4431 }
4432 block_type = _HEB_BLOCK_TYPE_ENG;
4433 } else {
4434 while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
4435 tmp++;
4436 block_end++;
4437 block_length++;
4438 }
4439 while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
4440 tmp--;
4441 block_end--;
4442 }
4443 for (i = block_end+1; i >= block_start+1; i--) {
4444 *target = str[i-1];
4445 target--;
4446 }
4447 block_type = _HEB_BLOCK_TYPE_HEB;
4448 }
4449 block_start=block_end+1;
4450 } while (block_end < str_len-1);
4451
4452
4453 broken_str = zend_string_alloc(str_len, 0);
4454 begin = end = str_len-1;
4455 target = ZSTR_VAL(broken_str);
4456
4457 while (1) {
4458 char_count=0;
4459 while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
4460 char_count++;
4461 begin--;
4462 if (_isnewline(heb_str[begin])) {
4463 while (begin > 0 && _isnewline(heb_str[begin-1])) {
4464 begin--;
4465 char_count++;
4466 }
4467 break;
4468 }
4469 }
4470 if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
4471 size_t new_char_count=char_count, new_begin=begin;
4472
4473 while (new_char_count > 0) {
4474 if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
4475 break;
4476 }
4477 new_begin++;
4478 new_char_count--;
4479 }
4480 if (new_char_count > 0) {
4481 begin=new_begin;
4482 }
4483 }
4484 orig_begin=begin;
4485
4486 if (_isblank(heb_str[begin])) {
4487 heb_str[begin]='\n';
4488 }
4489 while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
4490 begin++;
4491 }
4492 for (i = begin; i <= end; i++) { /* copy content */
4493 *target = heb_str[i];
4494 target++;
4495 }
4496 for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
4497 *target = heb_str[i];
4498 target++;
4499 }
4500 begin=orig_begin;
4501
4502 if (begin == 0) {
4503 *target = 0;
4504 break;
4505 }
4506 begin--;
4507 end=begin;
4508 }
4509 efree(heb_str);
4510
4511 RETURN_NEW_STR(broken_str);
4512 }
4513 /* }}} */
4514
4515 /* {{{ Converts newlines to HTML line breaks */
4516 PHP_FUNCTION(nl2br)
4517 {
4518 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4519 const char *tmp, *end;
4520 zend_string *str;
4521 char *target;
4522 size_t repl_cnt = 0;
4523 bool is_xhtml = 1;
4524 zend_string *result;
4525
4526 ZEND_PARSE_PARAMETERS_START(1, 2)
4527 Z_PARAM_STR(str)
4528 Z_PARAM_OPTIONAL
4529 Z_PARAM_BOOL(is_xhtml)
4530 ZEND_PARSE_PARAMETERS_END();
4531
4532 tmp = ZSTR_VAL(str);
4533 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4534
4535 /* it is really faster to scan twice and allocate mem once instead of scanning once
4536 and constantly reallocing */
4537 while (tmp < end) {
4538 if (*tmp == '\r') {
4539 if (*(tmp+1) == '\n') {
4540 tmp++;
4541 }
4542 repl_cnt++;
4543 } else if (*tmp == '\n') {
4544 if (*(tmp+1) == '\r') {
4545 tmp++;
4546 }
4547 repl_cnt++;
4548 }
4549
4550 tmp++;
4551 }
4552
4553 if (repl_cnt == 0) {
4554 RETURN_STR_COPY(str);
4555 }
4556
4557 {
4558 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4559
4560 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4561 target = ZSTR_VAL(result);
4562 }
4563
4564 tmp = ZSTR_VAL(str);
4565 while (tmp < end) {
4566 switch (*tmp) {
4567 case '\r':
4568 case '\n':
4569 *target++ = '<';
4570 *target++ = 'b';
4571 *target++ = 'r';
4572
4573 if (is_xhtml) {
4574 *target++ = ' ';
4575 *target++ = '/';
4576 }
4577
4578 *target++ = '>';
4579
4580 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4581 *target++ = *tmp++;
4582 }
4583 ZEND_FALLTHROUGH;
4584 default:
4585 *target++ = *tmp;
4586 }
4587
4588 tmp++;
4589 }
4590
4591 *target = '\0';
4592
4593 RETURN_NEW_STR(result);
4594 }
4595 /* }}} */
4596
4597 /* {{{ Strips HTML and PHP tags from a string */
4598 PHP_FUNCTION(strip_tags)
4599 {
4600 zend_string *buf;
4601 zend_string *str;
4602 zend_string *allow_str = NULL;
4603 HashTable *allow_ht = NULL;
4604 const char *allowed_tags=NULL;
4605 size_t allowed_tags_len=0;
4606 smart_str tags_ss = {0};
4607
4608 ZEND_PARSE_PARAMETERS_START(1, 2)
4609 Z_PARAM_STR(str)
4610 Z_PARAM_OPTIONAL
4611 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4612 ZEND_PARSE_PARAMETERS_END();
4613
4614 if (allow_ht) {
4615 zval *tmp;
4616 zend_string *tag;
4617
4618 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4619 tag = zval_get_string(tmp);
4620 smart_str_appendc(&tags_ss, '<');
4621 smart_str_append(&tags_ss, tag);
4622 smart_str_appendc(&tags_ss, '>');
4623 zend_string_release(tag);
4624 } ZEND_HASH_FOREACH_END();
4625 if (tags_ss.s) {
4626 smart_str_0(&tags_ss);
4627 allowed_tags = ZSTR_VAL(tags_ss.s);
4628 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4629 }
4630 } else if (allow_str) {
4631 allowed_tags = ZSTR_VAL(allow_str);
4632 allowed_tags_len = ZSTR_LEN(allow_str);
4633 }
4634
4635 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4636 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4637 smart_str_free(&tags_ss);
4638 RETURN_NEW_STR(buf);
4639 }
4640 /* }}} */
4641
4642 static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
4643 const char *retval;
4644
4645 if (zend_string_equals_literal(loc, "0")) {
4646 loc = NULL;
4647 } else {
4648 if (ZSTR_LEN(loc) >= 255) {
4649 php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
4650 return NULL;
4651 }
4652 }
4653
4654 # ifndef PHP_WIN32
4655 retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
4656 # else
4657 if (loc) {
4658 /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
4659 char *locp = ZSTR_VAL(loc);
4660 if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
4661 && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
4662 && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
4663 && (locp[5] == '\0' || locp[5] == '.')
4664 && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
4665 && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
4666 && locp[5] == '\0')
4667 ) {
4668 retval = NULL;
4669 } else {
4670 retval = setlocale(cat, ZSTR_VAL(loc));
4671 }
4672 } else {
4673 retval = setlocale(cat, NULL);
4674 }
4675 # endif
4676 if (!retval) {
4677 return NULL;
4678 }
4679
4680 if (loc) {
4681 /* Remember if locale was changed */
4682 size_t len = strlen(retval);
4683
4684 BG(locale_changed) = 1;
4685 if (cat == LC_CTYPE || cat == LC_ALL) {
4686 zend_update_current_locale();
4687 if (BG(ctype_string)) {
4688 zend_string_release_ex(BG(ctype_string), 0);
4689 }
4690 if (len == 1 && *retval == 'C') {
4691 /* C locale is represented as NULL. */
4692 BG(ctype_string) = NULL;
4693 return ZSTR_CHAR('C');
4694 } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
4695 BG(ctype_string) = zend_string_copy(loc);
4696 return zend_string_copy(BG(ctype_string));
4697 } else {
4698 BG(ctype_string) = zend_string_init(retval, len, 0);
4699 return zend_string_copy(BG(ctype_string));
4700 }
4701 } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
4702 return zend_string_copy(loc);
4703 }
4704 }
4705 return zend_string_init(retval, strlen(retval), 0);
4706 }
4707
4708 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4709 zend_string *tmp_loc_str;
4710 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4711 if (UNEXPECTED(loc_str == NULL)) {
4712 return NULL;
4713 }
4714 zend_string *result = try_setlocale_str(cat, loc_str);
4715 zend_tmp_string_release(tmp_loc_str);
4716 return result;
4717 }
4718
4719 /* {{{ Set locale information */
4720 PHP_FUNCTION(setlocale)
4721 {
4722 zend_long cat;
4723 zval *args = NULL;
4724 int num_args;
4725
4726 ZEND_PARSE_PARAMETERS_START(2, -1)
4727 Z_PARAM_LONG(cat)
4728 Z_PARAM_VARIADIC('+', args, num_args)
4729 ZEND_PARSE_PARAMETERS_END();
4730
4731 for (uint32_t i = 0; i < num_args; i++) {
4732 if (Z_TYPE(args[i]) == IS_ARRAY) {
4733 zval *elem;
4734 ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
4735 zend_string *result = try_setlocale_zval(cat, elem);
4736 if (EG(exception)) {
4737 RETURN_THROWS();
4738 }
4739 if (result) {
4740 RETURN_STR(result);
4741 }
4742 } ZEND_HASH_FOREACH_END();
4743 } else {
4744 zend_string *result = try_setlocale_zval(cat, &args[i]);
4745 if (EG(exception)) {
4746 RETURN_THROWS();
4747 }
4748 if (result) {
4749 RETURN_STR(result);
4750 }
4751 }
4752 }
4753
4754 RETURN_FALSE;
4755 }
4756 /* }}} */
4757
4758 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
4759 PHP_FUNCTION(parse_str)
4760 {
4761 char *arg;
4762 zval *arrayArg = NULL;
4763 char *res = NULL;
4764 size_t arglen;
4765
4766 ZEND_PARSE_PARAMETERS_START(2, 2)
4767 Z_PARAM_STRING(arg, arglen)
4768 Z_PARAM_ZVAL(arrayArg)
4769 ZEND_PARSE_PARAMETERS_END();
4770
4771 arrayArg = zend_try_array_init(arrayArg);
4772 if (!arrayArg) {
4773 RETURN_THROWS();
4774 }
4775
4776 res = estrndup(arg, arglen);
4777 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
4778 }
4779 /* }}} */
4780
4781 #define PHP_TAG_BUF_SIZE 1023
4782
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags
 *
 * tag: the tag text as collected by the stripper, e.g. "<a href=x>".
 *      The scan is not bounded by len: it stops at the first '>' or at the
 *      first whitespace following a non-whitespace character, so the caller
 *      must guarantee one of those occurs within the buffer.
 * len: number of bytes in tag; 0 yields "not found".
 * set: string of allowed tags in normalized form, e.g. "<a><b>".
 *
 * Returns 1 if the normalized tag occurs in set, 0 otherwise.
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
int php_tag_find(char *tag, size_t len, const char *set) {
	char c, *n;
	const char *t;
	int state = 0, done = 0;
	char *norm;

	if (len == 0) {
		return 0;
	}

	norm = emalloc(len+1);

	n = norm;
	t = tag;
	/* <ctype.h> functions require a value representable as unsigned char;
	 * a plain (possibly negative) char is undefined behaviour. */
	c = tolower((unsigned char)*t);
	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	while (!done) {
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = 1;
				break;
			default:
				if (!isspace((unsigned char)c)) {
					if (state == 0) {
						state = 1;
					}
					/* Drop the '/' of "</tag>" and "<tag/>". Never look in
					 * front of the buffer when '/' is the very first byte. */
					if (c != '/' || ((t == tag || *(t-1) != '<') && *(t+1) != '>')) {
						*(n++) = c;
					}
				} else {
					/* Whitespace after the tag name ends the name. */
					if (state == 1) {
						done = 1;
					}
				}
				break;
		}
		c = tolower((unsigned char)*(++t));
	}
	*(n++) = '>';
	*n = '\0';
	done = strstr(set, norm) ? 1 : 0;
	efree(norm);
	return done;
}
4846 /* }}} */
4847
4848 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
4849 {
4850 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
4851 }
4852 /* }}} */
4853
4854 /* {{{ php_strip_tags
4855
4856 A simple little state-machine to strip out html and php tags
4857
4858 State 0 is the output state, State 1 means we are inside a
4859 normal html tag and state 2 means we are inside a php tag.
4860
4861 The state variable is passed in to allow a function like fgetss
4862 to maintain state across calls to the function.
4863
4864 lc holds the last significant character read and br is a bracket
4865 counter.
4866
4867 When an allow string is passed in we keep track of the string
4868 in state 1 and when the tag is closed check it against the
4869 allow string to see if we should allow it.
4870
4871 swm: Added ability to strip <?xml tags without assuming it PHP
4872 code.
4873 */
4874 PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
4875 {
4876 char *tbuf, *tp, *rp, c, lc;
4877 const char *buf, *p, *end;
4878 int br, depth=0, in_q = 0;
4879 uint8_t state = 0;
4880 size_t pos;
4881 char *allow_free = NULL;
4882 char is_xml = 0;
4883
4884 buf = estrndup(rbuf, len);
4885 end = buf + len;
4886 lc = '\0';
4887 p = buf;
4888 rp = rbuf;
4889 br = 0;
4890 if (allow) {
4891 allow_free = zend_str_tolower_dup_ex(allow, allow_len);
4892 allow = allow_free ? allow_free : allow;
4893 tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
4894 tp = tbuf;
4895 } else {
4896 tbuf = tp = NULL;
4897 }
4898
4899 state_0:
4900 if (p >= end) {
4901 goto finish;
4902 }
4903 c = *p;
4904 switch (c) {
4905 case '\0':
4906 break;
4907 case '<':
4908 if (in_q) {
4909 break;
4910 }
4911 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4912 *(rp++) = c;
4913 break;
4914 }
4915 lc = '<';
4916 state = 1;
4917 if (allow) {
4918 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4919 pos = tp - tbuf;
4920 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4921 tp = tbuf + pos;
4922 }
4923 *(tp++) = '<';
4924 }
4925 p++;
4926 goto state_1;
4927 case '>':
4928 if (depth) {
4929 depth--;
4930 break;
4931 }
4932
4933 if (in_q) {
4934 break;
4935 }
4936
4937 *(rp++) = c;
4938 break;
4939 default:
4940 *(rp++) = c;
4941 break;
4942 }
4943 p++;
4944 goto state_0;
4945
4946 state_1:
4947 if (p >= end) {
4948 goto finish;
4949 }
4950 c = *p;
4951 switch (c) {
4952 case '\0':
4953 break;
4954 case '<':
4955 if (in_q) {
4956 break;
4957 }
4958 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4959 goto reg_char_1;
4960 }
4961 depth++;
4962 break;
4963 case '>':
4964 if (depth) {
4965 depth--;
4966 break;
4967 }
4968 if (in_q) {
4969 break;
4970 }
4971
4972 lc = '>';
4973 if (is_xml && p >= buf + 1 && *(p -1) == '-') {
4974 break;
4975 }
4976 in_q = state = is_xml = 0;
4977 if (allow) {
4978 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4979 pos = tp - tbuf;
4980 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4981 tp = tbuf + pos;
4982 }
4983 *(tp++) = '>';
4984 *tp='\0';
4985 if (php_tag_find(tbuf, tp-tbuf, allow)) {
4986 memcpy(rp, tbuf, tp-tbuf);
4987 rp += tp-tbuf;
4988 }
4989 tp = tbuf;
4990 }
4991 p++;
4992 goto state_0;
4993 case '"':
4994 case '\'':
4995 if (p != buf && (!in_q || *p == in_q)) {
4996 if (in_q) {
4997 in_q = 0;
4998 } else {
4999 in_q = *p;
5000 }
5001 }
5002 goto reg_char_1;
5003 case '!':
5004 /* JavaScript & Other HTML scripting languages */
5005 if (p >= buf + 1 && *(p-1) == '<') {
5006 state = 3;
5007 lc = c;
5008 p++;
5009 goto state_3;
5010 } else {
5011 goto reg_char_1;
5012 }
5013 break;
5014 case '?':
5015 if (p >= buf + 1 && *(p-1) == '<') {
5016 br=0;
5017 state = 2;
5018 p++;
5019 goto state_2;
5020 } else {
5021 goto reg_char_1;
5022 }
5023 break;
5024 default:
5025 reg_char_1:
5026 if (allow) {
5027 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
5028 pos = tp - tbuf;
5029 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
5030 tp = tbuf + pos;
5031 }
5032 *(tp++) = c;
5033 }
5034 break;
5035 }
5036 p++;
5037 goto state_1;
5038
5039 state_2:
5040 if (p >= end) {
5041 goto finish;
5042 }
5043 c = *p;
5044 switch (c) {
5045 case '(':
5046 if (lc != '"' && lc != '\'') {
5047 lc = '(';
5048 br++;
5049 }
5050 break;
5051 case ')':
5052 if (lc != '"' && lc != '\'') {
5053 lc = ')';
5054 br--;
5055 }
5056 break;
5057 case '>':
5058 if (depth) {
5059 depth--;
5060 break;
5061 }
5062 if (in_q) {
5063 break;
5064 }
5065
5066 if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
5067 in_q = state = 0;
5068 tp = tbuf;
5069 p++;
5070 goto state_0;
5071 }
5072 break;
5073 case '"':
5074 case '\'':
5075 if (p >= buf + 1 && *(p-1) != '\\') {
5076 if (lc == c) {
5077 lc = '\0';
5078 } else if (lc != '\\') {
5079 lc = c;
5080 }
5081 if (p != buf && (!in_q || *p == in_q)) {
5082 if (in_q) {
5083 in_q = 0;
5084 } else {
5085 in_q = *p;
5086 }
5087 }
5088 }
5089 break;
5090 case 'l':
5091 case 'L':
5092 /* swm: If we encounter '<?xml' then we shouldn't be in
5093 * state == 2 (PHP). Switch back to HTML.
5094 */
5095 if (state == 2 && p > buf+4
5096 && (*(p-1) == 'm' || *(p-1) == 'M')
5097 && (*(p-2) == 'x' || *(p-2) == 'X')
5098 && *(p-3) == '?'
5099 && *(p-4) == '<') {
5100 state = 1; is_xml=1;
5101 p++;
5102 goto state_1;
5103 }
5104 break;
5105 default:
5106 break;
5107 }
5108 p++;
5109 goto state_2;
5110
5111 state_3:
5112 if (p >= end) {
5113 goto finish;
5114 }
5115 c = *p;
5116 switch (c) {
5117 case '>':
5118 if (depth) {
5119 depth--;
5120 break;
5121 }
5122 if (in_q) {
5123 break;
5124 }
5125 in_q = state = 0;
5126 tp = tbuf;
5127 p++;
5128 goto state_0;
5129 case '"':
5130 case '\'':
5131 if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
5132 if (in_q) {
5133 in_q = 0;
5134 } else {
5135 in_q = *p;
5136 }
5137 }
5138 break;
5139 case '-':
5140 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
5141 state = 4;
5142 p++;
5143 goto state_4;
5144 }
5145 break;
5146 case 'E':
5147 case 'e':
5148 /* !DOCTYPE exception */
5149 if (p > buf+6
5150 && (*(p-1) == 'p' || *(p-1) == 'P')
5151 && (*(p-2) == 'y' || *(p-2) == 'Y')
5152 && (*(p-3) == 't' || *(p-3) == 'T')
5153 && (*(p-4) == 'c' || *(p-4) == 'C')
5154 && (*(p-5) == 'o' || *(p-5) == 'O')
5155 && (*(p-6) == 'd' || *(p-6) == 'D')) {
5156 state = 1;
5157 p++;
5158 goto state_1;
5159 }
5160 break;
5161 default:
5162 break;
5163 }
5164 p++;
5165 goto state_3;
5166
5167 state_4:
5168 while (p < end) {
5169 c = *p;
5170 if (c == '>' && !in_q) {
5171 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
5172 in_q = state = 0;
5173 tp = tbuf;
5174 p++;
5175 goto state_0;
5176 }
5177 }
5178 p++;
5179 }
5180
5181 finish:
5182 if (rp < rbuf + len) {
5183 *rp = '\0';
5184 }
5185 efree((void *)buf);
5186 if (tbuf) {
5187 efree(tbuf);
5188 }
5189 if (allow_free) {
5190 efree(allow_free);
5191 }
5192
5193 return (size_t)(rp - rbuf);
5194 }
5195 /* }}} */
5196
5197 /* {{{ Parse a CSV string into an array */
5198 PHP_FUNCTION(str_getcsv)
5199 {
5200 zend_string *str;
5201 char delim = ',', enc = '"';
5202 int esc = (unsigned char) '\\';
5203 char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
5204 size_t delim_len = 0, enc_len = 0, esc_len = 0;
5205
5206 ZEND_PARSE_PARAMETERS_START(1, 4)
5207 Z_PARAM_STR(str)
5208 Z_PARAM_OPTIONAL
5209 Z_PARAM_STRING(delim_str, delim_len)
5210 Z_PARAM_STRING(enc_str, enc_len)
5211 Z_PARAM_STRING(esc_str, esc_len)
5212 ZEND_PARSE_PARAMETERS_END();
5213
5214 delim = delim_len ? delim_str[0] : delim;
5215 enc = enc_len ? enc_str[0] : enc;
5216 if (esc_str != NULL) {
5217 esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
5218 }
5219
5220 php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str), return_value);
5221 }
5222 /* }}} */
5223
5224 /* {{{ Returns the input string repeat mult times */
5225 PHP_FUNCTION(str_repeat)
5226 {
5227 zend_string *input_str; /* Input string */
5228 zend_long mult; /* Multiplier */
5229 zend_string *result; /* Resulting string */
5230 size_t result_len; /* Length of the resulting string */
5231
5232 ZEND_PARSE_PARAMETERS_START(2, 2)
5233 Z_PARAM_STR(input_str)
5234 Z_PARAM_LONG(mult)
5235 ZEND_PARSE_PARAMETERS_END();
5236
5237 if (mult < 0) {
5238 zend_argument_value_error(2, "must be greater than or equal to 0");
5239 RETURN_THROWS();
5240 }
5241
5242 /* Don't waste our time if it's empty */
5243 /* ... or if the multiplier is zero */
5244 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5245 RETURN_EMPTY_STRING();
5246
5247 /* Initialize the result string */
5248 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5249 result_len = ZSTR_LEN(input_str) * mult;
5250
5251 /* Heavy optimization for situations where input string is 1 byte long */
5252 if (ZSTR_LEN(input_str) == 1) {
5253 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5254 } else {
5255 const char *s, *ee;
5256 char *e;
5257 ptrdiff_t l=0;
5258 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5259 s = ZSTR_VAL(result);
5260 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5261 ee = ZSTR_VAL(result) + result_len;
5262
5263 while (e<ee) {
5264 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5265 memmove(e, s, l);
5266 e += l;
5267 }
5268 }
5269
5270 ZSTR_VAL(result)[result_len] = '\0';
5271
5272 RETURN_NEW_STR(result);
5273 }
5274 /* }}} */
5275
5276 /* {{{ Returns info about what characters are used in input */
5277 PHP_FUNCTION(count_chars)
5278 {
5279 zend_string *input;
5280 int chars[256];
5281 zend_long mymode=0;
5282 const unsigned char *buf;
5283 int inx;
5284 char retstr[256];
5285 size_t retlen=0;
5286 size_t tmp = 0;
5287
5288 ZEND_PARSE_PARAMETERS_START(1, 2)
5289 Z_PARAM_STR(input)
5290 Z_PARAM_OPTIONAL
5291 Z_PARAM_LONG(mymode)
5292 ZEND_PARSE_PARAMETERS_END();
5293
5294 if (mymode < 0 || mymode > 4) {
5295 zend_argument_value_error(2, "must be between 0 and 4 (inclusive)");
5296 RETURN_THROWS();
5297 }
5298
5299 buf = (const unsigned char *) ZSTR_VAL(input);
5300 memset((void*) chars, 0, sizeof(chars));
5301
5302 while (tmp < ZSTR_LEN(input)) {
5303 chars[*buf]++;
5304 buf++;
5305 tmp++;
5306 }
5307
5308 if (mymode < 3) {
5309 array_init(return_value);
5310 }
5311
5312 for (inx = 0; inx < 256; inx++) {
5313 switch (mymode) {
5314 case 0:
5315 add_index_long(return_value, inx, chars[inx]);
5316 break;
5317 case 1:
5318 if (chars[inx] != 0) {
5319 add_index_long(return_value, inx, chars[inx]);
5320 }
5321 break;
5322 case 2:
5323 if (chars[inx] == 0) {
5324 add_index_long(return_value, inx, chars[inx]);
5325 }
5326 break;
5327 case 3:
5328 if (chars[inx] != 0) {
5329 retstr[retlen++] = inx;
5330 }
5331 break;
5332 case 4:
5333 if (chars[inx] == 0) {
5334 retstr[retlen++] = inx;
5335 }
5336 break;
5337 }
5338 }
5339
5340 if (mymode == 3 || mymode == 4) {
5341 RETURN_STRINGL(retstr, retlen);
5342 }
5343 }
5344 /* }}} */
5345
5346 /* {{{ php_strnatcmp */
5347 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
5348 {
5349 zend_string *s1, *s2;
5350
5351 ZEND_PARSE_PARAMETERS_START(2, 2)
5352 Z_PARAM_STR(s1)
5353 Z_PARAM_STR(s2)
5354 ZEND_PARSE_PARAMETERS_END();
5355
5356 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5357 ZSTR_VAL(s2), ZSTR_LEN(s2),
5358 fold_case));
5359 }
5360 /* }}} */
5361
5362 PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2, bool case_insensitive) /* {{{ */
5363 {
5364 zend_string *tmp_str1, *tmp_str2;
5365 zend_string *str1 = zval_get_tmp_string(op1, &tmp_str1);
5366 zend_string *str2 = zval_get_tmp_string(op2, &tmp_str2);
5367
5368 ZVAL_LONG(result, strnatcmp_ex(ZSTR_VAL(str1), ZSTR_LEN(str1), ZSTR_VAL(str2), ZSTR_LEN(str2), case_insensitive));
5369
5370 zend_tmp_string_release(tmp_str1);
5371 zend_tmp_string_release(tmp_str2);
5372 return SUCCESS;
5373 }
5374 /* }}} */
5375
5376 PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
5377 {
5378 return string_natural_compare_function_ex(result, op1, op2, 1);
5379 }
5380 /* }}} */
5381
5382 PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
5383 {
5384 return string_natural_compare_function_ex(result, op1, op2, 0);
5385 }
5386 /* }}} */
5387
5388 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5389 PHP_FUNCTION(strnatcmp)
5390 {
5391 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5392 }
5393 /* }}} */
5394
5395 /* {{{ Returns numeric formatting information based on the current locale */
5396 PHP_FUNCTION(localeconv)
5397 {
5398 zval grouping, mon_grouping;
5399 int len, i;
5400
5401 ZEND_PARSE_PARAMETERS_NONE();
5402
5403 array_init(return_value);
5404 array_init(&grouping);
5405 array_init(&mon_grouping);
5406
5407 {
5408 struct lconv currlocdata;
5409
5410 localeconv_r( &currlocdata );
5411
5412 /* Grab the grouping data out of the array */
5413 len = (int)strlen(currlocdata.grouping);
5414
5415 for (i = 0; i < len; i++) {
5416 add_index_long(&grouping, i, currlocdata.grouping[i]);
5417 }
5418
5419 /* Grab the monetary grouping data out of the array */
5420 len = (int)strlen(currlocdata.mon_grouping);
5421
5422 for (i = 0; i < len; i++) {
5423 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5424 }
5425
5426 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5427 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5428 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5429 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5430 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5431 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5432 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5433 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5434 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5435 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5436 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5437 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5438 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5439 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5440 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5441 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5442 }
5443
5444 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5445 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5446 }
5447 /* }}} */
5448
5449 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5450 PHP_FUNCTION(strnatcasecmp)
5451 {
5452 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5453 }
5454 /* }}} */
5455
5456 /* {{{ Returns the number of times a substring occurs in the string */
5457 PHP_FUNCTION(substr_count)
5458 {
5459 char *haystack, *needle;
5460 zend_long offset = 0, length = 0;
5461 bool length_is_null = 1;
5462 zend_long count = 0;
5463 size_t haystack_len, needle_len;
5464 const char *p, *endp;
5465 char cmp;
5466
5467 ZEND_PARSE_PARAMETERS_START(2, 4)
5468 Z_PARAM_STRING(haystack, haystack_len)
5469 Z_PARAM_STRING(needle, needle_len)
5470 Z_PARAM_OPTIONAL
5471 Z_PARAM_LONG(offset)
5472 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5473 ZEND_PARSE_PARAMETERS_END();
5474
5475 if (needle_len == 0) {
5476 zend_argument_value_error(2, "cannot be empty");
5477 RETURN_THROWS();
5478 }
5479
5480 p = haystack;
5481 endp = p + haystack_len;
5482
5483 if (offset < 0) {
5484 offset += (zend_long)haystack_len;
5485 }
5486 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5487 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5488 RETURN_THROWS();
5489 }
5490 p += offset;
5491
5492 if (!length_is_null) {
5493
5494 if (length < 0) {
5495 length += (haystack_len - offset);
5496 }
5497 if (length < 0 || ((size_t)length > (haystack_len - offset))) {
5498 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5499 RETURN_THROWS();
5500 }
5501 endp = p + length;
5502 }
5503
5504 if (needle_len == 1) {
5505 cmp = needle[0];
5506
5507 while ((p = memchr(p, cmp, endp - p))) {
5508 count++;
5509 p++;
5510 }
5511 } else {
5512 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5513 p += needle_len;
5514 count++;
5515 }
5516 }
5517
5518 RETURN_LONG(count);
5519 }
5520 /* }}} */
5521
5522 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5523 PHP_FUNCTION(str_pad)
5524 {
5525 /* Input arguments */
5526 zend_string *input; /* Input string */
5527 zend_long pad_length; /* Length to pad to */
5528
5529 /* Helper variables */
5530 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5531 char *pad_str = " "; /* Pointer to padding string */
5532 size_t pad_str_len = 1;
5533 zend_long pad_type_val = STR_PAD_RIGHT; /* The padding type value */
5534 size_t i, left_pad=0, right_pad=0;
5535 zend_string *result = NULL; /* Resulting string */
5536
5537 ZEND_PARSE_PARAMETERS_START(2, 4)
5538 Z_PARAM_STR(input)
5539 Z_PARAM_LONG(pad_length)
5540 Z_PARAM_OPTIONAL
5541 Z_PARAM_STRING(pad_str, pad_str_len)
5542 Z_PARAM_LONG(pad_type_val)
5543 ZEND_PARSE_PARAMETERS_END();
5544
5545 /* If resulting string turns out to be shorter than input string,
5546 we simply copy the input and return. */
5547 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5548 RETURN_STR_COPY(input);
5549 }
5550
5551 if (pad_str_len == 0) {
5552 zend_argument_value_error(3, "must be a non-empty string");
5553 RETURN_THROWS();
5554 }
5555
5556 if (pad_type_val < STR_PAD_LEFT || pad_type_val > STR_PAD_BOTH) {
5557 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5558 RETURN_THROWS();
5559 }
5560
5561 num_pad_chars = pad_length - ZSTR_LEN(input);
5562 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5563 ZSTR_LEN(result) = 0;
5564
5565 /* We need to figure out the left/right padding lengths. */
5566 switch (pad_type_val) {
5567 case STR_PAD_RIGHT:
5568 left_pad = 0;
5569 right_pad = num_pad_chars;
5570 break;
5571
5572 case STR_PAD_LEFT:
5573 left_pad = num_pad_chars;
5574 right_pad = 0;
5575 break;
5576
5577 case STR_PAD_BOTH:
5578 left_pad = num_pad_chars / 2;
5579 right_pad = num_pad_chars - left_pad;
5580 break;
5581 }
5582
5583 /* First we pad on the left. */
5584 for (i = 0; i < left_pad; i++)
5585 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5586
5587 /* Then we copy the input string. */
5588 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5589 ZSTR_LEN(result) += ZSTR_LEN(input);
5590
5591 /* Finally, we pad on the right. */
5592 for (i = 0; i < right_pad; i++)
5593 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5594
5595 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5596
5597 RETURN_NEW_STR(result);
5598 }
5599 /* }}} */
5600
5601 /* {{{ Implements an ANSI C compatible sscanf */
5602 PHP_FUNCTION(sscanf)
5603 {
5604 zval *args = NULL;
5605 char *str, *format;
5606 size_t str_len, format_len;
5607 int result, num_args = 0;
5608
5609 ZEND_PARSE_PARAMETERS_START(2, -1)
5610 Z_PARAM_STRING(str, str_len)
5611 Z_PARAM_STRING(format, format_len)
5612 Z_PARAM_VARIADIC('*', args, num_args)
5613 ZEND_PARSE_PARAMETERS_END();
5614
5615 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5616
5617 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5618 WRONG_PARAM_COUNT;
5619 }
5620 }
5621 /* }}} */
5622
5623 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5624 #ifdef __SSE2__
5625 #include <emmintrin.h>
5626 #endif
5627 static zend_string *php_str_rot13(zend_string *str)
5628 {
5629 zend_string *ret;
5630 const char *p, *e;
5631 char *target;
5632
5633 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5634 return ZSTR_EMPTY_ALLOC();
5635 }
5636
5637 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5638
5639 p = ZSTR_VAL(str);
5640 e = p + ZSTR_LEN(str);
5641 target = ZSTR_VAL(ret);
5642
5643 #ifdef __SSE2__
5644 if (e - p > 15) {
5645 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5646 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5647 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5648 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5649 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5650 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5651 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5652 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5653 const __m128i add = _mm_set1_epi8(13);
5654 const __m128i sub = _mm_set1_epi8(-13);
5655
5656 do {
5657 __m128i in, gt, lt, cmp, delta;
5658
5659 delta = _mm_setzero_si128();
5660 in = _mm_loadu_si128((__m128i *)p);
5661
5662 gt = _mm_cmpgt_epi8(in, a_minus_1);
5663 lt = _mm_cmplt_epi8(in, m_plus_1);
5664 cmp = _mm_and_si128(lt, gt);
5665 if (_mm_movemask_epi8(cmp)) {
5666 cmp = _mm_and_si128(cmp, add);
5667 delta = _mm_or_si128(delta, cmp);
5668 }
5669
5670 gt = _mm_cmpgt_epi8(in, n_minus_1);
5671 lt = _mm_cmplt_epi8(in, z_plus_1);
5672 cmp = _mm_and_si128(lt, gt);
5673 if (_mm_movemask_epi8(cmp)) {
5674 cmp = _mm_and_si128(cmp, sub);
5675 delta = _mm_or_si128(delta, cmp);
5676 }
5677
5678 gt = _mm_cmpgt_epi8(in, A_minus_1);
5679 lt = _mm_cmplt_epi8(in, M_plus_1);
5680 cmp = _mm_and_si128(lt, gt);
5681 if (_mm_movemask_epi8(cmp)) {
5682 cmp = _mm_and_si128(cmp, add);
5683 delta = _mm_or_si128(delta, cmp);
5684 }
5685
5686 gt = _mm_cmpgt_epi8(in, N_minus_1);
5687 lt = _mm_cmplt_epi8(in, Z_plus_1);
5688 cmp = _mm_and_si128(lt, gt);
5689 if (_mm_movemask_epi8(cmp)) {
5690 cmp = _mm_and_si128(cmp, sub);
5691 delta = _mm_or_si128(delta, cmp);
5692 }
5693
5694 in = _mm_add_epi8(in, delta);
5695 _mm_storeu_si128((__m128i *)target, in);
5696
5697 p += 16;
5698 target += 16;
5699 } while (e - p > 15);
5700 }
5701 #endif
5702
5703 while (p < e) {
5704 if (*p >= 'a' && *p <= 'z') {
5705 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5706 } else if (*p >= 'A' && *p <= 'Z') {
5707 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5708 } else {
5709 *target++ = *p++;
5710 }
5711 }
5712
5713 *target = '\0';
5714
5715 return ret;
5716 }
5717 /* }}} */
5718
5719 /* {{{ Perform the rot13 transform on a string */
5720 PHP_FUNCTION(str_rot13)
5721 {
5722 zend_string *arg;
5723
5724 ZEND_PARSE_PARAMETERS_START(1, 1)
5725 Z_PARAM_STR(arg)
5726 ZEND_PARSE_PARAMETERS_END();
5727
5728 RETURN_STR(php_str_rot13(arg));
5729 }
5730 /* }}} */
5731
5732 static void php_string_shuffle(char *str, zend_long len) /* {{{ */
5733 {
5734 zend_long n_elems, rnd_idx, n_left;
5735 char temp;
5736 /* The implementation is stolen from array_data_shuffle */
5737 /* Thus the characteristics of the randomization are the same */
5738 n_elems = len;
5739
5740 if (n_elems <= 1) {
5741 return;
5742 }
5743
5744 n_left = n_elems;
5745
5746 while (--n_left) {
5747 rnd_idx = php_mt_rand_range(0, n_left);
5748 if (rnd_idx != n_left) {
5749 temp = str[n_left];
5750 str[n_left] = str[rnd_idx];
5751 str[rnd_idx] = temp;
5752 }
5753 }
5754 }
5755 /* }}} */
5756
5757 /* {{{ Shuffles string. One permutation of all possible is created */
5758 PHP_FUNCTION(str_shuffle)
5759 {
5760 zend_string *arg;
5761
5762 ZEND_PARSE_PARAMETERS_START(1, 1)
5763 Z_PARAM_STR(arg)
5764 ZEND_PARSE_PARAMETERS_END();
5765
5766 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
5767 if (Z_STRLEN_P(return_value) > 1) {
5768 php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
5769 }
5770 }
5771 /* }}} */
5772
5773 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
5774 then the function will return an array containing all the words
5775 found inside the string. If format of 2 is specified, then the function
5776 will return an associated array where the position of the word is the key
5777 and the word itself is the value.
5778 For the purpose of this function, 'word' is defined as a locale dependent
5779 string containing alphabetic characters, which also may contain, but not start
5780 with "'" and "-" characters.
5781 */
5782 PHP_FUNCTION(str_word_count)
5783 {
5784 zend_string *str;
5785 char *char_list = NULL, ch[256];
5786 const char *p, *e, *s;
5787 size_t char_list_len = 0, word_count = 0;
5788 zend_long type = 0;
5789
5790 ZEND_PARSE_PARAMETERS_START(1, 3)
5791 Z_PARAM_STR(str)
5792 Z_PARAM_OPTIONAL
5793 Z_PARAM_LONG(type)
5794 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
5795 ZEND_PARSE_PARAMETERS_END();
5796
5797 switch(type) {
5798 case 1:
5799 case 2:
5800 array_init(return_value);
5801 if (!ZSTR_LEN(str)) {
5802 return;
5803 }
5804 break;
5805 case 0:
5806 if (!ZSTR_LEN(str)) {
5807 RETURN_LONG(0);
5808 }
5809 /* nothing to be done */
5810 break;
5811 default:
5812 zend_argument_value_error(2, "must be a valid format value");
5813 RETURN_THROWS();
5814 }
5815
5816 if (char_list) {
5817 php_charmask((const unsigned char *) char_list, char_list_len, ch);
5818 }
5819
5820 p = ZSTR_VAL(str);
5821 e = ZSTR_VAL(str) + ZSTR_LEN(str);
5822
5823 /* first character cannot be ' or -, unless explicitly allowed by the user */
5824 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
5825 p++;
5826 }
5827 /* last character cannot be -, unless explicitly allowed by the user */
5828 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
5829 e--;
5830 }
5831
5832 while (p < e) {
5833 s = p;
5834 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
5835 p++;
5836 }
5837 if (p > s) {
5838 switch (type)
5839 {
5840 case 1:
5841 add_next_index_stringl(return_value, s, p - s);
5842 break;
5843 case 2:
5844 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
5845 break;
5846 default:
5847 word_count++;
5848 break;
5849 }
5850 }
5851 p++;
5852 }
5853
5854 if (!type) {
5855 RETURN_LONG(word_count);
5856 }
5857 }
5858
5859 /* }}} */
5860
5861 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
5862 PHP_FUNCTION(str_split)
5863 {
5864 zend_string *str;
5865 zend_long split_length = 1;
5866 const char *p;
5867 size_t n_reg_segments;
5868
5869 ZEND_PARSE_PARAMETERS_START(1, 2)
5870 Z_PARAM_STR(str)
5871 Z_PARAM_OPTIONAL
5872 Z_PARAM_LONG(split_length)
5873 ZEND_PARSE_PARAMETERS_END();
5874
5875 if (split_length <= 0) {
5876 zend_argument_value_error(2, "must be greater than 0");
5877 RETURN_THROWS();
5878 }
5879
5880 if (0 == ZSTR_LEN(str) || (size_t)split_length >= ZSTR_LEN(str)) {
5881 array_init_size(return_value, 1);
5882 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
5883 return;
5884 }
5885
5886 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
5887
5888 n_reg_segments = ZSTR_LEN(str) / split_length;
5889 p = ZSTR_VAL(str);
5890
5891 while (n_reg_segments-- > 0) {
5892 add_next_index_stringl(return_value, p, split_length);
5893 p += split_length;
5894 }
5895
5896 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
5897 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
5898 }
5899 }
5900 /* }}} */
5901
5902 /* {{{ Search a string for any of a set of characters */
5903 PHP_FUNCTION(strpbrk)
5904 {
5905 zend_string *haystack, *char_list;
5906 const char *haystack_ptr, *cl_ptr;
5907
5908 ZEND_PARSE_PARAMETERS_START(2, 2)
5909 Z_PARAM_STR(haystack)
5910 Z_PARAM_STR(char_list)
5911 ZEND_PARSE_PARAMETERS_END();
5912
5913 if (!ZSTR_LEN(char_list)) {
5914 zend_argument_value_error(2, "must be a non-empty string");
5915 RETURN_THROWS();
5916 }
5917
5918 for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
5919 for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
5920 if (*cl_ptr == *haystack_ptr) {
5921 RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
5922 }
5923 }
5924 }
5925
5926 RETURN_FALSE;
5927 }
5928 /* }}} */
5929
5930 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
5931 PHP_FUNCTION(substr_compare)
5932 {
5933 zend_string *s1, *s2;
5934 zend_long offset, len=0;
5935 bool len_is_default=1;
5936 bool cs=0;
5937 size_t cmp_len;
5938
5939 ZEND_PARSE_PARAMETERS_START(3, 5)
5940 Z_PARAM_STR(s1)
5941 Z_PARAM_STR(s2)
5942 Z_PARAM_LONG(offset)
5943 Z_PARAM_OPTIONAL
5944 Z_PARAM_LONG_OR_NULL(len, len_is_default)
5945 Z_PARAM_BOOL(cs)
5946 ZEND_PARSE_PARAMETERS_END();
5947
5948 if (!len_is_default && len <= 0) {
5949 if (len == 0) {
5950 RETURN_LONG(0L);
5951 } else {
5952 zend_argument_value_error(4, "must be greater than or equal to 0");
5953 RETURN_THROWS();
5954 }
5955 }
5956
5957 if (offset < 0) {
5958 offset = ZSTR_LEN(s1) + offset;
5959 offset = (offset < 0) ? 0 : offset;
5960 }
5961
5962 if ((size_t)offset > ZSTR_LEN(s1)) {
5963 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5964 RETURN_THROWS();
5965 }
5966
5967 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
5968
5969 if (!cs) {
5970 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5971 } else {
5972 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5973 }
5974 }
5975 /* }}} */
5976
5977 /* {{{ */
5978 static zend_string *php_utf8_encode(const char *s, size_t len)
5979 {
5980 size_t pos = len;
5981 zend_string *str;
5982 unsigned char c;
5983
5984 str = zend_string_safe_alloc(len, 2, 0, 0);
5985 ZSTR_LEN(str) = 0;
5986 while (pos > 0) {
5987 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5988 * so we don't need to do any mapping here. */
5989 c = (unsigned char)(*s);
5990 if (c < 0x80) {
5991 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
5992 /* We only account for the single-byte and two-byte cases because
5993 * we're only dealing with the first 256 Unicode codepoints. */
5994 } else {
5995 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
5996 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
5997 }
5998 pos--;
5999 s++;
6000 }
6001 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6002 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6003 return str;
6004 }
6005 /* }}} */
6006
6007 /* {{{ */
6008 static zend_string *php_utf8_decode(const char *s, size_t len)
6009 {
6010 size_t pos = 0;
6011 unsigned int c;
6012 zend_string *str;
6013
6014 str = zend_string_alloc(len, 0);
6015 ZSTR_LEN(str) = 0;
6016 while (pos < len) {
6017 int status = FAILURE;
6018 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
6019
6020 /* The lower 256 codepoints of Unicode are identical to Latin-1,
6021 * so we don't need to do any mapping here beyond replacing non-Latin-1
6022 * characters. */
6023 if (status == FAILURE || c > 0xFFU) {
6024 c = '?';
6025 }
6026
6027 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
6028 }
6029 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
6030 if (ZSTR_LEN(str) < len) {
6031 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
6032 }
6033
6034 return str;
6035 }
6036 /* }}} */
6037
6038 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
6039 PHP_FUNCTION(utf8_encode)
6040 {
6041 char *arg;
6042 size_t arg_len;
6043
6044 ZEND_PARSE_PARAMETERS_START(1, 1)
6045 Z_PARAM_STRING(arg, arg_len)
6046 ZEND_PARSE_PARAMETERS_END();
6047
6048 RETURN_STR(php_utf8_encode(arg, arg_len));
6049 }
6050 /* }}} */
6051
6052 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
6053 PHP_FUNCTION(utf8_decode)
6054 {
6055 char *arg;
6056 size_t arg_len;
6057
6058 ZEND_PARSE_PARAMETERS_START(1, 1)
6059 Z_PARAM_STRING(arg, arg_len)
6060 ZEND_PARSE_PARAMETERS_END();
6061
6062 RETURN_STR(php_utf8_decode(arg, arg_len));
6063 }
6064 /* }}} */
6065