1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Rasmus Lerdorf <rasmus@php.net> |
14 | Stig Sæther Bakken <ssb@php.net> |
15 | Zeev Suraski <zeev@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include <stdio.h>
20 #include "php.h"
21 #include "php_rand.h"
22 #include "php_string.h"
23 #include "php_variables.h"
24 #include <locale.h>
25 #ifdef HAVE_LANGINFO_H
26 # include <langinfo.h>
27 #endif
28
29 #ifdef HAVE_LIBINTL
30 # include <libintl.h> /* For LC_MESSAGES */
31 #endif
32
33 #include "scanf.h"
34 #include "zend_API.h"
35 #include "zend_execute.h"
36 #include "php_globals.h"
37 #include "basic_functions.h"
38 #include "zend_smart_str.h"
39 #include <Zend/zend_exceptions.h>
40 #ifdef ZTS
41 #include "TSRM.h"
42 #endif
43
44 /* For str_getcsv() support */
45 #include "ext/standard/file.h"
46 /* For php_next_utf8_char() */
47 #include "ext/standard/html.h"
48
/* Values registered below as the STR_PAD_* constants */
#define STR_PAD_LEFT			0
#define STR_PAD_RIGHT			1
#define STR_PAD_BOTH			2

/* Bit flags registered below as the PATHINFO_* constants;
 * PHP_PATHINFO_ALL is the union of the four individual flags */
#define PHP_PATHINFO_DIRNAME	1
#define PHP_PATHINFO_BASENAME	2
#define PHP_PATHINFO_EXTENSION	4
#define PHP_PATHINFO_FILENAME	8
#define PHP_PATHINFO_ALL \
	(PHP_PATHINFO_DIRNAME | PHP_PATHINFO_BASENAME | PHP_PATHINFO_EXTENSION | PHP_PATHINFO_FILENAME)

/* Behavior selector handed to php_spn_common_handler() */
#define STR_STRSPN				0
#define STR_STRCSPN				1
60
61 /* {{{ register_string_constants */
register_string_constants(INIT_FUNC_ARGS)62 void register_string_constants(INIT_FUNC_ARGS)
63 {
64 REGISTER_LONG_CONSTANT("STR_PAD_LEFT", STR_PAD_LEFT, CONST_CS | CONST_PERSISTENT);
65 REGISTER_LONG_CONSTANT("STR_PAD_RIGHT", STR_PAD_RIGHT, CONST_CS | CONST_PERSISTENT);
66 REGISTER_LONG_CONSTANT("STR_PAD_BOTH", STR_PAD_BOTH, CONST_CS | CONST_PERSISTENT);
67 REGISTER_LONG_CONSTANT("PATHINFO_DIRNAME", PHP_PATHINFO_DIRNAME, CONST_CS | CONST_PERSISTENT);
68 REGISTER_LONG_CONSTANT("PATHINFO_BASENAME", PHP_PATHINFO_BASENAME, CONST_CS | CONST_PERSISTENT);
69 REGISTER_LONG_CONSTANT("PATHINFO_EXTENSION", PHP_PATHINFO_EXTENSION, CONST_CS | CONST_PERSISTENT);
70 REGISTER_LONG_CONSTANT("PATHINFO_FILENAME", PHP_PATHINFO_FILENAME, CONST_CS | CONST_PERSISTENT);
71 REGISTER_LONG_CONSTANT("PATHINFO_ALL", PHP_PATHINFO_ALL, CONST_CS | CONST_PERSISTENT);
72
73 /* If last members of struct lconv equal CHAR_MAX, no grouping is done */
74 REGISTER_LONG_CONSTANT("CHAR_MAX", CHAR_MAX, CONST_CS | CONST_PERSISTENT);
75 REGISTER_LONG_CONSTANT("LC_CTYPE", LC_CTYPE, CONST_CS | CONST_PERSISTENT);
76 REGISTER_LONG_CONSTANT("LC_NUMERIC", LC_NUMERIC, CONST_CS | CONST_PERSISTENT);
77 REGISTER_LONG_CONSTANT("LC_TIME", LC_TIME, CONST_CS | CONST_PERSISTENT);
78 REGISTER_LONG_CONSTANT("LC_COLLATE", LC_COLLATE, CONST_CS | CONST_PERSISTENT);
79 REGISTER_LONG_CONSTANT("LC_MONETARY", LC_MONETARY, CONST_CS | CONST_PERSISTENT);
80 REGISTER_LONG_CONSTANT("LC_ALL", LC_ALL, CONST_CS | CONST_PERSISTENT);
81 # ifdef LC_MESSAGES
82 REGISTER_LONG_CONSTANT("LC_MESSAGES", LC_MESSAGES, CONST_CS | CONST_PERSISTENT);
83 # endif
84
85 }
86 /* }}} */
87
88 int php_tag_find(char *tag, size_t len, const char *set);
89
90 /* this is read-only, so it's ok */
91 ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
92
93 /* localeconv mutex */
94 #ifdef ZTS
95 static MUTEX_T locale_mutex = NULL;
96 #endif
97
98 /* {{{ php_bin2hex */
php_bin2hex(const unsigned char * old,const size_t oldlen)99 static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
100 {
101 zend_string *result;
102 size_t i, j;
103
104 result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
105
106 for (i = j = 0; i < oldlen; i++) {
107 ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
108 ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
109 }
110 ZSTR_VAL(result)[j] = '\0';
111
112 return result;
113 }
114 /* }}} */
115
116 /* {{{ php_hex2bin */
php_hex2bin(const unsigned char * old,const size_t oldlen)117 static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
118 {
119 size_t target_length = oldlen >> 1;
120 zend_string *str = zend_string_alloc(target_length, 0);
121 unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
122 size_t i, j;
123
124 for (i = j = 0; i < target_length; i++) {
125 unsigned char c = old[j++];
126 unsigned char l = c & ~0x20;
127 int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
128 unsigned char d;
129
130 /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
131 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
132 d = (l - 0x10 - 0x27 * is_letter) << 4;
133 } else {
134 zend_string_efree(str);
135 return NULL;
136 }
137 c = old[j++];
138 l = c & ~0x20;
139 is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
140 if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
141 d |= l - 0x10 - 0x27 * is_letter;
142 } else {
143 zend_string_efree(str);
144 return NULL;
145 }
146 ret[i] = d;
147 }
148 ret[i] = '\0';
149
150 return str;
151 }
152 /* }}} */
153
154 /* {{{ localeconv_r
155 * glibc's localeconv is not reentrant, so lets make it so ... sorta */
localeconv_r(struct lconv * out)156 PHPAPI struct lconv *localeconv_r(struct lconv *out)
157 {
158
159 #ifdef ZTS
160 tsrm_mutex_lock( locale_mutex );
161 #endif
162
163 /* cur->locinfo is struct __crt_locale_info which implementation is
164 hidden in vc14. TODO revisit this and check if a workaround available
165 and needed. */
166 #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
167 {
168 /* Even with the enabled per thread locale, localeconv
169 won't check any locale change in the master thread. */
170 _locale_t cur = _get_current_locale();
171 *out = *cur->locinfo->lconv;
172 _free_locale(cur);
173 }
174 #else
175 /* localeconv doesn't return an error condition */
176 *out = *localeconv();
177 #endif
178
179 #ifdef ZTS
180 tsrm_mutex_unlock( locale_mutex );
181 #endif
182
183 return out;
184 }
185 /* }}} */
186
187 #ifdef ZTS
188 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(localeconv)189 PHP_MINIT_FUNCTION(localeconv)
190 {
191 locale_mutex = tsrm_mutex_alloc();
192 return SUCCESS;
193 }
194 /* }}} */
195
196 /* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(localeconv)197 PHP_MSHUTDOWN_FUNCTION(localeconv)
198 {
199 tsrm_mutex_free( locale_mutex );
200 locale_mutex = NULL;
201 return SUCCESS;
202 }
203 /* }}} */
204 #endif
205
206 /* {{{ Converts the binary representation of data to hex */
PHP_FUNCTION(bin2hex)207 PHP_FUNCTION(bin2hex)
208 {
209 zend_string *result;
210 zend_string *data;
211
212 ZEND_PARSE_PARAMETERS_START(1, 1)
213 Z_PARAM_STR(data)
214 ZEND_PARSE_PARAMETERS_END();
215
216 result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
217
218 RETURN_STR(result);
219 }
220 /* }}} */
221
222 /* {{{ Converts the hex representation of data to binary */
PHP_FUNCTION(hex2bin)223 PHP_FUNCTION(hex2bin)
224 {
225 zend_string *result, *data;
226
227 ZEND_PARSE_PARAMETERS_START(1, 1)
228 Z_PARAM_STR(data)
229 ZEND_PARSE_PARAMETERS_END();
230
231 if (ZSTR_LEN(data) % 2 != 0) {
232 php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
233 RETURN_FALSE;
234 }
235
236 result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
237
238 if (!result) {
239 php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
240 RETURN_FALSE;
241 }
242
243 RETVAL_STR(result);
244 }
245 /* }}} */
246
/* Shared body of strspn() and strcspn().
 * Parses (string, mask[, start[, length]]), clamps start/length to the
 * subject string (negative values count from the end, a NULL length means
 * "to the end") and returns the result of php_strspn() or php_strcspn() for
 * the selected segment, or 0 when the segment is empty.
 * behavior is STR_STRSPN or STR_STRCSPN. */
static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
{
	zend_string *subject, *mask;
	zend_long start = 0, len = 0;
	zend_bool len_is_null = 1;
	size_t remain_len;
	char *seg_start, *seg_end, *mask_start, *mask_end;

	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(subject)
		Z_PARAM_STR(mask)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(start)
		Z_PARAM_LONG_OR_NULL(len, len_is_null)
	ZEND_PARSE_PARAMETERS_END();

	/* clamp start into [0, strlen(subject)] */
	remain_len = ZSTR_LEN(subject);
	if (start < 0) {
		start += remain_len;
		if (start < 0) {
			start = 0;
		}
	} else if ((size_t) start > remain_len) {
		start = remain_len;
	}
	remain_len -= start;

	/* clamp len into [0, remaining bytes after start] */
	if (len_is_null) {
		len = remain_len;
	} else if (len < 0) {
		len += remain_len;
		if (len < 0) {
			len = 0;
		}
	} else if ((size_t) len > remain_len) {
		len = remain_len;
	}

	if (len == 0) {
		RETURN_LONG(0);
	}

	seg_start = ZSTR_VAL(subject) + start;
	seg_end = seg_start + len;
	mask_start = ZSTR_VAL(mask);
	mask_end = mask_start + ZSTR_LEN(mask);

	if (behavior == STR_STRSPN) {
		RETURN_LONG(php_strspn(seg_start, mask_start, seg_end, mask_end));
	}

	ZEND_ASSERT(behavior == STR_STRCSPN);
	RETURN_LONG(php_strcspn(seg_start, mask_start, seg_end, mask_end));
}
/* }}} */
303
304 /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
PHP_FUNCTION(strspn)305 PHP_FUNCTION(strspn)
306 {
307 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRSPN);
308 }
309 /* }}} */
310
311 /* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provide works like strcspn(substr($s,$start,$len),$bad_chars) */
PHP_FUNCTION(strcspn)312 PHP_FUNCTION(strcspn)
313 {
314 php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRCSPN);
315 }
316 /* }}} */
317
318 /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
319 #if HAVE_NL_LANGINFO
PHP_MINIT_FUNCTION(nl_langinfo)320 PHP_MINIT_FUNCTION(nl_langinfo)
321 {
322 #define REGISTER_NL_LANGINFO_CONSTANT(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
323 #ifdef ABDAY_1
324 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
325 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
326 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
327 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
328 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
329 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
330 REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
331 #endif
332 #ifdef DAY_1
333 REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
334 REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
335 REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
336 REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
337 REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
338 REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
339 REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
340 #endif
341 #ifdef ABMON_1
342 REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
343 REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
344 REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
345 REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
346 REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
347 REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
348 REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
349 REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
350 REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
351 REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
352 REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
353 REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
354 #endif
355 #ifdef MON_1
356 REGISTER_NL_LANGINFO_CONSTANT(MON_1);
357 REGISTER_NL_LANGINFO_CONSTANT(MON_2);
358 REGISTER_NL_LANGINFO_CONSTANT(MON_3);
359 REGISTER_NL_LANGINFO_CONSTANT(MON_4);
360 REGISTER_NL_LANGINFO_CONSTANT(MON_5);
361 REGISTER_NL_LANGINFO_CONSTANT(MON_6);
362 REGISTER_NL_LANGINFO_CONSTANT(MON_7);
363 REGISTER_NL_LANGINFO_CONSTANT(MON_8);
364 REGISTER_NL_LANGINFO_CONSTANT(MON_9);
365 REGISTER_NL_LANGINFO_CONSTANT(MON_10);
366 REGISTER_NL_LANGINFO_CONSTANT(MON_11);
367 REGISTER_NL_LANGINFO_CONSTANT(MON_12);
368 #endif
369 #ifdef AM_STR
370 REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
371 #endif
372 #ifdef PM_STR
373 REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
374 #endif
375 #ifdef D_T_FMT
376 REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
377 #endif
378 #ifdef D_FMT
379 REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
380 #endif
381 #ifdef T_FMT
382 REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
383 #endif
384 #ifdef T_FMT_AMPM
385 REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
386 #endif
387 #ifdef ERA
388 REGISTER_NL_LANGINFO_CONSTANT(ERA);
389 #endif
390 #ifdef ERA_YEAR
391 REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
392 #endif
393 #ifdef ERA_D_T_FMT
394 REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
395 #endif
396 #ifdef ERA_D_FMT
397 REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
398 #endif
399 #ifdef ERA_T_FMT
400 REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
401 #endif
402 #ifdef ALT_DIGITS
403 REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
404 #endif
405 #ifdef INT_CURR_SYMBOL
406 REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
407 #endif
408 #ifdef CURRENCY_SYMBOL
409 REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
410 #endif
411 #ifdef CRNCYSTR
412 REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
413 #endif
414 #ifdef MON_DECIMAL_POINT
415 REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
416 #endif
417 #ifdef MON_THOUSANDS_SEP
418 REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
419 #endif
420 #ifdef MON_GROUPING
421 REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
422 #endif
423 #ifdef POSITIVE_SIGN
424 REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
425 #endif
426 #ifdef NEGATIVE_SIGN
427 REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
428 #endif
429 #ifdef INT_FRAC_DIGITS
430 REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
431 #endif
432 #ifdef FRAC_DIGITS
433 REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
434 #endif
435 #ifdef P_CS_PRECEDES
436 REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
437 #endif
438 #ifdef P_SEP_BY_SPACE
439 REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
440 #endif
441 #ifdef N_CS_PRECEDES
442 REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
443 #endif
444 #ifdef N_SEP_BY_SPACE
445 REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
446 #endif
447 #ifdef P_SIGN_POSN
448 REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
449 #endif
450 #ifdef N_SIGN_POSN
451 REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
452 #endif
453 #ifdef DECIMAL_POINT
454 REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
455 #endif
456 #ifdef RADIXCHAR
457 REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
458 #endif
459 #ifdef THOUSANDS_SEP
460 REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
461 #endif
462 #ifdef THOUSEP
463 REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
464 #endif
465 #ifdef GROUPING
466 REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
467 #endif
468 #ifdef YESEXPR
469 REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
470 #endif
471 #ifdef NOEXPR
472 REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
473 #endif
474 #ifdef YESSTR
475 REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
476 #endif
477 #ifdef NOSTR
478 REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
479 #endif
480 #ifdef CODESET
481 REGISTER_NL_LANGINFO_CONSTANT(CODESET);
482 #endif
483 #undef REGISTER_NL_LANGINFO_CONSTANT
484 return SUCCESS;
485 }
486 /* }}} */
487
488 /* {{{ Query language and locale information */
PHP_FUNCTION(nl_langinfo)489 PHP_FUNCTION(nl_langinfo)
490 {
491 zend_long item;
492 char *value;
493
494 ZEND_PARSE_PARAMETERS_START(1, 1)
495 Z_PARAM_LONG(item)
496 ZEND_PARSE_PARAMETERS_END();
497
498 switch(item) { /* {{{ */
499 #ifdef ABDAY_1
500 case ABDAY_1:
501 case ABDAY_2:
502 case ABDAY_3:
503 case ABDAY_4:
504 case ABDAY_5:
505 case ABDAY_6:
506 case ABDAY_7:
507 #endif
508 #ifdef DAY_1
509 case DAY_1:
510 case DAY_2:
511 case DAY_3:
512 case DAY_4:
513 case DAY_5:
514 case DAY_6:
515 case DAY_7:
516 #endif
517 #ifdef ABMON_1
518 case ABMON_1:
519 case ABMON_2:
520 case ABMON_3:
521 case ABMON_4:
522 case ABMON_5:
523 case ABMON_6:
524 case ABMON_7:
525 case ABMON_8:
526 case ABMON_9:
527 case ABMON_10:
528 case ABMON_11:
529 case ABMON_12:
530 #endif
531 #ifdef MON_1
532 case MON_1:
533 case MON_2:
534 case MON_3:
535 case MON_4:
536 case MON_5:
537 case MON_6:
538 case MON_7:
539 case MON_8:
540 case MON_9:
541 case MON_10:
542 case MON_11:
543 case MON_12:
544 #endif
545 #ifdef AM_STR
546 case AM_STR:
547 #endif
548 #ifdef PM_STR
549 case PM_STR:
550 #endif
551 #ifdef D_T_FMT
552 case D_T_FMT:
553 #endif
554 #ifdef D_FMT
555 case D_FMT:
556 #endif
557 #ifdef T_FMT
558 case T_FMT:
559 #endif
560 #ifdef T_FMT_AMPM
561 case T_FMT_AMPM:
562 #endif
563 #ifdef ERA
564 case ERA:
565 #endif
566 #ifdef ERA_YEAR
567 case ERA_YEAR:
568 #endif
569 #ifdef ERA_D_T_FMT
570 case ERA_D_T_FMT:
571 #endif
572 #ifdef ERA_D_FMT
573 case ERA_D_FMT:
574 #endif
575 #ifdef ERA_T_FMT
576 case ERA_T_FMT:
577 #endif
578 #ifdef ALT_DIGITS
579 case ALT_DIGITS:
580 #endif
581 #ifdef INT_CURR_SYMBOL
582 case INT_CURR_SYMBOL:
583 #endif
584 #ifdef CURRENCY_SYMBOL
585 case CURRENCY_SYMBOL:
586 #endif
587 #ifdef CRNCYSTR
588 case CRNCYSTR:
589 #endif
590 #ifdef MON_DECIMAL_POINT
591 case MON_DECIMAL_POINT:
592 #endif
593 #ifdef MON_THOUSANDS_SEP
594 case MON_THOUSANDS_SEP:
595 #endif
596 #ifdef MON_GROUPING
597 case MON_GROUPING:
598 #endif
599 #ifdef POSITIVE_SIGN
600 case POSITIVE_SIGN:
601 #endif
602 #ifdef NEGATIVE_SIGN
603 case NEGATIVE_SIGN:
604 #endif
605 #ifdef INT_FRAC_DIGITS
606 case INT_FRAC_DIGITS:
607 #endif
608 #ifdef FRAC_DIGITS
609 case FRAC_DIGITS:
610 #endif
611 #ifdef P_CS_PRECEDES
612 case P_CS_PRECEDES:
613 #endif
614 #ifdef P_SEP_BY_SPACE
615 case P_SEP_BY_SPACE:
616 #endif
617 #ifdef N_CS_PRECEDES
618 case N_CS_PRECEDES:
619 #endif
620 #ifdef N_SEP_BY_SPACE
621 case N_SEP_BY_SPACE:
622 #endif
623 #ifdef P_SIGN_POSN
624 case P_SIGN_POSN:
625 #endif
626 #ifdef N_SIGN_POSN
627 case N_SIGN_POSN:
628 #endif
629 #ifdef DECIMAL_POINT
630 case DECIMAL_POINT:
631 #elif defined(RADIXCHAR)
632 case RADIXCHAR:
633 #endif
634 #ifdef THOUSANDS_SEP
635 case THOUSANDS_SEP:
636 #elif defined(THOUSEP)
637 case THOUSEP:
638 #endif
639 #ifdef GROUPING
640 case GROUPING:
641 #endif
642 #ifdef YESEXPR
643 case YESEXPR:
644 #endif
645 #ifdef NOEXPR
646 case NOEXPR:
647 #endif
648 #ifdef YESSTR
649 case YESSTR:
650 #endif
651 #ifdef NOSTR
652 case NOSTR:
653 #endif
654 #ifdef CODESET
655 case CODESET:
656 #endif
657 break;
658 default:
659 php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
660 RETURN_FALSE;
661 }
662 /* }}} */
663
664 value = nl_langinfo(item);
665 if (value == NULL) {
666 RETURN_FALSE;
667 } else {
668 RETURN_STRING(value);
669 }
670 }
671 #endif
672 /* }}} */
673
674 /* {{{ Compares two strings using the current locale */
PHP_FUNCTION(strcoll)675 PHP_FUNCTION(strcoll)
676 {
677 zend_string *s1, *s2;
678
679 ZEND_PARSE_PARAMETERS_START(2, 2)
680 Z_PARAM_STR(s1)
681 Z_PARAM_STR(s2)
682 ZEND_PARSE_PARAMETERS_END();
683
684 RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
685 (const char *) ZSTR_VAL(s2)));
686 }
687 /* }}} */
688
689 /* {{{ php_charmask
690 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
691 * it needs to be incrementing.
692 * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
693 */
php_charmask(const unsigned char * input,size_t len,char * mask)694 static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
695 {
696 const unsigned char *end;
697 unsigned char c;
698 int result = SUCCESS;
699
700 memset(mask, 0, 256);
701 for (end = input+len; input < end; input++) {
702 c=*input;
703 if ((input+3 < end) && input[1] == '.' && input[2] == '.'
704 && input[3] >= c) {
705 memset(mask+c, 1, input[3] - c + 1);
706 input+=3;
707 } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
708 /* Error, try to be as helpful as possible:
709 (a range ending/starting with '.' won't be captured here) */
710 if (end-len >= input) { /* there was no 'left' char */
711 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
712 result = FAILURE;
713 continue;
714 }
715 if (input+2 >= end) { /* there is no 'right' char */
716 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
717 result = FAILURE;
718 continue;
719 }
720 if (input[-1] > input[2]) { /* wrong order */
721 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
722 result = FAILURE;
723 continue;
724 }
725 /* FIXME: better error (a..b..c is the only left possibility?) */
726 php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
727 result = FAILURE;
728 continue;
729 } else {
730 mask[c]=1;
731 }
732 }
733 return result;
734 }
735 /* }}} */
736
737 /* {{{ php_trim_int()
738 * mode 1 : trim left
739 * mode 2 : trim right
740 * mode 3 : trim left and right
741 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
742 */
php_trim_int(zend_string * str,const char * what,size_t what_len,int mode)743 static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
744 {
745 const char *start = ZSTR_VAL(str);
746 const char *end = start + ZSTR_LEN(str);
747 char mask[256];
748
749 if (what) {
750 if (what_len == 1) {
751 char p = *what;
752 if (mode & 1) {
753 while (start != end) {
754 if (*start == p) {
755 start++;
756 } else {
757 break;
758 }
759 }
760 }
761 if (mode & 2) {
762 while (start != end) {
763 if (*(end-1) == p) {
764 end--;
765 } else {
766 break;
767 }
768 }
769 }
770 } else {
771 php_charmask((const unsigned char *) what, what_len, mask);
772
773 if (mode & 1) {
774 while (start != end) {
775 if (mask[(unsigned char)*start]) {
776 start++;
777 } else {
778 break;
779 }
780 }
781 }
782 if (mode & 2) {
783 while (start != end) {
784 if (mask[(unsigned char)*(end-1)]) {
785 end--;
786 } else {
787 break;
788 }
789 }
790 }
791 }
792 } else {
793 if (mode & 1) {
794 while (start != end) {
795 unsigned char c = (unsigned char)*start;
796
797 if (c <= ' ' &&
798 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
799 start++;
800 } else {
801 break;
802 }
803 }
804 }
805 if (mode & 2) {
806 while (start != end) {
807 unsigned char c = (unsigned char)*(end-1);
808
809 if (c <= ' ' &&
810 (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
811 end--;
812 } else {
813 break;
814 }
815 }
816 }
817 }
818
819 if (ZSTR_LEN(str) == end - start) {
820 return zend_string_copy(str);
821 } else if (end - start == 0) {
822 return ZSTR_EMPTY_ALLOC();
823 } else {
824 return zend_string_init(start, end - start, 0);
825 }
826 }
827 /* }}} */
828
829 /* {{{ php_trim_int()
830 * mode 1 : trim left
831 * mode 2 : trim right
832 * mode 3 : trim left and right
833 * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
834 */
php_trim(zend_string * str,const char * what,size_t what_len,int mode)835 PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
836 {
837 return php_trim_int(str, what, what_len, mode);
838 }
839 /* }}} */
840
841 /* {{{ php_do_trim
842 * Base for trim(), rtrim() and ltrim() functions.
843 */
php_do_trim(INTERNAL_FUNCTION_PARAMETERS,int mode)844 static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
845 {
846 zend_string *str;
847 zend_string *what = NULL;
848
849 ZEND_PARSE_PARAMETERS_START(1, 2)
850 Z_PARAM_STR(str)
851 Z_PARAM_OPTIONAL
852 Z_PARAM_STR(what)
853 ZEND_PARSE_PARAMETERS_END();
854
855 ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
856 }
857 /* }}} */
858
859 /* {{{ Strips whitespace from the beginning and end of a string */
PHP_FUNCTION(trim)860 PHP_FUNCTION(trim)
861 {
862 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
863 }
864 /* }}} */
865
866 /* {{{ Removes trailing whitespace */
PHP_FUNCTION(rtrim)867 PHP_FUNCTION(rtrim)
868 {
869 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
870 }
871 /* }}} */
872
873 /* {{{ Strips whitespace from the beginning of a string */
PHP_FUNCTION(ltrim)874 PHP_FUNCTION(ltrim)
875 {
876 php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
877 }
878 /* }}} */
879
880 /* {{{ Wraps buffer to selected number of characters using string break char */
PHP_FUNCTION(wordwrap)881 PHP_FUNCTION(wordwrap)
882 {
883 zend_string *text;
884 char *breakchar = "\n";
885 size_t newtextlen, chk, breakchar_len = 1;
886 size_t alloced;
887 zend_long current = 0, laststart = 0, lastspace = 0;
888 zend_long linelength = 75;
889 zend_bool docut = 0;
890 zend_string *newtext;
891
892 ZEND_PARSE_PARAMETERS_START(1, 4)
893 Z_PARAM_STR(text)
894 Z_PARAM_OPTIONAL
895 Z_PARAM_LONG(linelength)
896 Z_PARAM_STRING(breakchar, breakchar_len)
897 Z_PARAM_BOOL(docut)
898 ZEND_PARSE_PARAMETERS_END();
899
900 if (ZSTR_LEN(text) == 0) {
901 RETURN_EMPTY_STRING();
902 }
903
904 if (breakchar_len == 0) {
905 zend_argument_value_error(3, "cannot be empty");
906 RETURN_THROWS();
907 }
908
909 if (linelength == 0 && docut) {
910 zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
911 RETURN_THROWS();
912 }
913
914 /* Special case for a single-character break as it needs no
915 additional storage space */
916 if (breakchar_len == 1 && !docut) {
917 newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
918
919 laststart = lastspace = 0;
920 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
921 if (ZSTR_VAL(text)[current] == breakchar[0]) {
922 laststart = lastspace = current + 1;
923 } else if (ZSTR_VAL(text)[current] == ' ') {
924 if (current - laststart >= linelength) {
925 ZSTR_VAL(newtext)[current] = breakchar[0];
926 laststart = current + 1;
927 }
928 lastspace = current;
929 } else if (current - laststart >= linelength && laststart != lastspace) {
930 ZSTR_VAL(newtext)[lastspace] = breakchar[0];
931 laststart = lastspace + 1;
932 }
933 }
934
935 RETURN_NEW_STR(newtext);
936 } else {
937 /* Multiple character line break or forced cut */
938 if (linelength > 0) {
939 chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
940 newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
941 alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
942 } else {
943 chk = ZSTR_LEN(text);
944 alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
945 newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
946 }
947
948 /* now keep track of the actual new text length */
949 newtextlen = 0;
950
951 laststart = lastspace = 0;
952 for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
953 if (chk == 0) {
954 alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
955 newtext = zend_string_extend(newtext, alloced, 0);
956 chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
957 }
958 /* when we hit an existing break, copy to new buffer, and
959 * fix up laststart and lastspace */
960 if (ZSTR_VAL(text)[current] == breakchar[0]
961 && current + breakchar_len < ZSTR_LEN(text)
962 && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
963 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
964 newtextlen += current - laststart + breakchar_len;
965 current += breakchar_len - 1;
966 laststart = lastspace = current + 1;
967 chk--;
968 }
969 /* if it is a space, check if it is at the line boundary,
970 * copy and insert a break, or just keep track of it */
971 else if (ZSTR_VAL(text)[current] == ' ') {
972 if (current - laststart >= linelength) {
973 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
974 newtextlen += current - laststart;
975 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
976 newtextlen += breakchar_len;
977 laststart = current + 1;
978 chk--;
979 }
980 lastspace = current;
981 }
982 /* if we are cutting, and we've accumulated enough
983 * characters, and we haven't see a space for this line,
984 * copy and insert a break. */
985 else if (current - laststart >= linelength
986 && docut && laststart >= lastspace) {
987 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
988 newtextlen += current - laststart;
989 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
990 newtextlen += breakchar_len;
991 laststart = lastspace = current;
992 chk--;
993 }
994 /* if the current word puts us over the linelength, copy
995 * back up until the last space, insert a break, and move
996 * up the laststart */
997 else if (current - laststart >= linelength
998 && laststart < lastspace) {
999 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
1000 newtextlen += lastspace - laststart;
1001 memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
1002 newtextlen += breakchar_len;
1003 laststart = lastspace = lastspace + 1;
1004 chk--;
1005 }
1006 }
1007
1008 /* copy over any stragglers */
1009 if (laststart != current) {
1010 memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
1011 newtextlen += current - laststart;
1012 }
1013
1014 ZSTR_VAL(newtext)[newtextlen] = '\0';
1015 /* free unused memory */
1016 newtext = zend_string_truncate(newtext, newtextlen, 0);
1017
1018 RETURN_NEW_STR(newtext);
1019 }
1020 }
1021 /* }}} */
1022
1023 /* {{{ php_explode */
php_explode(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)1024 PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
1025 {
1026 const char *p1 = ZSTR_VAL(str);
1027 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
1028 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1029 zval tmp;
1030
1031 if (p2 == NULL) {
1032 ZVAL_STR_COPY(&tmp, str);
1033 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1034 } else {
1035 do {
1036 ZVAL_STRINGL_FAST(&tmp, p1, p2 - p1);
1037 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1038 p1 = p2 + ZSTR_LEN(delim);
1039 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1040 } while (p2 != NULL && --limit > 1);
1041
1042 if (p1 <= endp) {
1043 ZVAL_STRINGL(&tmp, p1, endp - p1);
1044 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1045 }
1046 }
1047 }
1048 /* }}} */
1049
1050 /* {{{ php_explode_negative_limit */
php_explode_negative_limit(const zend_string * delim,zend_string * str,zval * return_value,zend_long limit)1051 PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
1052 {
1053 #define EXPLODE_ALLOC_STEP 64
1054 const char *p1 = ZSTR_VAL(str);
1055 const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
1056 const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1057 zval tmp;
1058
1059 if (p2 == NULL) {
1060 /*
1061 do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
1062 by doing nothing we return empty array
1063 */
1064 } else {
1065 size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
1066 zend_long i, to_return;
1067 const char **positions = emalloc(allocated * sizeof(char *));
1068
1069 positions[found++] = p1;
1070 do {
1071 if (found >= allocated) {
1072 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
1073 positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
1074 }
1075 positions[found++] = p1 = p2 + ZSTR_LEN(delim);
1076 p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
1077 } while (p2 != NULL);
1078
1079 to_return = limit + found;
1080 /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
1081 for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
1082 ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
1083 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1084 }
1085 efree((void *)positions);
1086 }
1087 #undef EXPLODE_ALLOC_STEP
1088 }
1089 /* }}} */
1090
1091 /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)1092 PHP_FUNCTION(explode)
1093 {
1094 zend_string *str, *delim;
1095 zend_long limit = ZEND_LONG_MAX; /* No limit */
1096 zval tmp;
1097
1098 ZEND_PARSE_PARAMETERS_START(2, 3)
1099 Z_PARAM_STR(delim)
1100 Z_PARAM_STR(str)
1101 Z_PARAM_OPTIONAL
1102 Z_PARAM_LONG(limit)
1103 ZEND_PARSE_PARAMETERS_END();
1104
1105 if (ZSTR_LEN(delim) == 0) {
1106 zend_argument_value_error(1, "cannot be empty");
1107 RETURN_THROWS();
1108 }
1109
1110 array_init(return_value);
1111
1112 if (ZSTR_LEN(str) == 0) {
1113 if (limit >= 0) {
1114 ZVAL_EMPTY_STRING(&tmp);
1115 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1116 }
1117 return;
1118 }
1119
1120 if (limit > 1) {
1121 php_explode(delim, str, return_value, limit);
1122 } else if (limit < 0) {
1123 php_explode_negative_limit(delim, str, return_value, limit);
1124 } else {
1125 ZVAL_STR_COPY(&tmp, str);
1126 zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
1127 }
1128 }
1129 /* }}} */
1130
1131 /* {{{ An alias for implode */
1132 /* }}} */
1133
1134 /* {{{ php_implode */
php_implode(const zend_string * glue,HashTable * pieces,zval * return_value)1135 PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
1136 {
1137 zval *tmp;
1138 int numelems;
1139 zend_string *str;
1140 char *cptr;
1141 size_t len = 0;
1142 struct {
1143 zend_string *str;
1144 zend_long lval;
1145 } *strings, *ptr;
1146 ALLOCA_FLAG(use_heap)
1147
1148 numelems = zend_hash_num_elements(pieces);
1149
1150 if (numelems == 0) {
1151 RETURN_EMPTY_STRING();
1152 } else if (numelems == 1) {
1153 /* loop to search the first not undefined element... */
1154 ZEND_HASH_FOREACH_VAL_IND(pieces, tmp) {
1155 RETURN_STR(zval_get_string(tmp));
1156 } ZEND_HASH_FOREACH_END();
1157 }
1158
1159 ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);
1160
1161 ZEND_HASH_FOREACH_VAL_IND(pieces, tmp) {
1162 if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
1163 ptr->str = Z_STR_P(tmp);
1164 len += ZSTR_LEN(ptr->str);
1165 ptr->lval = 0;
1166 ptr++;
1167 } else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
1168 zend_long val = Z_LVAL_P(tmp);
1169
1170 ptr->str = NULL;
1171 ptr->lval = val;
1172 ptr++;
1173 if (val <= 0) {
1174 len++;
1175 }
1176 while (val) {
1177 val /= 10;
1178 len++;
1179 }
1180 } else {
1181 ptr->str = zval_get_string_func(tmp);
1182 len += ZSTR_LEN(ptr->str);
1183 ptr->lval = 1;
1184 ptr++;
1185 }
1186 } ZEND_HASH_FOREACH_END();
1187
1188 /* numelems can not be 0, we checked above */
1189 str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
1190 cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
1191 *cptr = 0;
1192
1193 while (1) {
1194 ptr--;
1195 if (EXPECTED(ptr->str)) {
1196 cptr -= ZSTR_LEN(ptr->str);
1197 memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
1198 if (ptr->lval) {
1199 zend_string_release_ex(ptr->str, 0);
1200 }
1201 } else {
1202 char *oldPtr = cptr;
1203 char oldVal = *cptr;
1204 cptr = zend_print_long_to_buf(cptr, ptr->lval);
1205 *oldPtr = oldVal;
1206 }
1207
1208 if (ptr == strings) {
1209 break;
1210 }
1211
1212 cptr -= ZSTR_LEN(glue);
1213 memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
1214 }
1215
1216 free_alloca(strings, use_heap);
1217 RETURN_NEW_STR(str);
1218 }
1219 /* }}} */
1220
1221 /* {{{ Joins array elements placing glue string between items and return one string */
PHP_FUNCTION(implode)1222 PHP_FUNCTION(implode)
1223 {
1224 zend_string *arg1_str = NULL;
1225 HashTable *arg1_array = NULL;
1226 zend_array *pieces = NULL;
1227
1228 ZEND_PARSE_PARAMETERS_START(1, 2)
1229 Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
1230 Z_PARAM_OPTIONAL
1231 Z_PARAM_ARRAY_HT_OR_NULL(pieces)
1232 ZEND_PARSE_PARAMETERS_END();
1233
1234 if (pieces == NULL) {
1235 if (arg1_array == NULL) {
1236 zend_type_error("%s(): Argument #1 ($pieces) must be of type array, string given", get_active_function_name());
1237 RETURN_THROWS();
1238 }
1239
1240 arg1_str = ZSTR_EMPTY_ALLOC();
1241 pieces = arg1_array;
1242 } else {
1243 if (arg1_str == NULL) {
1244 zend_argument_type_error(1, "must be of type string, array given");
1245 RETURN_THROWS();
1246 }
1247 }
1248
1249 php_implode(arg1_str, pieces, return_value);
1250 }
1251 /* }}} */
1252
/* Lookup slot in BG(strtok_table) for the byte that p points at; the byte is
 * read as unsigned char so it can be used as an index. */
#define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]

/* {{{ Tokenize a string */
/* strtok(string, token) starts tokenizing `string`; strtok(token) continues
 * with the string stored by an earlier call. Every byte of the token argument
 * acts as a delimiter. Returns the next token, or false when no string is
 * stored or no further token exists. */
PHP_FUNCTION(strtok)
{
	zend_string *str, *tok = NULL;
	char *token;
	char *token_end;
	char *p;
	char *pe;
	size_t skipped = 0;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR_OR_NULL(tok)
	ZEND_PARSE_PARAMETERS_END();

	if (!tok) {
		/* Single-argument form: the only argument is the delimiter set and
		 * the previously stored string keeps being tokenized. */
		tok = str;
	} else {
		/* Two-argument form: replace the stored string and restart at its
		 * first byte. */
		if (BG(strtok_string)) {
			zend_string_release(BG(strtok_string));
		}
		BG(strtok_string) = zend_string_copy(str);
		BG(strtok_last) = ZSTR_VAL(str);
		BG(strtok_len) = ZSTR_LEN(str);
	}

	if (!BG(strtok_string)) {
		/* String to tokenize not set. */
		// TODO: Should this warn?
		RETURN_FALSE;
	}

	p = BG(strtok_last); /* Where we start to search */
	pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
	if (p >= pe) {
		/* Reached the end of the string. */
		RETURN_FALSE;
	}

	token = ZSTR_VAL(tok);
	token_end = token + ZSTR_LEN(tok);

	/* Mark every delimiter byte in the lookup table. */
	while (token < token_end) {
		STRTOK_TABLE(token++) = 1;
	}

	/* Skip leading delimiters */
	while (STRTOK_TABLE(p)) {
		if (++p >= pe) {
			/* no other chars left */
			goto return_false;
		}
		skipped++;
	}

	/* We know at this place that *p is no delimiter, so skip it */
	while (++p < pe) {
		if (STRTOK_TABLE(p)) {
			goto return_token;
		}
	}

	if (p - BG(strtok_last)) {
return_token:
		/* The token starts after the skipped delimiters and ends just before
		 * p; the next search resumes one byte past p. */
		RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
		BG(strtok_last) = p + 1;
	} else {
return_false:
		/* Nothing left to return: drop the stored string. */
		RETVAL_FALSE;
		zend_string_release(BG(strtok_string));
		BG(strtok_string) = NULL;
	}

	/* Restore table -- usually faster then memset'ing the table on every invocation */
	token = ZSTR_VAL(tok);
	while (token < token_end) {
		STRTOK_TABLE(token++) = 0;
	}
}
1335 /* }}} */
1336
1337 /* {{{ php_strtoupper */
/* Upper-case the first `len` bytes of `s` in place with toupper() and return
 * `s`. Bytes are handled as unsigned char. */
PHPAPI char *php_strtoupper(char *s, size_t len)
{
	unsigned char *bytes = (unsigned char *)s;
	size_t i;

	for (i = 0; i < len; i++) {
		bytes[i] = toupper(bytes[i]);
	}

	return s;
}
1352 /* }}} */
1353
1354 /* {{{ php_string_toupper */
php_string_toupper(zend_string * s)1355 PHPAPI zend_string *php_string_toupper(zend_string *s)
1356 {
1357 unsigned char *c;
1358 const unsigned char *e;
1359
1360 c = (unsigned char *)ZSTR_VAL(s);
1361 e = c + ZSTR_LEN(s);
1362
1363 while (c < e) {
1364 if (islower(*c)) {
1365 register unsigned char *r;
1366 zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
1367
1368 if (c != (unsigned char*)ZSTR_VAL(s)) {
1369 memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
1370 }
1371 r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
1372 while (c < e) {
1373 *r = toupper(*c);
1374 r++;
1375 c++;
1376 }
1377 *r = '\0';
1378 return res;
1379 }
1380 c++;
1381 }
1382 return zend_string_copy(s);
1383 }
1384 /* }}} */
1385
1386 /* {{{ Makes a string uppercase */
PHP_FUNCTION(strtoupper)1387 PHP_FUNCTION(strtoupper)
1388 {
1389 zend_string *arg;
1390
1391 ZEND_PARSE_PARAMETERS_START(1, 1)
1392 Z_PARAM_STR(arg)
1393 ZEND_PARSE_PARAMETERS_END();
1394
1395 RETURN_STR(php_string_toupper(arg));
1396 }
1397 /* }}} */
1398
1399 /* {{{ php_strtolower */
/* Lower-case the first `len` bytes of `s` in place with tolower() and return
 * `s`. Bytes are handled as unsigned char. */
PHPAPI char *php_strtolower(char *s, size_t len)
{
	unsigned char *bytes = (unsigned char *)s;
	size_t i;

	for (i = 0; i < len; i++) {
		bytes[i] = tolower(bytes[i]);
	}

	return s;
}
1414 /* }}} */
1415
1416 /* {{{ php_string_tolower */
php_string_tolower(zend_string * s)1417 PHPAPI zend_string *php_string_tolower(zend_string *s)
1418 {
1419 unsigned char *c;
1420 const unsigned char *e;
1421
1422 if (EXPECTED(!BG(ctype_string))) {
1423 return zend_string_tolower(s);
1424 } else {
1425 c = (unsigned char *)ZSTR_VAL(s);
1426 e = c + ZSTR_LEN(s);
1427
1428 while (c < e) {
1429 if (isupper(*c)) {
1430 register unsigned char *r;
1431 zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
1432
1433 if (c != (unsigned char*)ZSTR_VAL(s)) {
1434 memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
1435 }
1436 r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
1437 while (c < e) {
1438 *r = tolower(*c);
1439 r++;
1440 c++;
1441 }
1442 *r = '\0';
1443 return res;
1444 }
1445 c++;
1446 }
1447 return zend_string_copy(s);
1448 }
1449 }
1450 /* }}} */
1451
1452 /* {{{ Makes a string lowercase */
PHP_FUNCTION(strtolower)1453 PHP_FUNCTION(strtolower)
1454 {
1455 zend_string *str;
1456
1457 ZEND_PARSE_PARAMETERS_START(1, 1)
1458 Z_PARAM_STR(str)
1459 ZEND_PARSE_PARAMETERS_END();
1460
1461 RETURN_STR(php_string_tolower(str));
1462 }
1463 /* }}} */
1464
1465 /* {{{ php_basename */
/* Return a new string holding the trailing name component of the path
 * s[0..len). When `suffix` is non-NULL, strictly shorter than the component
 * and equal to its tail, the suffix is cut off as well. */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
	/* State 0 is directly after a directory separator (or at the start of the string).
	 * State 1 is everything else. */
	int state = 0;
	const char *basename_start = s;
	const char *basename_end = s;
	while (len > 0) {
		/* A NUL byte counts as a one-byte character; everything else is
		 * measured with php_mblen() so the scan advances a whole character
		 * at a time. */
		int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));

		switch (inc_len) {
			case 0:
				goto quit_loop;
			case 1:
#if defined(PHP_WIN32)
				if (*s == '/' || *s == '\\') {
#else
				if (*s == '/') {
#endif
					/* First separator after a name: the name ends here.
					 * Further separators in a row leave the end untouched. */
					if (state == 1) {
						state = 0;
						basename_end = s;
					}
#if defined(PHP_WIN32)
				/* Catch relative paths in c:file.txt style. They're not to confuse
				   with the NTFS streams. This part ensures also, that no drive
				   letter traversing happens. */
				} else if ((*s == ':' && (s - basename_start == 1))) {
					if (state == 0) {
						basename_start = s;
						state = 1;
					} else {
						basename_end = s;
						state = 0;
					}
#endif
				} else {
					/* First non-separator byte after a separator starts a new
					 * candidate component. */
					if (state == 0) {
						basename_start = s;
						state = 1;
					}
				}
				break;
			default:
				if (inc_len < 0) {
					/* If character is invalid, treat it like other non-significant characters. */
					inc_len = 1;
					php_mb_reset();
				}
				if (state == 0) {
					basename_start = s;
					state = 1;
				}
				break;
		}
		s += inc_len;
		len -= inc_len;
	}

quit_loop:
	/* Scanning stopped inside a name: it extends up to the stop position. */
	if (state == 1) {
		basename_end = s;
	}

	/* Strip the suffix only when something remains of the name afterwards. */
	if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
			memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
		basename_end -= suffix_len;
	}

	return zend_string_init(basename_start, basename_end - basename_start, 0);
}
1537 /* }}} */
1538
1539 /* {{{ Returns the filename component of the path */
1540 PHP_FUNCTION(basename)
1541 {
1542 char *string, *suffix = NULL;
1543 size_t string_len, suffix_len = 0;
1544
1545 ZEND_PARSE_PARAMETERS_START(1, 2)
1546 Z_PARAM_STRING(string, string_len)
1547 Z_PARAM_OPTIONAL
1548 Z_PARAM_STRING(suffix, suffix_len)
1549 ZEND_PARSE_PARAMETERS_END();
1550
1551 RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
1552 }
1553 /* }}} */
1554
1555 /* {{{ php_dirname
1556 Returns directory name component of path */
1557 PHPAPI size_t php_dirname(char *path, size_t len)
1558 {
1559 return zend_dirname(path, len);
1560 }
1561 /* }}} */
1562
1563 /* {{{ Returns the directory name component of the path */
1564 PHP_FUNCTION(dirname)
1565 {
1566 char *str;
1567 size_t str_len;
1568 zend_string *ret;
1569 zend_long levels = 1;
1570
1571 ZEND_PARSE_PARAMETERS_START(1, 2)
1572 Z_PARAM_STRING(str, str_len)
1573 Z_PARAM_OPTIONAL
1574 Z_PARAM_LONG(levels)
1575 ZEND_PARSE_PARAMETERS_END();
1576
1577 ret = zend_string_init(str, str_len, 0);
1578
1579 if (levels == 1) {
1580 /* Default case */
1581 #ifdef PHP_WIN32
1582 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
1583 #else
1584 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
1585 #endif
1586 } else if (levels < 1) {
1587 zend_argument_value_error(2, "must be greater than or equal to 1");
1588 zend_string_efree(ret);
1589 RETURN_THROWS();
1590 } else {
1591 /* Some levels up */
1592 do {
1593 #ifdef PHP_WIN32
1594 ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1595 #else
1596 ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
1597 #endif
1598 } while (ZSTR_LEN(ret) < str_len && --levels);
1599 }
1600
1601 RETURN_NEW_STR(ret);
1602 }
1603 /* }}} */
1604
1605 /* {{{ Returns information about a certain string */
1606 PHP_FUNCTION(pathinfo)
1607 {
1608 zval tmp;
1609 char *path, *dirname;
1610 size_t path_len;
1611 int have_basename;
1612 zend_long opt = PHP_PATHINFO_ALL;
1613 zend_string *ret = NULL;
1614
1615 ZEND_PARSE_PARAMETERS_START(1, 2)
1616 Z_PARAM_STRING(path, path_len)
1617 Z_PARAM_OPTIONAL
1618 Z_PARAM_LONG(opt)
1619 ZEND_PARSE_PARAMETERS_END();
1620
1621 have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
1622
1623 array_init(&tmp);
1624
1625 if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
1626 dirname = estrndup(path, path_len);
1627 php_dirname(dirname, path_len);
1628 if (*dirname) {
1629 add_assoc_string(&tmp, "dirname", dirname);
1630 }
1631 efree(dirname);
1632 }
1633
1634 if (have_basename) {
1635 ret = php_basename(path, path_len, NULL, 0);
1636 add_assoc_str(&tmp, "basename", zend_string_copy(ret));
1637 }
1638
1639 if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
1640 const char *p;
1641 ptrdiff_t idx;
1642
1643 if (!have_basename) {
1644 ret = php_basename(path, path_len, NULL, 0);
1645 }
1646
1647 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1648
1649 if (p) {
1650 idx = p - ZSTR_VAL(ret);
1651 add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
1652 }
1653 }
1654
1655 if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
1656 const char *p;
1657 ptrdiff_t idx;
1658
1659 /* Have we already looked up the basename? */
1660 if (!have_basename && !ret) {
1661 ret = php_basename(path, path_len, NULL, 0);
1662 }
1663
1664 p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
1665
1666 idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
1667 add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
1668 }
1669
1670 if (ret) {
1671 zend_string_release_ex(ret, 0);
1672 }
1673
1674 if (opt == PHP_PATHINFO_ALL) {
1675 ZVAL_COPY_VALUE(return_value, &tmp);
1676 } else {
1677 zval *element;
1678 if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
1679 ZVAL_COPY_DEREF(return_value, element);
1680 } else {
1681 ZVAL_EMPTY_STRING(return_value);
1682 }
1683 zval_ptr_dtor(&tmp);
1684 }
1685 }
1686 /* }}} */
1687
1688 /* {{{ php_stristr
1689 case insensitive strstr */
1690 PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
1691 {
1692 php_strtolower(s, s_len);
1693 php_strtolower(t, t_len);
1694 return (char*)php_memnstr(s, t, t_len, s + s_len);
1695 }
1696 /* }}} */
1697
1698 /* {{{ php_strspn */
/* Length of the leading run of [s1, s1_end) that consists only of bytes
 * found in the mask [s2, s2_end). */
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *cursor = s1;

	while (cursor != s1_end) {
		const char *mask = s2;
		int in_mask = 0;

		while (mask != s2_end) {
			if (*mask++ == *cursor) {
				in_mask = 1;
				break;
			}
		}

		if (!in_mask) {
			/* first byte that is not part of the mask ends the run */
			break;
		}
		cursor++;
	}

	return (cursor - s1);
}
1713 /* }}} */
1714
1715 /* {{{ php_strcspn */
/* Length of the leading run of [s1, s1_end) that contains no byte from the
 * mask [s2, s2_end).
 *
 * Only bytes inside the two half-open ranges are read. In particular an
 * empty mask (s2 == s2_end) matches nothing, so the whole of s1 is the run;
 * the byte located at s2_end is never compared against. */
PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
{
	const char *p;

	for (p = s1; p != s1_end; p++) {
		const char *spanp;

		for (spanp = s2; spanp < s2_end; spanp++) {
			if (*spanp == *p) {
				/* first byte of s1 that belongs to the mask ends the run */
				return p - s1;
			}
		}
	}

	return p - s1;
}
1732 /* }}} */
1733
1734 /* {{{ Finds first occurrence of a string within another, case insensitive */
1735 PHP_FUNCTION(stristr)
1736 {
1737 zend_string *haystack, *needle;
1738 const char *found = NULL;
1739 size_t found_offset;
1740 char *haystack_dup;
1741 char *orig_needle;
1742 zend_bool part = 0;
1743
1744 ZEND_PARSE_PARAMETERS_START(2, 3)
1745 Z_PARAM_STR(haystack)
1746 Z_PARAM_STR(needle)
1747 Z_PARAM_OPTIONAL
1748 Z_PARAM_BOOL(part)
1749 ZEND_PARSE_PARAMETERS_END();
1750
1751 haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
1752 orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
1753 found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
1754 efree(orig_needle);
1755
1756 if (found) {
1757 found_offset = found - haystack_dup;
1758 if (part) {
1759 RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
1760 } else {
1761 RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
1762 }
1763 } else {
1764 RETVAL_FALSE;
1765 }
1766
1767 efree(haystack_dup);
1768 }
1769 /* }}} */
1770
1771 /* {{{ Finds first occurrence of a string within another */
1772 PHP_FUNCTION(strstr)
1773 {
1774 zend_string *haystack, *needle;
1775 const char *found = NULL;
1776 zend_long found_offset;
1777 zend_bool part = 0;
1778
1779 ZEND_PARSE_PARAMETERS_START(2, 3)
1780 Z_PARAM_STR(haystack)
1781 Z_PARAM_STR(needle)
1782 Z_PARAM_OPTIONAL
1783 Z_PARAM_BOOL(part)
1784 ZEND_PARSE_PARAMETERS_END();
1785
1786 found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1787
1788 if (found) {
1789 found_offset = found - ZSTR_VAL(haystack);
1790 if (part) {
1791 RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1792 } else {
1793 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1794 }
1795 }
1796 RETURN_FALSE;
1797 }
1798 /* }}} */
1799
1800 /* {{{ Checks if a string contains another */
1801 PHP_FUNCTION(str_contains)
1802 {
1803 zend_string *haystack, *needle;
1804
1805 ZEND_PARSE_PARAMETERS_START(2, 2)
1806 Z_PARAM_STR(haystack)
1807 Z_PARAM_STR(needle)
1808 ZEND_PARSE_PARAMETERS_END();
1809
1810 RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
1811 }
1812 /* }}} */
1813
1814 /* {{{ Checks if haystack starts with needle */
1815 PHP_FUNCTION(str_starts_with)
1816 {
1817 zend_string *haystack, *needle;
1818
1819 ZEND_PARSE_PARAMETERS_START(2, 2)
1820 Z_PARAM_STR(haystack)
1821 Z_PARAM_STR(needle)
1822 ZEND_PARSE_PARAMETERS_END();
1823
1824 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1825 RETURN_FALSE;
1826 }
1827
1828 RETURN_BOOL(memcmp(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1829 }
1830 /* }}} */
1831
1832 /* {{{ Checks if haystack ends with needle */
1833 PHP_FUNCTION(str_ends_with)
1834 {
1835 zend_string *haystack, *needle;
1836
1837 ZEND_PARSE_PARAMETERS_START(2, 2)
1838 Z_PARAM_STR(haystack)
1839 Z_PARAM_STR(needle)
1840 ZEND_PARSE_PARAMETERS_END();
1841
1842 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1843 RETURN_FALSE;
1844 }
1845
1846 RETURN_BOOL(memcmp(
1847 ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
1848 ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
1849 }
1850 /* }}} */
1851
1852 /* {{{ An alias for strstr */
1853 /* }}} */
1854
1855 /* {{{ Finds position of first occurrence of a string within another */
1856 PHP_FUNCTION(strpos)
1857 {
1858 zend_string *haystack, *needle;
1859 const char *found = NULL;
1860 zend_long offset = 0;
1861
1862 ZEND_PARSE_PARAMETERS_START(2, 3)
1863 Z_PARAM_STR(haystack)
1864 Z_PARAM_STR(needle)
1865 Z_PARAM_OPTIONAL
1866 Z_PARAM_LONG(offset)
1867 ZEND_PARSE_PARAMETERS_END();
1868
1869 if (offset < 0) {
1870 offset += (zend_long)ZSTR_LEN(haystack);
1871 }
1872 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1873 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1874 RETURN_THROWS();
1875 }
1876
1877 found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
1878 ZSTR_VAL(needle), ZSTR_LEN(needle),
1879 ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
1880
1881 if (found) {
1882 RETURN_LONG(found - ZSTR_VAL(haystack));
1883 } else {
1884 RETURN_FALSE;
1885 }
1886 }
1887 /* }}} */
1888
1889 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
1890 PHP_FUNCTION(stripos)
1891 {
1892 const char *found = NULL;
1893 zend_string *haystack, *needle;
1894 zend_long offset = 0;
1895 zend_string *needle_dup = NULL, *haystack_dup;
1896
1897 ZEND_PARSE_PARAMETERS_START(2, 3)
1898 Z_PARAM_STR(haystack)
1899 Z_PARAM_STR(needle)
1900 Z_PARAM_OPTIONAL
1901 Z_PARAM_LONG(offset)
1902 ZEND_PARSE_PARAMETERS_END();
1903
1904 if (offset < 0) {
1905 offset += (zend_long)ZSTR_LEN(haystack);
1906 }
1907 if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
1908 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1909 RETURN_THROWS();
1910 }
1911
1912 if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1913 RETURN_FALSE;
1914 }
1915
1916 haystack_dup = php_string_tolower(haystack);
1917 needle_dup = php_string_tolower(needle);
1918 found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
1919 ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
1920
1921 if (found) {
1922 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
1923 } else {
1924 RETVAL_FALSE;
1925 }
1926
1927 zend_string_release_ex(haystack_dup, 0);
1928 zend_string_release_ex(needle_dup, 0);
1929 }
1930 /* }}} */
1931
1932 /* {{{ Finds position of last occurrence of a string within another string */
1933 PHP_FUNCTION(strrpos)
1934 {
1935 zend_string *needle;
1936 zend_string *haystack;
1937 zend_long offset = 0;
1938 const char *p, *e, *found;
1939
1940 ZEND_PARSE_PARAMETERS_START(2, 3)
1941 Z_PARAM_STR(haystack)
1942 Z_PARAM_STR(needle)
1943 Z_PARAM_OPTIONAL
1944 Z_PARAM_LONG(offset)
1945 ZEND_PARSE_PARAMETERS_END();
1946
1947 if (offset >= 0) {
1948 if ((size_t)offset > ZSTR_LEN(haystack)) {
1949 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1950 RETURN_THROWS();
1951 }
1952 p = ZSTR_VAL(haystack) + (size_t)offset;
1953 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1954 } else {
1955 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
1956 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1957 RETURN_THROWS();
1958 }
1959
1960 p = ZSTR_VAL(haystack);
1961 if ((size_t)-offset < ZSTR_LEN(needle)) {
1962 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
1963 } else {
1964 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
1965 }
1966 }
1967
1968 if ((found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e))) {
1969 RETURN_LONG(found - ZSTR_VAL(haystack));
1970 }
1971
1972 RETURN_FALSE;
1973 }
1974 /* }}} */
1975
1976 /* {{{ Finds position of last occurrence of a string within another string */
1977 PHP_FUNCTION(strripos)
1978 {
1979 zend_string *needle;
1980 zend_string *haystack;
1981 zend_long offset = 0;
1982 const char *p, *e, *found;
1983 zend_string *needle_dup, *haystack_dup;
1984
1985 ZEND_PARSE_PARAMETERS_START(2, 3)
1986 Z_PARAM_STR(haystack)
1987 Z_PARAM_STR(needle)
1988 Z_PARAM_OPTIONAL
1989 Z_PARAM_LONG(offset)
1990 ZEND_PARSE_PARAMETERS_END();
1991
1992 if (ZSTR_LEN(needle) == 1) {
1993 /* Single character search can shortcut memcmps
1994 Can also avoid tolower emallocs */
1995 char lowered;
1996 if (offset >= 0) {
1997 if ((size_t)offset > ZSTR_LEN(haystack)) {
1998 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1999 RETURN_THROWS();
2000 }
2001 p = ZSTR_VAL(haystack) + (size_t)offset;
2002 e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
2003 } else {
2004 p = ZSTR_VAL(haystack);
2005 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2006 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2007 RETURN_THROWS();
2008 }
2009 e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
2010 }
2011 /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
2012 lowered = tolower(*ZSTR_VAL(needle));
2013 while (e >= p) {
2014 if (tolower(*e) == lowered) {
2015 RETURN_LONG(e - p + (offset > 0 ? offset : 0));
2016 }
2017 e--;
2018 }
2019 RETURN_FALSE;
2020 }
2021
2022 haystack_dup = php_string_tolower(haystack);
2023 if (offset >= 0) {
2024 if ((size_t)offset > ZSTR_LEN(haystack)) {
2025 zend_string_release_ex(haystack_dup, 0);
2026 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2027 RETURN_THROWS();
2028 }
2029 p = ZSTR_VAL(haystack_dup) + offset;
2030 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2031 } else {
2032 if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
2033 zend_string_release_ex(haystack_dup, 0);
2034 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
2035 RETURN_THROWS();
2036 }
2037
2038 p = ZSTR_VAL(haystack_dup);
2039 if ((size_t)-offset < ZSTR_LEN(needle)) {
2040 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
2041 } else {
2042 e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
2043 }
2044 }
2045
2046 needle_dup = php_string_tolower(needle);
2047 if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
2048 RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
2049 zend_string_release_ex(needle_dup, 0);
2050 zend_string_release_ex(haystack_dup, 0);
2051 } else {
2052 zend_string_release_ex(needle_dup, 0);
2053 zend_string_release_ex(haystack_dup, 0);
2054 RETURN_FALSE;
2055 }
2056 }
2057 /* }}} */
2058
2059 /* {{{ Finds the last occurrence of a character in a string within another */
2060 PHP_FUNCTION(strrchr)
2061 {
2062 zend_string *haystack, *needle;
2063 const char *found = NULL;
2064 zend_long found_offset;
2065
2066 ZEND_PARSE_PARAMETERS_START(2, 2)
2067 Z_PARAM_STR(haystack)
2068 Z_PARAM_STR(needle)
2069 ZEND_PARSE_PARAMETERS_END();
2070
2071 found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2072 if (found) {
2073 found_offset = found - ZSTR_VAL(haystack);
2074 RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2075 } else {
2076 RETURN_FALSE;
2077 }
2078 }
2079 /* }}} */
2080
2081 /* {{{ php_chunk_split */
2082 static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
2083 {
2084 char *q;
2085 const char *p;
2086 size_t chunks;
2087 size_t restlen;
2088 zend_string *dest;
2089
2090 chunks = srclen / chunklen;
2091 restlen = srclen - chunks * chunklen; /* srclen % chunklen */
2092 if (restlen) {
2093 /* We want chunks to be rounded up rather than rounded down.
2094 * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
2095 chunks++;
2096 }
2097
2098 dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
2099
2100 for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
2101 memcpy(q, p, chunklen);
2102 q += chunklen;
2103 memcpy(q, end, endlen);
2104 q += endlen;
2105 p += chunklen;
2106 }
2107
2108 if (restlen) {
2109 memcpy(q, p, restlen);
2110 q += restlen;
2111 memcpy(q, end, endlen);
2112 q += endlen;
2113 }
2114
2115 *q = '\0';
2116 ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
2117
2118 return dest;
2119 }
2120 /* }}} */
2121
2122 /* {{{ Returns split line */
2123 PHP_FUNCTION(chunk_split)
2124 {
2125 zend_string *str;
2126 char *end = "\r\n";
2127 size_t endlen = 2;
2128 zend_long chunklen = 76;
2129 zend_string *result;
2130
2131 ZEND_PARSE_PARAMETERS_START(1, 3)
2132 Z_PARAM_STR(str)
2133 Z_PARAM_OPTIONAL
2134 Z_PARAM_LONG(chunklen)
2135 Z_PARAM_STRING(end, endlen)
2136 ZEND_PARSE_PARAMETERS_END();
2137
2138 if (chunklen <= 0) {
2139 zend_argument_value_error(2, "must be greater than 0");
2140 RETURN_THROWS();
2141 }
2142
2143 if ((size_t)chunklen > ZSTR_LEN(str)) {
2144 /* to maintain BC, we must return original string + ending */
2145 result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
2146 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
2147 memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
2148 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2149 RETURN_NEW_STR(result);
2150 }
2151
2152 if (!ZSTR_LEN(str)) {
2153 RETURN_EMPTY_STRING();
2154 }
2155
2156 result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
2157
2158 RETURN_STR(result);
2159 }
2160 /* }}} */
2161
2162 /* {{{ Returns part of a string */
2163 PHP_FUNCTION(substr)
2164 {
2165 zend_string *str;
2166 zend_long l = 0, f;
2167 zend_bool len_is_null = 1;
2168
2169 ZEND_PARSE_PARAMETERS_START(2, 3)
2170 Z_PARAM_STR(str)
2171 Z_PARAM_LONG(f)
2172 Z_PARAM_OPTIONAL
2173 Z_PARAM_LONG_OR_NULL(l, len_is_null)
2174 ZEND_PARSE_PARAMETERS_END();
2175
2176 if (f < 0) {
2177 /* if "from" position is negative, count start position from the end
2178 * of the string
2179 */
2180 if (-(size_t)f > ZSTR_LEN(str)) {
2181 f = 0;
2182 } else {
2183 f = (zend_long)ZSTR_LEN(str) + f;
2184 }
2185 } else if ((size_t)f > ZSTR_LEN(str)) {
2186 RETURN_EMPTY_STRING();
2187 }
2188
2189 if (!len_is_null) {
2190 if (l < 0) {
2191 /* if "length" position is negative, set it to the length
2192 * needed to stop that many chars from the end of the string
2193 */
2194 if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
2195 l = 0;
2196 } else {
2197 l = (zend_long)ZSTR_LEN(str) - f + l;
2198 }
2199 } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
2200 l = (zend_long)ZSTR_LEN(str) - f;
2201 }
2202 } else {
2203 l = (zend_long)ZSTR_LEN(str) - f;
2204 }
2205
2206 if (l == ZSTR_LEN(str)) {
2207 RETURN_STR_COPY(str);
2208 } else {
2209 RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
2210 }
2211 }
2212 /* }}} */
2213
2214 /* {{{ Replaces part of a string with another string */
2215 PHP_FUNCTION(substr_replace)
2216 {
2217 zend_string *str, *repl_str;
2218 HashTable *str_ht, *repl_ht;
2219 HashTable *from_ht;
2220 zend_long from_long;
2221 HashTable *len_ht = NULL;
2222 zend_long len_long;
2223 zend_bool len_is_null = 1;
2224 zend_long l = 0;
2225 zend_long f;
2226 zend_string *result;
2227 HashPosition from_idx, repl_idx, len_idx;
2228 zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
2229
2230 ZEND_PARSE_PARAMETERS_START(3, 4)
2231 Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
2232 Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
2233 Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
2234 Z_PARAM_OPTIONAL
2235 Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
2236 ZEND_PARSE_PARAMETERS_END();
2237
2238 if (len_is_null) {
2239 if (str) {
2240 l = ZSTR_LEN(str);
2241 }
2242 } else if (!len_ht) {
2243 l = len_long;
2244 }
2245
2246 if (str) {
2247 if (from_ht) {
2248 zend_argument_type_error(3, "cannot be an array when working on a single string");
2249 RETURN_THROWS();
2250 }
2251 if (len_ht) {
2252 zend_argument_type_error(4, "cannot be an array when working on a single string");
2253 RETURN_THROWS();
2254 }
2255
2256 f = from_long;
2257
2258 /* if "from" position is negative, count start position from the end
2259 * of the string
2260 */
2261 if (f < 0) {
2262 f = (zend_long)ZSTR_LEN(str) + f;
2263 if (f < 0) {
2264 f = 0;
2265 }
2266 } else if ((size_t)f > ZSTR_LEN(str)) {
2267 f = ZSTR_LEN(str);
2268 }
2269 /* if "length" position is negative, set it to the length
2270 * needed to stop that many chars from the end of the string
2271 */
2272 if (l < 0) {
2273 l = ((zend_long)ZSTR_LEN(str) - f) + l;
2274 if (l < 0) {
2275 l = 0;
2276 }
2277 }
2278
2279 if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
2280 l = ZSTR_LEN(str);
2281 }
2282
2283 if ((f + l) > (zend_long)ZSTR_LEN(str)) {
2284 l = ZSTR_LEN(str) - f;
2285 }
2286
2287 zend_string *tmp_repl_str = NULL;
2288 if (repl_ht) {
2289 repl_idx = 0;
2290 while (repl_idx < repl_ht->nNumUsed) {
2291 tmp_repl = &repl_ht->arData[repl_idx].val;
2292 if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
2293 break;
2294 }
2295 repl_idx++;
2296 }
2297 if (repl_idx < repl_ht->nNumUsed) {
2298 repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2299 } else {
2300 repl_str = STR_EMPTY_ALLOC();
2301 }
2302 }
2303
2304 result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
2305
2306 memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
2307 if (ZSTR_LEN(repl_str)) {
2308 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2309 }
2310 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
2311 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2312 zend_tmp_string_release(tmp_repl_str);
2313 RETURN_NEW_STR(result);
2314 } else { /* str is array of strings */
2315 zend_string *str_index = NULL;
2316 size_t result_len;
2317 zend_ulong num_index;
2318
2319 /* TODO
2320 if (!len_is_null && from_ht) {
2321 if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
2322 php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
2323 RETURN_STR_COPY(str);
2324 }
2325 }
2326 */
2327
2328 array_init(return_value);
2329
2330 from_idx = len_idx = repl_idx = 0;
2331
2332 ZEND_HASH_FOREACH_KEY_VAL_IND(str_ht, num_index, str_index, tmp_str) {
2333 zend_string *tmp_orig_str;
2334 zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
2335
2336 if (from_ht) {
2337 while (from_idx < from_ht->nNumUsed) {
2338 tmp_from = &from_ht->arData[from_idx].val;
2339 if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
2340 break;
2341 }
2342 from_idx++;
2343 }
2344 if (from_idx < from_ht->nNumUsed) {
2345 f = zval_get_long(tmp_from);
2346
2347 if (f < 0) {
2348 f = (zend_long)ZSTR_LEN(orig_str) + f;
2349 if (f < 0) {
2350 f = 0;
2351 }
2352 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2353 f = ZSTR_LEN(orig_str);
2354 }
2355 from_idx++;
2356 } else {
2357 f = 0;
2358 }
2359 } else {
2360 f = from_long;
2361 if (f < 0) {
2362 f = (zend_long)ZSTR_LEN(orig_str) + f;
2363 if (f < 0) {
2364 f = 0;
2365 }
2366 } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
2367 f = ZSTR_LEN(orig_str);
2368 }
2369 }
2370
2371 if (len_ht) {
2372 while (len_idx < len_ht->nNumUsed) {
2373 tmp_len = &len_ht->arData[len_idx].val;
2374 if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
2375 break;
2376 }
2377 len_idx++;
2378 }
2379 if (len_idx < len_ht->nNumUsed) {
2380 l = zval_get_long(tmp_len);
2381 len_idx++;
2382 } else {
2383 l = ZSTR_LEN(orig_str);
2384 }
2385 } else if (!len_is_null) {
2386 l = len_long;
2387 } else {
2388 l = ZSTR_LEN(orig_str);
2389 }
2390
2391 if (l < 0) {
2392 l = (ZSTR_LEN(orig_str) - f) + l;
2393 if (l < 0) {
2394 l = 0;
2395 }
2396 }
2397
2398 ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
2399 ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
2400 if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
2401 l = ZSTR_LEN(orig_str) - f;
2402 }
2403
2404 result_len = ZSTR_LEN(orig_str) - l;
2405
2406 if (repl_ht) {
2407 while (repl_idx < repl_ht->nNumUsed) {
2408 tmp_repl = &repl_ht->arData[repl_idx].val;
2409 if (repl_ht != IS_UNDEF) {
2410 break;
2411 }
2412 repl_idx++;
2413 }
2414 if (repl_idx < repl_ht->nNumUsed) {
2415 zend_string *tmp_repl_str;
2416 zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
2417
2418 result_len += ZSTR_LEN(repl_str);
2419 repl_idx++;
2420 result = zend_string_safe_alloc(1, result_len, 0, 0);
2421
2422 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2423 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2424 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2425 zend_tmp_string_release(tmp_repl_str);
2426 } else {
2427 result = zend_string_safe_alloc(1, result_len, 0, 0);
2428
2429 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2430 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2431 }
2432 } else {
2433 result_len += ZSTR_LEN(repl_str);
2434
2435 result = zend_string_safe_alloc(1, result_len, 0, 0);
2436
2437 memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
2438 memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
2439 memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
2440 }
2441
2442 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
2443
2444 if (str_index) {
2445 zval tmp;
2446
2447 ZVAL_NEW_STR(&tmp, result);
2448 zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
2449 } else {
2450 add_index_str(return_value, num_index, result);
2451 }
2452
2453 zend_tmp_string_release(tmp_orig_str);
2454 } ZEND_HASH_FOREACH_END();
2455 } /* if */
2456 }
2457 /* }}} */
2458
2459 /* {{{ Quotes meta characters */
2460 PHP_FUNCTION(quotemeta)
2461 {
2462 zend_string *old;
2463 const char *old_end, *p;
2464 char *q;
2465 char c;
2466 zend_string *str;
2467
2468 ZEND_PARSE_PARAMETERS_START(1, 1)
2469 Z_PARAM_STR(old)
2470 ZEND_PARSE_PARAMETERS_END();
2471
2472 old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
2473
2474 if (ZSTR_LEN(old) == 0) {
2475 RETURN_EMPTY_STRING();
2476 }
2477
2478 str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
2479
2480 for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
2481 c = *p;
2482 switch (c) {
2483 case '.':
2484 case '\\':
2485 case '+':
2486 case '*':
2487 case '?':
2488 case '[':
2489 case '^':
2490 case ']':
2491 case '$':
2492 case '(':
2493 case ')':
2494 *q++ = '\\';
2495 /* break is missing _intentionally_ */
2496 default:
2497 *q++ = c;
2498 }
2499 }
2500
2501 *q = '\0';
2502
2503 RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
2504 }
2505 /* }}} */
2506
2507 /* {{{ Returns ASCII value of character
2508 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
2509 PHP_FUNCTION(ord)
2510 {
2511 zend_string *str;
2512
2513 ZEND_PARSE_PARAMETERS_START(1, 1)
2514 Z_PARAM_STR(str)
2515 ZEND_PARSE_PARAMETERS_END();
2516
2517 RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
2518 }
2519 /* }}} */
2520
2521 /* {{{ Converts ASCII code to a character
2522 Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
2523 PHP_FUNCTION(chr)
2524 {
2525 zend_long c;
2526
2527 ZEND_PARSE_PARAMETERS_START(1, 1)
2528 Z_PARAM_LONG(c)
2529 ZEND_PARSE_PARAMETERS_END();
2530
2531 c &= 0xff;
2532 RETURN_CHAR(c);
2533 }
2534 /* }}} */
2535
2536 /* {{{ php_ucfirst
2537 Uppercase the first character of the word in a native string */
2538 static zend_string* php_ucfirst(zend_string *str)
2539 {
2540 const unsigned char ch = ZSTR_VAL(str)[0];
2541 unsigned char r = toupper(ch);
2542 if (r == ch) {
2543 return zend_string_copy(str);
2544 } else {
2545 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2546 ZSTR_VAL(s)[0] = r;
2547 return s;
2548 }
2549 }
2550 /* }}} */
2551
2552 /* {{{ Makes a string's first character uppercase */
2553 PHP_FUNCTION(ucfirst)
2554 {
2555 zend_string *str;
2556
2557 ZEND_PARSE_PARAMETERS_START(1, 1)
2558 Z_PARAM_STR(str)
2559 ZEND_PARSE_PARAMETERS_END();
2560
2561 if (!ZSTR_LEN(str)) {
2562 RETURN_EMPTY_STRING();
2563 }
2564
2565 RETURN_STR(php_ucfirst(str));
2566 }
2567 /* }}} */
2568
2569 /* {{{
2570 Lowercase the first character of the word in a native string */
2571 static zend_string* php_lcfirst(zend_string *str)
2572 {
2573 unsigned char r = tolower(ZSTR_VAL(str)[0]);
2574 if (r == ZSTR_VAL(str)[0]) {
2575 return zend_string_copy(str);
2576 } else {
2577 zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
2578 ZSTR_VAL(s)[0] = r;
2579 return s;
2580 }
2581 }
2582 /* }}} */
2583
2584 /* {{{ Make a string's first character lowercase */
2585 PHP_FUNCTION(lcfirst)
2586 {
2587 zend_string *str;
2588
2589 ZEND_PARSE_PARAMETERS_START(1, 1)
2590 Z_PARAM_STR(str)
2591 ZEND_PARSE_PARAMETERS_END();
2592
2593 if (!ZSTR_LEN(str)) {
2594 RETURN_EMPTY_STRING();
2595 }
2596
2597 RETURN_STR(php_lcfirst(str));
2598 }
2599 /* }}} */
2600
2601 /* {{{ Uppercase the first character of every word in a string */
2602 PHP_FUNCTION(ucwords)
2603 {
2604 zend_string *str;
2605 char *delims = " \t\r\n\f\v";
2606 register char *r;
2607 register const char *r_end;
2608 size_t delims_len = 6;
2609 char mask[256];
2610
2611 ZEND_PARSE_PARAMETERS_START(1, 2)
2612 Z_PARAM_STR(str)
2613 Z_PARAM_OPTIONAL
2614 Z_PARAM_STRING(delims, delims_len)
2615 ZEND_PARSE_PARAMETERS_END();
2616
2617 if (!ZSTR_LEN(str)) {
2618 RETURN_EMPTY_STRING();
2619 }
2620
2621 php_charmask((const unsigned char *) delims, delims_len, mask);
2622
2623 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
2624 r = Z_STRVAL_P(return_value);
2625
2626 *r = toupper((unsigned char) *r);
2627 for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
2628 if (mask[(unsigned char)*r++]) {
2629 *r = toupper((unsigned char) *r);
2630 }
2631 }
2632 }
2633 /* }}} */
2634
2635 /* {{{ php_strtr */
2636 PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
2637 {
2638 size_t i;
2639
2640 if (UNEXPECTED(trlen < 1)) {
2641 return str;
2642 } else if (trlen == 1) {
2643 char ch_from = *str_from;
2644 char ch_to = *str_to;
2645
2646 for (i = 0; i < len; i++) {
2647 if (str[i] == ch_from) {
2648 str[i] = ch_to;
2649 }
2650 }
2651 } else {
2652 unsigned char xlat[256], j = 0;
2653
2654 do { xlat[j] = j; } while (++j != 0);
2655
2656 for (i = 0; i < trlen; i++) {
2657 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i];
2658 }
2659
2660 for (i = 0; i < len; i++) {
2661 str[i] = xlat[(size_t)(unsigned char) str[i]];
2662 }
2663 }
2664
2665 return str;
2666 }
2667 /* }}} */
2668
2669 /* {{{ php_strtr_ex */
2670 static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
2671 {
2672 zend_string *new_str = NULL;
2673 size_t i;
2674
2675 if (UNEXPECTED(trlen < 1)) {
2676 return zend_string_copy(str);
2677 } else if (trlen == 1) {
2678 char ch_from = *str_from;
2679 char ch_to = *str_to;
2680
2681 for (i = 0; i < ZSTR_LEN(str); i++) {
2682 if (ZSTR_VAL(str)[i] == ch_from) {
2683 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2684 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2685 ZSTR_VAL(new_str)[i] = ch_to;
2686 break;
2687 }
2688 }
2689 for (; i < ZSTR_LEN(str); i++) {
2690 ZSTR_VAL(new_str)[i] = (ZSTR_VAL(str)[i] != ch_from) ? ZSTR_VAL(str)[i] : ch_to;
2691 }
2692 } else {
2693 unsigned char xlat[256], j = 0;
2694
2695 do { xlat[j] = j; } while (++j != 0);
2696
2697 for (i = 0; i < trlen; i++) {
2698 xlat[(size_t)(unsigned char) str_from[i]] = str_to[i];
2699 }
2700
2701 for (i = 0; i < ZSTR_LEN(str); i++) {
2702 if (ZSTR_VAL(str)[i] != xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
2703 new_str = zend_string_alloc(ZSTR_LEN(str), 0);
2704 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
2705 ZSTR_VAL(new_str)[i] = xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2706 break;
2707 }
2708 }
2709
2710 for (;i < ZSTR_LEN(str); i++) {
2711 ZSTR_VAL(new_str)[i] = xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
2712 }
2713 }
2714
2715 if (!new_str) {
2716 return zend_string_copy(str);
2717 }
2718
2719 ZSTR_VAL(new_str)[ZSTR_LEN(new_str)] = 0;
2720 return new_str;
2721 }
2722 /* }}} */
2723
/* {{{ php_strtr_array
   Replaces occurrences of the keys of `pats` found in `input` by the
   corresponding values and stores the resulting string in `return_value`.
   At every position the longest matching key is tried first; scanning then
   resumes after the replaced text, so replacements are not rescanned.
   Integer keys are matched by their decimal string form. When no key
   matches, `return_value` receives a new reference to `input`. */
static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
{
	const char *str = ZSTR_VAL(input);
	size_t slen = ZSTR_LEN(input);
	zend_ulong num_key;
	zend_string *str_key;
	size_t len, pos, old_pos;
	int num_keys = 0;
	/* minlen starts above maxlen; if no usable key is seen the two never
	 * cross and the "minlen > maxlen" check below returns the input as is. */
	size_t minlen = 128*1024;
	size_t maxlen = 0;
	HashTable str_hash;
	zval *entry;
	const char *key;
	smart_str result = {0};
	/* bitset: flags for the first byte of every usable key.
	 * num_bitset: flags for every usable key length (0..slen).
	 * Both are addressed as word = n / sizeof(zend_ulong),
	 * bit = n % sizeof(zend_ulong). */
	zend_ulong bitset[256/sizeof(zend_ulong)];
	zend_ulong *num_bitset;

	/* we will collect all possible key lengths */
	num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
	memset(bitset, 0, sizeof(bitset));

	/* check if original array has numeric keys */
	ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
		if (UNEXPECTED(!str_key)) {
			/* Integer key: handled in the rebuild pass below. */
			num_keys = 1;
		} else {
			len = ZSTR_LEN(str_key);
			if (UNEXPECTED(len < 1)) {
				php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
				continue;
			} else if (UNEXPECTED(len > slen)) {
				/* skip long patterns */
				continue;
			}
			if (len > maxlen) {
				maxlen = len;
			}
			if (len < minlen) {
				minlen = len;
			}
			/* remember possible key length */
			num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
			/* remember the first byte of the key */
			bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
		}
	} ZEND_HASH_FOREACH_END();

	if (UNEXPECTED(num_keys)) {
		zend_string *key_used;
		/* we have to rebuild HashTable with numeric keys */
		zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
		ZEND_HASH_FOREACH_KEY_VAL_IND(pats, num_key, str_key, entry) {
			if (UNEXPECTED(!str_key)) {
				/* Integer key: convert it to a string and record its
				 * length and first byte, as was done for string keys above. */
				key_used = zend_long_to_str(num_key);
				len = ZSTR_LEN(key_used);
				if (UNEXPECTED(len > slen)) {
					/* skip long patterns */
					zend_string_release(key_used);
					continue;
				}
				if (len > maxlen) {
					maxlen = len;
				}
				if (len < minlen) {
					minlen = len;
				}
				/* remember possible key length */
				num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
				bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
			} else {
				/* String key: its length and first byte were already
				 * recorded by the first pass. */
				key_used = str_key;
				len = ZSTR_LEN(key_used);
				if (UNEXPECTED(len > slen)) {
					/* skip long patterns */
					continue;
				}
			}
			zend_hash_add(&str_hash, key_used, entry);
			if (UNEXPECTED(!str_key)) {
				/* Drop our reference to the temporary key string created above. */
				zend_string_release_ex(key_used, 0);
			}
		} ZEND_HASH_FOREACH_END();
		/* From here on look patterns up in the string-keyed copy. */
		pats = &str_hash;
	}

	if (UNEXPECTED(minlen > maxlen)) {
		/* return the original string */
		if (pats == &str_hash) {
			zend_hash_destroy(&str_hash);
		}
		efree(num_bitset);
		RETURN_STR_COPY(input);
	}

	/* old_pos: start of the input not yet copied to the result.
	 * pos: current scan position. */
	old_pos = pos = 0;
	while (pos <= slen - minlen) {
		key = str + pos;
		/* Cheap pre-filter: only positions whose byte starts some key. */
		if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
			/* Try candidate lengths from longest to shortest. */
			len = maxlen;
			if (len > slen - pos) {
				len = slen - pos;
			}
			while (len >= minlen) {
				/* Only look up lengths that some key actually has. */
				if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
					entry = zend_hash_str_find(pats, key, len);
					if (entry != NULL) {
						zend_string *tmp;
						zend_string *s = zval_get_tmp_string(entry, &tmp);
						/* Flush the unmatched gap, then the replacement. */
						smart_str_appendl(&result, str + old_pos, pos - old_pos);
						smart_str_append(&result, s);
						old_pos = pos + len;
						/* The pos++ below moves scanning to old_pos. */
						pos = old_pos - 1;
						zend_tmp_string_release(tmp);
						break;
					}
				}
				len--;
			}
		}
		pos++;
	}

	if (result.s) {
		/* At least one replacement happened: append the unmatched tail. */
		smart_str_appendl(&result, str + old_pos, slen - old_pos);
		smart_str_0(&result);
		RETVAL_NEW_STR(result.s);
	} else {
		smart_str_free(&result);
		RETVAL_STR_COPY(input);
	}

	if (pats == &str_hash) {
		zend_hash_destroy(&str_hash);
	}
	efree(num_bitset);
}
2860 /* }}} */
2861
2862 /* {{{ php_char_to_str_ex */
2863 static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
2864 {
2865 zend_string *result;
2866 size_t char_count = 0;
2867 int lc_from = 0;
2868 const char *source, *source_end= ZSTR_VAL(str) + ZSTR_LEN(str);
2869 char *target;
2870
2871 if (case_sensitivity) {
2872 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str);
2873 while ((p = memchr(p, from, (e - p)))) {
2874 char_count++;
2875 p++;
2876 }
2877 } else {
2878 lc_from = tolower(from);
2879 for (source = ZSTR_VAL(str); source < source_end; source++) {
2880 if (tolower(*source) == lc_from) {
2881 char_count++;
2882 }
2883 }
2884 }
2885
2886 if (char_count == 0) {
2887 return zend_string_copy(str);
2888 }
2889
2890 if (to_len > 0) {
2891 result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
2892 } else {
2893 result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
2894 }
2895 target = ZSTR_VAL(result);
2896
2897 if (case_sensitivity) {
2898 char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
2899 while ((p = memchr(p, from, (e - p)))) {
2900 memcpy(target, s, (p - s));
2901 target += p - s;
2902 memcpy(target, to, to_len);
2903 target += to_len;
2904 p++;
2905 s = p;
2906 if (replace_count) {
2907 *replace_count += 1;
2908 }
2909 }
2910 if (s < e) {
2911 memcpy(target, s, (e - s));
2912 target += e - s;
2913 }
2914 } else {
2915 for (source = ZSTR_VAL(str); source < source_end; source++) {
2916 if (tolower(*source) == lc_from) {
2917 if (replace_count) {
2918 *replace_count += 1;
2919 }
2920 memcpy(target, to, to_len);
2921 target += to_len;
2922 } else {
2923 *target = *source;
2924 target++;
2925 }
2926 }
2927 }
2928 *target = 0;
2929 return result;
2930 }
2931 /* }}} */
2932
2933 /* {{{ php_str_to_str_ex */
2934 static zend_string *php_str_to_str_ex(zend_string *haystack,
2935 const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
2936 {
2937
2938 if (needle_len < ZSTR_LEN(haystack)) {
2939 zend_string *new_str;
2940 const char *end;
2941 const char *p, *r;
2942 char *e;
2943
2944 if (needle_len == str_len) {
2945 new_str = NULL;
2946 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2947 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
2948 if (!new_str) {
2949 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
2950 }
2951 memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
2952 (*replace_count)++;
2953 }
2954 if (!new_str) {
2955 goto nothing_todo;
2956 }
2957 return new_str;
2958 } else {
2959 size_t count = 0;
2960 const char *o = ZSTR_VAL(haystack);
2961 const char *n = needle;
2962 const char *endp = o + ZSTR_LEN(haystack);
2963
2964 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
2965 o += needle_len;
2966 count++;
2967 }
2968 if (count == 0) {
2969 /* Needle doesn't occur, shortcircuit the actual replacement. */
2970 goto nothing_todo;
2971 }
2972 if (str_len > needle_len) {
2973 new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
2974 } else {
2975 new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
2976 }
2977
2978 e = ZSTR_VAL(new_str);
2979 end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
2980 for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
2981 memcpy(e, p, r - p);
2982 e += r - p;
2983 memcpy(e, str, str_len);
2984 e += str_len;
2985 (*replace_count)++;
2986 }
2987
2988 if (p < end) {
2989 memcpy(e, p, end - p);
2990 e += end - p;
2991 }
2992
2993 *e = '\0';
2994 return new_str;
2995 }
2996 } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
2997 nothing_todo:
2998 return zend_string_copy(haystack);
2999 } else {
3000 (*replace_count)++;
3001 return zend_string_init_fast(str, str_len);
3002 }
3003 }
3004 /* }}} */
3005
3006 /* {{{ php_str_to_str_i_ex */
3007 static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
3008 zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
3009 {
3010 zend_string *new_str = NULL;
3011 zend_string *lc_needle;
3012
3013 if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
3014 const char *end;
3015 const char *p, *r;
3016 char *e;
3017
3018 if (ZSTR_LEN(needle) == str_len) {
3019 lc_needle = php_string_tolower(needle);
3020 end = lc_haystack + ZSTR_LEN(haystack);
3021 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3022 if (!new_str) {
3023 new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
3024 }
3025 memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
3026 (*replace_count)++;
3027 }
3028 zend_string_release_ex(lc_needle, 0);
3029
3030 if (!new_str) {
3031 goto nothing_todo;
3032 }
3033 return new_str;
3034 } else {
3035 size_t count = 0;
3036 const char *o = lc_haystack;
3037 const char *n;
3038 const char *endp = o + ZSTR_LEN(haystack);
3039
3040 lc_needle = php_string_tolower(needle);
3041 n = ZSTR_VAL(lc_needle);
3042
3043 while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
3044 o += ZSTR_LEN(lc_needle);
3045 count++;
3046 }
3047 if (count == 0) {
3048 /* Needle doesn't occur, shortcircuit the actual replacement. */
3049 zend_string_release_ex(lc_needle, 0);
3050 goto nothing_todo;
3051 }
3052
3053 if (str_len > ZSTR_LEN(lc_needle)) {
3054 new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
3055 } else {
3056 new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
3057 }
3058
3059 e = ZSTR_VAL(new_str);
3060 end = lc_haystack + ZSTR_LEN(haystack);
3061
3062 for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
3063 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
3064 e += r - p;
3065 memcpy(e, str, str_len);
3066 e += str_len;
3067 (*replace_count)++;
3068 }
3069
3070 if (p < end) {
3071 memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
3072 e += end - p;
3073 }
3074 *e = '\0';
3075
3076 zend_string_release_ex(lc_needle, 0);
3077
3078 return new_str;
3079 }
3080 } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
3081 nothing_todo:
3082 return zend_string_copy(haystack);
3083 } else {
3084 lc_needle = php_string_tolower(needle);
3085
3086 if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
3087 zend_string_release_ex(lc_needle, 0);
3088 goto nothing_todo;
3089 }
3090 zend_string_release_ex(lc_needle, 0);
3091
3092 new_str = zend_string_init(str, str_len, 0);
3093
3094 (*replace_count)++;
3095 return new_str;
3096 }
3097 }
3098 /* }}} */
3099
3100 /* {{{ php_str_to_str */
3101 PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
3102 {
3103 zend_string *new_str;
3104
3105 if (needle_len < length) {
3106 const char *end;
3107 const char *s, *p;
3108 char *e, *r;
3109
3110 if (needle_len == str_len) {
3111 new_str = zend_string_init(haystack, length, 0);
3112 end = ZSTR_VAL(new_str) + length;
3113 for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3114 memcpy(r, str, str_len);
3115 }
3116 return new_str;
3117 } else {
3118 if (str_len < needle_len) {
3119 new_str = zend_string_alloc(length, 0);
3120 } else {
3121 size_t count = 0;
3122 const char *o = haystack;
3123 const char *n = needle;
3124 const char *endp = o + length;
3125
3126 while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
3127 o += needle_len;
3128 count++;
3129 }
3130 if (count == 0) {
3131 /* Needle doesn't occur, shortcircuit the actual replacement. */
3132 new_str = zend_string_init(haystack, length, 0);
3133 return new_str;
3134 } else {
3135 if (str_len > needle_len) {
3136 new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
3137 } else {
3138 new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
3139 }
3140 }
3141 }
3142
3143 s = e = ZSTR_VAL(new_str);
3144 end = haystack + length;
3145 for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
3146 memcpy(e, p, r - p);
3147 e += r - p;
3148 memcpy(e, str, str_len);
3149 e += str_len;
3150 }
3151
3152 if (p < end) {
3153 memcpy(e, p, end - p);
3154 e += end - p;
3155 }
3156
3157 *e = '\0';
3158 new_str = zend_string_truncate(new_str, e - s, 0);
3159 return new_str;
3160 }
3161 } else if (needle_len > length || memcmp(haystack, needle, length)) {
3162 new_str = zend_string_init(haystack, length, 0);
3163 return new_str;
3164 } else {
3165 new_str = zend_string_init(str, str_len, 0);
3166
3167 return new_str;
3168 }
3169 }
3170 /* }}} */
3171
3172 /* {{{ Translates characters in str using given translation tables */
3173 PHP_FUNCTION(strtr)
3174 {
3175 zend_string *str, *from_str = NULL;
3176 HashTable *from_ht = NULL;
3177 char *to = NULL;
3178 size_t to_len = 0;
3179
3180 ZEND_PARSE_PARAMETERS_START(2, 3)
3181 Z_PARAM_STR(str)
3182 Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
3183 Z_PARAM_OPTIONAL
3184 Z_PARAM_STRING_OR_NULL(to, to_len)
3185 ZEND_PARSE_PARAMETERS_END();
3186
3187 if (!to && from_ht == NULL) {
3188 zend_argument_type_error(2, "must be of type array, string given");
3189 RETURN_THROWS();
3190 } else if (to && from_str == NULL) {
3191 zend_argument_type_error(2, "must be of type string, array given");
3192 RETURN_THROWS();
3193 }
3194
3195 /* shortcut for empty string */
3196 if (ZSTR_LEN(str) == 0) {
3197 RETURN_EMPTY_STRING();
3198 }
3199
3200 if (!to) {
3201 if (zend_hash_num_elements(from_ht) < 1) {
3202 RETURN_STR_COPY(str);
3203 } else if (zend_hash_num_elements(from_ht) == 1) {
3204 zend_long num_key;
3205 zend_string *str_key, *tmp_str, *replace, *tmp_replace;
3206 zval *entry;
3207
3208 ZEND_HASH_FOREACH_KEY_VAL_IND(from_ht, num_key, str_key, entry) {
3209 tmp_str = NULL;
3210 if (UNEXPECTED(!str_key)) {
3211 str_key = tmp_str = zend_long_to_str(num_key);
3212 }
3213 replace = zval_get_tmp_string(entry, &tmp_replace);
3214 if (ZSTR_LEN(str_key) < 1) {
3215 php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
3216 RETVAL_STR_COPY(str);
3217 } else if (ZSTR_LEN(str_key) == 1) {
3218 RETVAL_STR(php_char_to_str_ex(str,
3219 ZSTR_VAL(str_key)[0],
3220 ZSTR_VAL(replace),
3221 ZSTR_LEN(replace),
3222 1,
3223 NULL));
3224 } else {
3225 zend_long dummy;
3226 RETVAL_STR(php_str_to_str_ex(str,
3227 ZSTR_VAL(str_key), ZSTR_LEN(str_key),
3228 ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
3229 }
3230 zend_tmp_string_release(tmp_str);
3231 zend_tmp_string_release(tmp_replace);
3232 return;
3233 } ZEND_HASH_FOREACH_END();
3234 } else {
3235 php_strtr_array(return_value, str, from_ht);
3236 }
3237 } else {
3238 RETURN_STR(php_strtr_ex(str,
3239 ZSTR_VAL(from_str),
3240 to,
3241 MIN(ZSTR_LEN(from_str), to_len)));
3242 }
3243 }
3244 /* }}} */
3245
3246 /* {{{ Reverse a string */
3247 #if ZEND_INTRIN_SSSE3_NATIVE
3248 #include <tmmintrin.h>
3249 #elif defined(__aarch64__)
3250 #include <arm_neon.h>
3251 #endif
3252 PHP_FUNCTION(strrev)
3253 {
3254 zend_string *str;
3255 const char *s, *e;
3256 char *p;
3257 zend_string *n;
3258
3259 ZEND_PARSE_PARAMETERS_START(1, 1)
3260 Z_PARAM_STR(str)
3261 ZEND_PARSE_PARAMETERS_END();
3262
3263 n = zend_string_alloc(ZSTR_LEN(str), 0);
3264 p = ZSTR_VAL(n);
3265
3266 s = ZSTR_VAL(str);
3267 e = s + ZSTR_LEN(str);
3268 --e;
3269 #if ZEND_INTRIN_SSSE3_NATIVE
3270 if (e - s > 15) {
3271 const __m128i map = _mm_set_epi8(
3272 0, 1, 2, 3,
3273 4, 5, 6, 7,
3274 8, 9, 10, 11,
3275 12, 13, 14, 15);
3276 do {
3277 const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
3278 _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
3279 p += 16;
3280 e -= 16;
3281 } while (e - s > 15);
3282 }
3283 #elif defined(__aarch64__)
3284 if (e - s > 15) {
3285 do {
3286 const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
3287 /* Synthesize rev128 with a rev64 + ext. */
3288 const uint8x16_t rev = vrev64q_u8(str);
3289 const uint8x16_t ext = (uint8x16_t)
3290 vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
3291 vst1q_u8((uint8_t *)p, ext);
3292 p += 16;
3293 e -= 16;
3294 } while (e - s > 15);
3295 }
3296 #endif
3297 while (e >= s) {
3298 *p++ = *e--;
3299 }
3300
3301 *p = '\0';
3302
3303 RETVAL_NEW_STR(n);
3304 }
3305 /* }}} */
3306
/* {{{ php_similar_str
   Finds the longest run of bytes common to txt1 and txt2.
   On return *max is the length of that run and *pos1 / *pos2 are its start
   offsets in txt1 / txt2 (the first such run in scan order). *count is the
   number of times a new, strictly longer run was found during the scan.
   *pos1 and *pos2 are left untouched when no common byte exists. */
static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
{
	size_t i, j;

	*max = 0;
	*count = 0;

	for (i = 0; i < len1; i++) {
		for (j = 0; j < len2; j++) {
			/* Length of the common run starting at txt1[i] / txt2[j]. */
			size_t run = 0;

			while ((i + run < len1) && (j + run < len2) && (txt1[i + run] == txt2[j + run])) {
				run++;
			}

			if (run > *max) {
				*max = run;
				*count += 1;
				*pos1 = i;
				*pos2 = j;
			}
		}
	}
}
3329 /* }}} */
3330
/* {{{ php_similar_char
   Returns the number of matching bytes between txt1 and txt2: the length of
   the longest common run reported by php_similar_str(), plus the result of
   applying the same procedure to the parts left of that run and to the
   parts right of it. */
static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
{
	size_t pos1 = 0, pos2 = 0, max, count;
	size_t total;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
	if (max == 0) {
		/* No common byte at all. */
		return 0;
	}

	total = max;

	/* Left parts: only when both are non-empty and the scan found more
	 * than one successively longer run. */
	if (pos1 && pos2 && count > 1) {
		total += php_similar_char(txt1, pos1, txt2, pos2);
	}

	/* Right parts: only when both strings continue past the common run. */
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		total += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
				txt2 + pos2 + max, len2 - pos2 - max);
	}

	return total;
}
3351 /* }}} */
3352
3353 /* {{{ Calculates the similarity between two strings */
3354 PHP_FUNCTION(similar_text)
3355 {
3356 zend_string *t1, *t2;
3357 zval *percent = NULL;
3358 int ac = ZEND_NUM_ARGS();
3359 size_t sim;
3360
3361 ZEND_PARSE_PARAMETERS_START(2, 3)
3362 Z_PARAM_STR(t1)
3363 Z_PARAM_STR(t2)
3364 Z_PARAM_OPTIONAL
3365 Z_PARAM_ZVAL(percent)
3366 ZEND_PARSE_PARAMETERS_END();
3367
3368 if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3369 if (ac > 2) {
3370 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
3371 }
3372
3373 RETURN_LONG(0);
3374 }
3375
3376 sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
3377
3378 if (ac > 2) {
3379 ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
3380 }
3381
3382 RETURN_LONG(sim);
3383 }
3384 /* }}} */
3385
3386 /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
3387 PHP_FUNCTION(addcslashes)
3388 {
3389 zend_string *str, *what;
3390
3391 ZEND_PARSE_PARAMETERS_START(2, 2)
3392 Z_PARAM_STR(str)
3393 Z_PARAM_STR(what)
3394 ZEND_PARSE_PARAMETERS_END();
3395
3396 if (ZSTR_LEN(str) == 0) {
3397 RETURN_EMPTY_STRING();
3398 }
3399
3400 if (ZSTR_LEN(what) == 0) {
3401 RETURN_STR_COPY(str);
3402 }
3403
3404 RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
3405 }
3406 /* }}} */
3407
3408 /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
3409 PHP_FUNCTION(addslashes)
3410 {
3411 zend_string *str;
3412
3413 ZEND_PARSE_PARAMETERS_START(1, 1)
3414 Z_PARAM_STR(str)
3415 ZEND_PARSE_PARAMETERS_END();
3416
3417 if (ZSTR_LEN(str) == 0) {
3418 RETURN_EMPTY_STRING();
3419 }
3420
3421 RETURN_STR(php_addslashes(str));
3422 }
3423 /* }}} */
3424
3425 /* {{{ Strips backslashes from a string. Uses C-style conventions */
3426 PHP_FUNCTION(stripcslashes)
3427 {
3428 zend_string *str;
3429
3430 ZEND_PARSE_PARAMETERS_START(1, 1)
3431 Z_PARAM_STR(str)
3432 ZEND_PARSE_PARAMETERS_END();
3433
3434 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3435 php_stripcslashes(Z_STR_P(return_value));
3436 }
3437 /* }}} */
3438
3439 /* {{{ Strips backslashes from a string */
3440 PHP_FUNCTION(stripslashes)
3441 {
3442 zend_string *str;
3443
3444 ZEND_PARSE_PARAMETERS_START(1, 1)
3445 Z_PARAM_STR(str)
3446 ZEND_PARSE_PARAMETERS_END();
3447
3448 ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
3449 php_stripslashes(Z_STR_P(return_value));
3450 }
3451 /* }}} */
3452
3453 /* {{{ php_stripcslashes */
3454 PHPAPI void php_stripcslashes(zend_string *str)
3455 {
3456 const char *source, *end;
3457 char *target;
3458 size_t nlen = ZSTR_LEN(str), i;
3459 char numtmp[4];
3460
3461 for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
3462 if (*source == '\\' && source + 1 < end) {
3463 source++;
3464 switch (*source) {
3465 case 'n': *target++='\n'; nlen--; break;
3466 case 'r': *target++='\r'; nlen--; break;
3467 case 'a': *target++='\a'; nlen--; break;
3468 case 't': *target++='\t'; nlen--; break;
3469 case 'v': *target++='\v'; nlen--; break;
3470 case 'b': *target++='\b'; nlen--; break;
3471 case 'f': *target++='\f'; nlen--; break;
3472 case '\\': *target++='\\'; nlen--; break;
3473 case 'x':
3474 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3475 numtmp[0] = *++source;
3476 if (source+1 < end && isxdigit((int)(*(source+1)))) {
3477 numtmp[1] = *++source;
3478 numtmp[2] = '\0';
3479 nlen-=3;
3480 } else {
3481 numtmp[1] = '\0';
3482 nlen-=2;
3483 }
3484 *target++=(char)strtol(numtmp, NULL, 16);
3485 break;
3486 }
3487 /* break is left intentionally */
3488 default:
3489 i=0;
3490 while (source < end && *source >= '0' && *source <= '7' && i<3) {
3491 numtmp[i++] = *source++;
3492 }
3493 if (i) {
3494 numtmp[i]='\0';
3495 *target++=(char)strtol(numtmp, NULL, 8);
3496 nlen-=i;
3497 source--;
3498 } else {
3499 *target++=*source;
3500 nlen--;
3501 }
3502 }
3503 } else {
3504 *target++=*source;
3505 }
3506 }
3507
3508 if (nlen != 0) {
3509 *target='\0';
3510 }
3511
3512 ZSTR_LEN(str) = nlen;
3513 }
3514 /* }}} */
3515
3516 /* {{{ php_addcslashes_str */
3517 PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
3518 {
3519 char flags[256];
3520 char *target;
3521 const char *source, *end;
3522 char c;
3523 size_t newlen;
3524 zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
3525
3526 php_charmask((const unsigned char *) what, wlength, flags);
3527
3528 for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
3529 c = *source;
3530 if (flags[(unsigned char)c]) {
3531 if ((unsigned char) c < 32 || (unsigned char) c > 126) {
3532 *target++ = '\\';
3533 switch (c) {
3534 case '\n': *target++ = 'n'; break;
3535 case '\t': *target++ = 't'; break;
3536 case '\r': *target++ = 'r'; break;
3537 case '\a': *target++ = 'a'; break;
3538 case '\v': *target++ = 'v'; break;
3539 case '\b': *target++ = 'b'; break;
3540 case '\f': *target++ = 'f'; break;
3541 default: target += sprintf(target, "%03o", (unsigned char) c);
3542 }
3543 continue;
3544 }
3545 *target++ = '\\';
3546 }
3547 *target++ = c;
3548 }
3549 *target = 0;
3550 newlen = target - ZSTR_VAL(new_str);
3551 if (newlen < len * 4) {
3552 new_str = zend_string_truncate(new_str, newlen, 0);
3553 }
3554 return new_str;
3555 }
3556 /* }}} */
3557
3558 /* {{{ php_addcslashes */
3559 PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
3560 {
3561 return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
3562 }
3563 /* }}} */
3564
3565 /* {{{ php_addslashes */
3566
#if ZEND_INTRIN_SSE4_2_NATIVE
# include <nmmintrin.h>
# include "Zend/zend_bitset.h"
#elif ZEND_INTRIN_SSE4_2_RESOLVER
# include <nmmintrin.h>
# include "Zend/zend_bitset.h"
# include "Zend/zend_cpuinfo.h"

/* Both variants of each routine are compiled; one is picked at run time. */
ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
zend_string *php_addslashes_default(zend_string *str);

ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
void php_stripslashes_default(zend_string *str);

# if ZEND_INTRIN_SSE4_2_FUNC_PROTO
/* ifunc dispatch: the public symbols are bound through the resolvers below. */
PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));

typedef zend_string *(*php_addslashes_func_t)(zend_string *);
typedef void (*php_stripslashes_func_t)(zend_string *);

/* Picks the SSE4.2 addslashes variant when the CPU supports it. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
static php_addslashes_func_t resolve_addslashes(void) {
	return zend_cpu_supports_sse42() ? php_addslashes_sse42 : php_addslashes_default;
}

/* Picks the SSE4.2 stripslashes variant when the CPU supports it. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
static php_stripslashes_func_t resolve_stripslashes(void) {
	return zend_cpu_supports_sse42() ? php_stripslashes_sse42 : php_stripslashes_default;
}
# else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */

/* Function-pointer dispatch: both pointers are filled in by
 * PHP_MINIT_FUNCTION(string_intrin) and are NULL until then. */
static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
static void (*php_stripslashes_ptr)(zend_string *str) = NULL;

PHPAPI zend_string *php_addslashes(zend_string *str) {
	return php_addslashes_ptr(str);
}
PHPAPI void php_stripslashes(zend_string *str) {
	php_stripslashes_ptr(str);
}

/* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(string_intrin)
{
	const int has_sse42 = zend_cpu_supports_sse42();

	php_addslashes_ptr = has_sse42 ? php_addslashes_sse42 : php_addslashes_default;
	php_stripslashes_ptr = has_sse42 ? php_stripslashes_sse42 : php_stripslashes_default;

	return SUCCESS;
}
/* }}} */
# endif
#endif
3632
#if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
/* Copies one 16-byte chunk to `out`, inserting a backslash before every byte
 * whose bit is set in `mask` (bit k corresponds to chunk[k]); a NUL byte is
 * written as "\0" (backslash + digit zero). `mask` must be non-zero.
 * Returns the advanced output pointer. */
static zend_always_inline char *php_addslashes_sse42_expand_chunk(const char *chunk, char *out, uint32_t mask)
{
	int pos = 0;

	do {
		/* Number of plain bytes before the next byte needing an escape. */
		const int run = zend_ulong_ntz(mask);
		int k;

		for (k = 0; k < run; k++) {
			*out++ = chunk[pos + k];
		}
		pos += run;

		*out++ = '\\';
		*out++ = (chunk[pos] == '\0') ? '0' : chunk[pos];
		pos++;

		mask >>= (run + 1);
	} while (mask);

	/* Plain bytes after the last escaped one. */
	while (pos < 16) {
		*out++ = chunk[pos++];
	}

	return out;
}

# if ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
# elif ZEND_INTRIN_SSE4_2_RESOLVER
zend_string *php_addslashes_sse42(zend_string *str)
# endif
{
	/* The four bytes to escape: ' " \ and NUL. */
	ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
	__m128i needles, chunk;
	uint32_t hits = 0;
	const char *cur, *stop;
	char *out;
	size_t prefix_len, out_len;
	zend_string *escaped;

	if (!str) {
		return ZSTR_EMPTY_ALLOC();
	}

	cur = ZSTR_VAL(str);
	stop = cur + ZSTR_LEN(str);

	/* Phase 1: look for the first byte that needs escaping, 16 at a time. */
	if (ZSTR_LEN(str) > 15) {
		needles = _mm_load_si128((__m128i *)slashchars);
		do {
			chunk = _mm_loadu_si128((__m128i *)cur);
			hits = _mm_cvtsi128_si32(_mm_cmpestrm(needles, 4, chunk, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
			if (hits) {
				goto do_escape;
			}
			cur += 16;
		} while ((stop - cur) > 15);
	}

	/* Fewer than 16 bytes left: scan them one by one. */
	for (; cur < stop; cur++) {
		if (*cur == '\0' || *cur == '\'' || *cur == '\"' || *cur == '\\') {
			goto do_escape;
		}
	}

	/* Nothing to escape: return the input with an extra reference. */
	return zend_string_copy(str);

do_escape:
	/* Phase 2: the first prefix_len bytes are clean and copied verbatim; the
	 * rest is sized for the worst case of two output bytes per input byte. */
	prefix_len = cur - (char *)ZSTR_VAL(str);
	escaped = zend_string_safe_alloc(2, ZSTR_LEN(str) - prefix_len, prefix_len, 0);
	memcpy(ZSTR_VAL(escaped), ZSTR_VAL(str), prefix_len);
	out = ZSTR_VAL(escaped) + prefix_len;

	if (hits) {
		/* We arrived from the vector scan: expand the chunk it stopped on. */
		out = php_addslashes_sse42_expand_chunk(cur, out, hits);
		cur += 16;
	} else if (stop - cur > 15) {
		needles = _mm_load_si128((__m128i *)slashchars);
	}

	for (; stop - cur > 15; cur += 16) {
		chunk = _mm_loadu_si128((__m128i *)cur);
		hits = _mm_cvtsi128_si32(_mm_cmpestrm(needles, 4, chunk, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
		if (hits) {
			out = php_addslashes_sse42_expand_chunk(cur, out, hits);
		} else {
			_mm_storeu_si128((__m128i*)out, chunk);
			out += 16;
		}
	}

	/* Scalar tail for the remaining (at most 15) bytes. */
	for (; cur < stop; cur++) {
		const char ch = *cur;

		if (ch == '\0') {
			*out++ = '\\';
			*out++ = '0';
			continue;
		}
		if (ch == '\'' || ch == '\"' || ch == '\\') {
			*out++ = '\\';
		}
		*out++ = ch;
	}

	*out = '\0';
	out_len = out - ZSTR_VAL(escaped);

	/* Only reallocate when more than 16 bytes would be given back. */
	if (ZSTR_LEN(escaped) - out_len > 16) {
		escaped = zend_string_truncate(escaped, out_len, 0);
	} else {
		ZSTR_LEN(escaped) = out_len;
	}

	return escaped;
}
/* }}} */
#endif
3774
#ifdef __aarch64__
/* 16 bytes viewable either per byte or as two 64-bit halves, so that
 * "is any byte set?" can be answered with two integer loads. */
typedef union {
	uint8_t mem[16];
	uint64_t dw[2];
} quad_word;

/* Returns a per-byte flag vector for `x`: flags.mem[k] is non-zero exactly
 * when byte k of `x` is NUL, a single quote, a double quote or a backslash. */
static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
	const uint8x16_t is_nul    = vceqq_u8(x, vdupq_n_u8('\0'));
	const uint8x16_t is_squote = vceqq_u8(x, vdupq_n_u8('\''));
	const uint8x16_t is_dquote = vceqq_u8(x, vdupq_n_u8('\"'));
	const uint8x16_t is_bslash = vceqq_u8(x, vdupq_n_u8('\\'));
	const uint8x16_t any = vorrq_u8(vorrq_u8(is_nul, is_squote), vorrq_u8(is_dquote, is_bslash));
	quad_word flags;

	vst1q_u8(flags.mem, any);
	return flags;
}

/* Copies the 16 bytes at `source` to `target`, inserting a backslash before
 * every byte flagged in `res`; a flagged NUL is written as backslash + '0'.
 * Returns the advanced target pointer. */
static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
{
	const char *const stop = source + 16;
	const uint8_t *flag = res.mem;

	for (; source < stop; source++, flag++) {
		const char ch = *source;

		if (*flag != 0) {
			*target++ = '\\';
			*target++ = (ch == '\0') ? '0' : ch;
		} else {
			*target++ = ch;
		}
	}
	return target;
}
#endif /* __aarch64__ */
3812
3813 #if !ZEND_INTRIN_SSE4_2_NATIVE
3814 # if ZEND_INTRIN_SSE4_2_RESOLVER
3815 zend_string *php_addslashes_default(zend_string *str) /* {{{ */
3816 # else
3817 PHPAPI zend_string *php_addslashes(zend_string *str)
3818 # endif
3819 {
3820 /* maximum string length, worst case situation */
3821 char *target;
3822 const char *source, *end;
3823 size_t offset;
3824 zend_string *new_str;
3825
3826 if (!str) {
3827 return ZSTR_EMPTY_ALLOC();
3828 }
3829
3830 source = ZSTR_VAL(str);
3831 end = source + ZSTR_LEN(str);
3832
3833 # ifdef __aarch64__
3834 quad_word res = {0};
3835 if (ZSTR_LEN(str) > 15) {
3836 do {
3837 res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
3838 if (res.dw[0] | res.dw[1])
3839 goto do_escape;
3840 source += 16;
3841 } while ((end - source) > 15);
3842 }
3843 /* Finish the last 15 bytes or less with the scalar loop. */
3844 # endif /* __aarch64__ */
3845
3846 while (source < end) {
3847 switch (*source) {
3848 case '\0':
3849 case '\'':
3850 case '\"':
3851 case '\\':
3852 goto do_escape;
3853 default:
3854 source++;
3855 break;
3856 }
3857 }
3858
3859 return zend_string_copy(str);
3860
3861 do_escape:
3862 offset = source - (char *)ZSTR_VAL(str);
3863 new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
3864 memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
3865 target = ZSTR_VAL(new_str) + offset;
3866
3867 # ifdef __aarch64__
3868 if (res.dw[0] | res.dw[1]) {
3869 target = aarch64_add_slashes(res, source, target);
3870 source += 16;
3871 }
3872 for (; end - source > 15; source += 16) {
3873 uint8x16_t x = vld1q_u8((uint8_t *)source);
3874 res = aarch64_contains_slash_chars(x);
3875 if (res.dw[0] | res.dw[1]) {
3876 target = aarch64_add_slashes(res, source, target);
3877 } else {
3878 vst1q_u8((uint8_t*)target, x);
3879 target += 16;
3880 }
3881 }
3882 /* Finish the last 15 bytes or less with the scalar loop. */
3883 # endif /* __aarch64__ */
3884
3885 while (source < end) {
3886 switch (*source) {
3887 case '\0':
3888 *target++ = '\\';
3889 *target++ = '0';
3890 break;
3891 case '\'':
3892 case '\"':
3893 case '\\':
3894 *target++ = '\\';
3895 /* break is missing *intentionally* */
3896 default:
3897 *target++ = *source;
3898 break;
3899 }
3900 source++;
3901 }
3902
3903 *target = '\0';
3904
3905 if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
3906 new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
3907 } else {
3908 ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
3909 }
3910
3911 return new_str;
3912 }
3913 #endif
3914 /* }}} */
3915 /* }}} */
3916
3917 /* {{{ php_stripslashes
3918 *
3919 * be careful, this edits the string in-place */
3920 static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
3921 {
3922 #ifdef __aarch64__
3923 while (len > 15) {
3924 uint8x16_t x = vld1q_u8((uint8_t *)str);
3925 quad_word q;
3926 vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
3927 if (q.dw[0] | q.dw[1]) {
3928 int i = 0;
3929 for (; i < 16; i++) {
3930 if (q.mem[i] == 0) {
3931 *out++ = str[i];
3932 continue;
3933 }
3934
3935 i++; /* skip the slash */
3936 char s = str[i];
3937 if (s == '0')
3938 *out++ = '\0';
3939 else
3940 *out++ = s; /* preserve the next character */
3941 }
3942 str += i;
3943 len -= i;
3944 } else {
3945 vst1q_u8((uint8_t*)out, x);
3946 out += 16;
3947 str += 16;
3948 len -= 16;
3949 }
3950 }
3951 /* Finish the last 15 bytes or less with the scalar loop. */
3952 #endif /* __aarch64__ */
3953 while (len > 0) {
3954 if (*str == '\\') {
3955 str++; /* skip the slash */
3956 len--;
3957 if (len > 0) {
3958 if (*str == '0') {
3959 *out++='\0';
3960 str++;
3961 } else {
3962 *out++ = *str++; /* preserve the next character */
3963 }
3964 len--;
3965 }
3966 } else {
3967 *out++ = *str++;
3968 len--;
3969 }
3970 }
3971
3972 return out;
3973 }
3974
#if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
/* SSE4.2 build of stripslashes: unescapes `str` in place, 16 bytes at a
 * time, then hands the remainder to php_stripslashes_impl() and fixes up the
 * length and terminator if the string got shorter. */
# if ZEND_INTRIN_SSE4_2_NATIVE
PHPAPI void php_stripslashes(zend_string *str)
# elif ZEND_INTRIN_SSE4_2_RESOLVER
void php_stripslashes_sse42(zend_string *str)
# endif
{
	const char *src = ZSTR_VAL(str);
	char *dst = ZSTR_VAL(str);
	size_t remaining = ZSTR_LEN(str);

	if (remaining > 15) {
		const __m128i slash = _mm_set1_epi8('\\');

		do {
			__m128i chunk = _mm_loadu_si128((__m128i *)src);
			uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, slash));

			if (!mask) {
				/* No backslash in this chunk: move it as one block. */
				_mm_storeu_si128((__m128i *)dst, chunk);
				src += 16;
				dst += 16;
				remaining -= 16;
				continue;
			}

			{
				/* Stop one byte short of the chunk end: a backslash in the
				 * last lane is left for the next round / the scalar tail. */
				const char *limit = src + 15;
				int lead = zend_ulong_ntz(mask);
				int k;

				/* Bytes before the first backslash are copied unchanged. */
				remaining -= lead;
				for (k = 0; k < lead; k++) {
					*dst++ = *src++;
				}

				while (src < limit) {
					if (*src == '\\') {
						src++;
						remaining--;
						*dst = (*src == '0') ? '\0' : *src;
					} else {
						*dst = *src;
					}
					dst++;
					remaining--;
					src++;
				}
			}
		} while (remaining > 15);
	}

	dst = php_stripslashes_impl(src, dst, remaining);
	if (dst != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
		ZSTR_LEN(str) = dst - ZSTR_VAL(str);
		ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
	}
}
#endif
4032
4033 #if !ZEND_INTRIN_SSE4_2_NATIVE
4034 # if ZEND_INTRIN_SSE4_2_RESOLVER
4035 void php_stripslashes_default(zend_string *str) /* {{{ */
4036 # else
4037 PHPAPI void php_stripslashes(zend_string *str)
4038 # endif
4039 {
4040 const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
4041 if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
4042 ZSTR_LEN(str) = t - ZSTR_VAL(str);
4043 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
4044 }
4045 }
4046 /* }}} */
4047 #endif
4048 /* }}} */
4049
/* Block kinds used while splitting text into Hebrew / non-Hebrew runs. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2
/* Byte classifiers; each yields 1 or 0. The argument is parenthesised so the
 * unsigned char cast applies to the whole expression passed in. Note that the
 * argument is evaluated twice, so it must not have side effects. */
/* 1 if the byte is in the range 224..250. */
#define isheb(c)      (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
/* 1 for space or horizontal tab. */
#define _isblank(c)   (((((unsigned char) (c)) == ' '  || ((unsigned char) (c)) == '\t')) ? 1 : 0)
/* 1 for line feed or carriage return. */
#define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
4055
4056 /* {{{ php_str_replace_in_subject */
4057 static zend_long php_str_replace_in_subject(
4058 zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
4059 zend_string *subject_str, zval *result, int case_sensitivity
4060 ) {
4061 zval *search_entry;
4062 zend_string *tmp_result;
4063 char *replace_value = NULL;
4064 size_t replace_len = 0;
4065 zend_long replace_count = 0;
4066 zend_string *lc_subject_str = NULL;
4067 uint32_t replace_idx;
4068
4069 if (ZSTR_LEN(subject_str) == 0) {
4070 ZVAL_EMPTY_STRING(result);
4071 return 0;
4072 }
4073
4074 /* If search is an array */
4075 if (search_ht) {
4076 /* Duplicate subject string for repeated replacement */
4077 zend_string_addref(subject_str);
4078
4079 if (replace_ht) {
4080 replace_idx = 0;
4081 } else {
4082 /* Set replacement value to the passed one */
4083 replace_value = ZSTR_VAL(replace_str);
4084 replace_len = ZSTR_LEN(replace_str);
4085 }
4086
4087 /* For each entry in the search array, get the entry */
4088 ZEND_HASH_FOREACH_VAL_IND(search_ht, search_entry) {
4089 /* Make sure we're dealing with strings. */
4090 zend_string *tmp_search_str;
4091 zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
4092 zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
4093
4094 /* If replace is an array. */
4095 if (replace_ht) {
4096 /* Get current entry */
4097 zval *replace_entry = NULL;
4098 while (replace_idx < replace_ht->nNumUsed) {
4099 replace_entry = &replace_ht->arData[replace_idx].val;
4100 if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
4101 break;
4102 }
4103 replace_idx++;
4104 }
4105 if (replace_idx < replace_ht->nNumUsed) {
4106 /* Make sure we're dealing with strings. */
4107 replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
4108
4109 /* Set replacement value to the one we got from array */
4110 replace_value = ZSTR_VAL(replace_entry_str);
4111 replace_len = ZSTR_LEN(replace_entry_str);
4112
4113 replace_idx++;
4114 } else {
4115 /* We've run out of replacement strings, so use an empty one. */
4116 replace_value = "";
4117 replace_len = 0;
4118 }
4119 }
4120
4121 if (ZSTR_LEN(search_str) == 1) {
4122 zend_long old_replace_count = replace_count;
4123
4124 tmp_result = php_char_to_str_ex(subject_str,
4125 ZSTR_VAL(search_str)[0],
4126 replace_value,
4127 replace_len,
4128 case_sensitivity,
4129 &replace_count);
4130 if (lc_subject_str && replace_count != old_replace_count) {
4131 zend_string_release_ex(lc_subject_str, 0);
4132 lc_subject_str = NULL;
4133 }
4134 } else if (ZSTR_LEN(search_str) > 1) {
4135 if (case_sensitivity) {
4136 tmp_result = php_str_to_str_ex(subject_str,
4137 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4138 replace_value, replace_len, &replace_count);
4139 } else {
4140 zend_long old_replace_count = replace_count;
4141
4142 if (!lc_subject_str) {
4143 lc_subject_str = php_string_tolower(subject_str);
4144 }
4145 tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4146 search_str, replace_value, replace_len, &replace_count);
4147 if (replace_count != old_replace_count) {
4148 zend_string_release_ex(lc_subject_str, 0);
4149 lc_subject_str = NULL;
4150 }
4151 }
4152 } else {
4153 zend_tmp_string_release(tmp_search_str);
4154 zend_tmp_string_release(tmp_replace_entry_str);
4155 continue;
4156 }
4157
4158 zend_tmp_string_release(tmp_search_str);
4159 zend_tmp_string_release(tmp_replace_entry_str);
4160
4161 if (subject_str == tmp_result) {
4162 zend_string_delref(subject_str);
4163 } else {
4164 zend_string_release_ex(subject_str, 0);
4165 subject_str = tmp_result;
4166 if (ZSTR_LEN(subject_str) == 0) {
4167 zend_string_release_ex(subject_str, 0);
4168 ZVAL_EMPTY_STRING(result);
4169 if (lc_subject_str) {
4170 zend_string_release_ex(lc_subject_str, 0);
4171 }
4172 return replace_count;
4173 }
4174 }
4175 } ZEND_HASH_FOREACH_END();
4176 ZVAL_STR(result, subject_str);
4177 if (lc_subject_str) {
4178 zend_string_release_ex(lc_subject_str, 0);
4179 }
4180 } else {
4181 ZEND_ASSERT(search_str);
4182 if (ZSTR_LEN(search_str) == 1) {
4183 ZVAL_STR(result,
4184 php_char_to_str_ex(subject_str,
4185 ZSTR_VAL(search_str)[0],
4186 ZSTR_VAL(replace_str),
4187 ZSTR_LEN(replace_str),
4188 case_sensitivity,
4189 &replace_count));
4190 } else if (ZSTR_LEN(search_str) > 1) {
4191 if (case_sensitivity) {
4192 ZVAL_STR(result, php_str_to_str_ex(subject_str,
4193 ZSTR_VAL(search_str), ZSTR_LEN(search_str),
4194 ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4195 } else {
4196 lc_subject_str = php_string_tolower(subject_str);
4197 ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
4198 search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
4199 zend_string_release_ex(lc_subject_str, 0);
4200 }
4201 } else {
4202 ZVAL_STR_COPY(result, subject_str);
4203 }
4204 }
4205 return replace_count;
4206 }
4207 /* }}} */
4208
4209 /* {{{ php_str_replace_common */
4210 static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
4211 {
4212 zend_string *search_str;
4213 HashTable *search_ht;
4214 zend_string *replace_str;
4215 HashTable *replace_ht;
4216 zend_string *subject_str;
4217 HashTable *subject_ht;
4218 zval *subject_entry, *zcount = NULL;
4219 zval result;
4220 zend_string *string_key;
4221 zend_ulong num_key;
4222 zend_long count = 0;
4223
4224 ZEND_PARSE_PARAMETERS_START(3, 4)
4225 Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
4226 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
4227 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
4228 Z_PARAM_OPTIONAL
4229 Z_PARAM_ZVAL(zcount)
4230 ZEND_PARSE_PARAMETERS_END();
4231
4232 /* Make sure we're dealing with strings and do the replacement. */
4233 if (search_str && replace_ht) {
4234 zend_argument_type_error(2, "must be of type %s when argument #1 ($search) is %s",
4235 search_str ? "string" : "array", search_str ? "a string" : "an array"
4236 );
4237 RETURN_THROWS();
4238 }
4239
4240 /* if subject is an array */
4241 if (subject_ht) {
4242 array_init(return_value);
4243
4244 /* For each subject entry, convert it to string, then perform replacement
4245 and add the result to the return_value array. */
4246 ZEND_HASH_FOREACH_KEY_VAL_IND(subject_ht, num_key, string_key, subject_entry) {
4247 zend_string *tmp_subject_str;
4248 ZVAL_DEREF(subject_entry);
4249 subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
4250 count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
4251 zend_tmp_string_release(tmp_subject_str);
4252
4253 /* Add to return array */
4254 if (string_key) {
4255 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
4256 } else {
4257 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
4258 }
4259 } ZEND_HASH_FOREACH_END();
4260 } else { /* if subject is not an array */
4261 count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
4262 }
4263 if (zcount) {
4264 ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
4265 }
4266 }
4267 /* }}} */
4268
4269 /* {{{ Replaces all occurrences of search in haystack with replace */
4270 PHP_FUNCTION(str_replace)
4271 {
4272 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4273 }
4274 /* }}} */
4275
4276 /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
4277 PHP_FUNCTION(str_ireplace)
4278 {
4279 php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4280 }
4281 /* }}} */
4282
4283 /* {{{ Converts logical Hebrew text to visual text */
4284 PHP_FUNCTION(hebrev)
4285 {
4286 char *str, *heb_str, *target;
4287 const char *tmp;
4288 size_t block_start, block_end, block_type, block_length, i;
4289 zend_long max_chars=0, char_count;
4290 size_t begin, end, orig_begin;
4291 size_t str_len;
4292 zend_string *broken_str;
4293
4294 ZEND_PARSE_PARAMETERS_START(1, 2)
4295 Z_PARAM_STRING(str, str_len)
4296 Z_PARAM_OPTIONAL
4297 Z_PARAM_LONG(max_chars)
4298 ZEND_PARSE_PARAMETERS_END();
4299
4300 if (str_len == 0) {
4301 RETURN_EMPTY_STRING();
4302 }
4303
4304 tmp = str;
4305 block_start=block_end=0;
4306
4307 heb_str = (char *) emalloc(str_len+1);
4308 target = heb_str+str_len;
4309 *target = 0;
4310 target--;
4311
4312 block_length=0;
4313
4314 if (isheb(*tmp)) {
4315 block_type = _HEB_BLOCK_TYPE_HEB;
4316 } else {
4317 block_type = _HEB_BLOCK_TYPE_ENG;
4318 }
4319
4320 do {
4321 if (block_type == _HEB_BLOCK_TYPE_HEB) {
4322 while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
4323 tmp++;
4324 block_end++;
4325 block_length++;
4326 }
4327 for (i = block_start+1; i<= block_end+1; i++) {
4328 *target = str[i-1];
4329 switch (*target) {
4330 case '(':
4331 *target = ')';
4332 break;
4333 case ')':
4334 *target = '(';
4335 break;
4336 case '[':
4337 *target = ']';
4338 break;
4339 case ']':
4340 *target = '[';
4341 break;
4342 case '{':
4343 *target = '}';
4344 break;
4345 case '}':
4346 *target = '{';
4347 break;
4348 case '<':
4349 *target = '>';
4350 break;
4351 case '>':
4352 *target = '<';
4353 break;
4354 case '\\':
4355 *target = '/';
4356 break;
4357 case '/':
4358 *target = '\\';
4359 break;
4360 default:
4361 break;
4362 }
4363 target--;
4364 }
4365 block_type = _HEB_BLOCK_TYPE_ENG;
4366 } else {
4367 while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
4368 tmp++;
4369 block_end++;
4370 block_length++;
4371 }
4372 while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
4373 tmp--;
4374 block_end--;
4375 }
4376 for (i = block_end+1; i >= block_start+1; i--) {
4377 *target = str[i-1];
4378 target--;
4379 }
4380 block_type = _HEB_BLOCK_TYPE_HEB;
4381 }
4382 block_start=block_end+1;
4383 } while (block_end < str_len-1);
4384
4385
4386 broken_str = zend_string_alloc(str_len, 0);
4387 begin = end = str_len-1;
4388 target = ZSTR_VAL(broken_str);
4389
4390 while (1) {
4391 char_count=0;
4392 while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
4393 char_count++;
4394 begin--;
4395 if (_isnewline(heb_str[begin])) {
4396 while (begin > 0 && _isnewline(heb_str[begin-1])) {
4397 begin--;
4398 char_count++;
4399 }
4400 break;
4401 }
4402 }
4403 if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
4404 size_t new_char_count=char_count, new_begin=begin;
4405
4406 while (new_char_count > 0) {
4407 if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
4408 break;
4409 }
4410 new_begin++;
4411 new_char_count--;
4412 }
4413 if (new_char_count > 0) {
4414 begin=new_begin;
4415 }
4416 }
4417 orig_begin=begin;
4418
4419 if (_isblank(heb_str[begin])) {
4420 heb_str[begin]='\n';
4421 }
4422 while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
4423 begin++;
4424 }
4425 for (i = begin; i <= end; i++) { /* copy content */
4426 *target = heb_str[i];
4427 target++;
4428 }
4429 for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
4430 *target = heb_str[i];
4431 target++;
4432 }
4433 begin=orig_begin;
4434
4435 if (begin == 0) {
4436 *target = 0;
4437 break;
4438 }
4439 begin--;
4440 end=begin;
4441 }
4442 efree(heb_str);
4443
4444 RETURN_NEW_STR(broken_str);
4445 }
4446 /* }}} */
4447
4448 /* {{{ Converts newlines to HTML line breaks */
4449 PHP_FUNCTION(nl2br)
4450 {
4451 /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
4452 const char *tmp, *end;
4453 zend_string *str;
4454 char *target;
4455 size_t repl_cnt = 0;
4456 zend_bool is_xhtml = 1;
4457 zend_string *result;
4458
4459 ZEND_PARSE_PARAMETERS_START(1, 2)
4460 Z_PARAM_STR(str)
4461 Z_PARAM_OPTIONAL
4462 Z_PARAM_BOOL(is_xhtml)
4463 ZEND_PARSE_PARAMETERS_END();
4464
4465 tmp = ZSTR_VAL(str);
4466 end = ZSTR_VAL(str) + ZSTR_LEN(str);
4467
4468 /* it is really faster to scan twice and allocate mem once instead of scanning once
4469 and constantly reallocing */
4470 while (tmp < end) {
4471 if (*tmp == '\r') {
4472 if (*(tmp+1) == '\n') {
4473 tmp++;
4474 }
4475 repl_cnt++;
4476 } else if (*tmp == '\n') {
4477 if (*(tmp+1) == '\r') {
4478 tmp++;
4479 }
4480 repl_cnt++;
4481 }
4482
4483 tmp++;
4484 }
4485
4486 if (repl_cnt == 0) {
4487 RETURN_STR_COPY(str);
4488 }
4489
4490 {
4491 size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
4492
4493 result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
4494 target = ZSTR_VAL(result);
4495 }
4496
4497 tmp = ZSTR_VAL(str);
4498 while (tmp < end) {
4499 switch (*tmp) {
4500 case '\r':
4501 case '\n':
4502 *target++ = '<';
4503 *target++ = 'b';
4504 *target++ = 'r';
4505
4506 if (is_xhtml) {
4507 *target++ = ' ';
4508 *target++ = '/';
4509 }
4510
4511 *target++ = '>';
4512
4513 if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
4514 *target++ = *tmp++;
4515 }
4516 /* lack of a break; is intentional */
4517 default:
4518 *target++ = *tmp;
4519 }
4520
4521 tmp++;
4522 }
4523
4524 *target = '\0';
4525
4526 RETURN_NEW_STR(result);
4527 }
4528 /* }}} */
4529
4530 /* {{{ Strips HTML and PHP tags from a string */
4531 PHP_FUNCTION(strip_tags)
4532 {
4533 zend_string *buf;
4534 zend_string *str;
4535 zend_string *allow_str = NULL;
4536 HashTable *allow_ht = NULL;
4537 const char *allowed_tags=NULL;
4538 size_t allowed_tags_len=0;
4539 smart_str tags_ss = {0};
4540
4541 ZEND_PARSE_PARAMETERS_START(1, 2)
4542 Z_PARAM_STR(str)
4543 Z_PARAM_OPTIONAL
4544 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
4545 ZEND_PARSE_PARAMETERS_END();
4546
4547 if (allow_ht) {
4548 zval *tmp;
4549 zend_string *tag;
4550
4551 ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
4552 tag = zval_get_string(tmp);
4553 smart_str_appendc(&tags_ss, '<');
4554 smart_str_append(&tags_ss, tag);
4555 smart_str_appendc(&tags_ss, '>');
4556 zend_string_release(tag);
4557 } ZEND_HASH_FOREACH_END();
4558 if (tags_ss.s) {
4559 smart_str_0(&tags_ss);
4560 allowed_tags = ZSTR_VAL(tags_ss.s);
4561 allowed_tags_len = ZSTR_LEN(tags_ss.s);
4562 }
4563 } else if (allow_str) {
4564 allowed_tags = ZSTR_VAL(allow_str);
4565 allowed_tags_len = ZSTR_LEN(allow_str);
4566 }
4567
4568 buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
4569 ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
4570 smart_str_free(&tags_ss);
4571 RETURN_NEW_STR(buf);
4572 }
4573 /* }}} */
4574
4575 static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
4576 const char *retval;
4577
4578 if (!strcmp("0", ZSTR_VAL(loc))) {
4579 loc = NULL;
4580 } else {
4581 if (ZSTR_LEN(loc) >= 255) {
4582 php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
4583 return NULL;
4584 }
4585 }
4586
4587 # ifndef PHP_WIN32
4588 retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
4589 # else
4590 if (loc) {
4591 /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
4592 char *locp = ZSTR_VAL(loc);
4593 if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
4594 && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
4595 && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
4596 && (locp[5] == '\0' || locp[5] == '.')
4597 && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
4598 && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
4599 && locp[5] == '\0')
4600 ) {
4601 retval = NULL;
4602 } else {
4603 retval = setlocale(cat, ZSTR_VAL(loc));
4604 }
4605 } else {
4606 retval = setlocale(cat, NULL);
4607 }
4608 # endif
4609 zend_update_current_locale();
4610 if (!retval) {
4611 return NULL;
4612 }
4613
4614 if (loc) {
4615 /* Remember if locale was changed */
4616 size_t len = strlen(retval);
4617
4618 BG(locale_changed) = 1;
4619 if (cat == LC_CTYPE || cat == LC_ALL) {
4620 if (BG(ctype_string)) {
4621 zend_string_release_ex(BG(ctype_string), 0);
4622 }
4623 if (len == 1 && *retval == 'C') {
4624 /* C locale is represented as NULL. */
4625 BG(ctype_string) = NULL;
4626 return ZSTR_CHAR('C');
4627 } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
4628 BG(ctype_string) = zend_string_copy(loc);
4629 return zend_string_copy(BG(ctype_string));
4630 } else {
4631 BG(ctype_string) = zend_string_init(retval, len, 0);
4632 return zend_string_copy(BG(ctype_string));
4633 }
4634 } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
4635 return zend_string_copy(loc);
4636 }
4637 }
4638 return zend_string_init(retval, strlen(retval), 0);
4639 }
4640
4641 static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
4642 zend_string *tmp_loc_str;
4643 zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
4644 if (UNEXPECTED(loc_str == NULL)) {
4645 return NULL;
4646 }
4647 zend_string *result = try_setlocale_str(cat, loc_str);
4648 zend_tmp_string_release(tmp_loc_str);
4649 return result;
4650 }
4651
4652 /* {{{ Set locale information */
4653 PHP_FUNCTION(setlocale)
4654 {
4655 zend_long cat;
4656 zval *args = NULL;
4657 int num_args;
4658
4659 ZEND_PARSE_PARAMETERS_START(2, -1)
4660 Z_PARAM_LONG(cat)
4661 Z_PARAM_VARIADIC('+', args, num_args)
4662 ZEND_PARSE_PARAMETERS_END();
4663
4664 for (uint32_t i = 0; i < num_args; i++) {
4665 if (Z_TYPE(args[i]) == IS_ARRAY) {
4666 zval *elem;
4667 ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(args[i]), elem) {
4668 zend_string *result = try_setlocale_zval(cat, elem);
4669 if (EG(exception)) {
4670 RETURN_THROWS();
4671 }
4672 if (result) {
4673 RETURN_STR(result);
4674 }
4675 } ZEND_HASH_FOREACH_END();
4676 } else {
4677 zend_string *result = try_setlocale_zval(cat, &args[i]);
4678 if (EG(exception)) {
4679 RETURN_THROWS();
4680 }
4681 if (result) {
4682 RETURN_STR(result);
4683 }
4684 }
4685 }
4686
4687 RETURN_FALSE;
4688 }
4689 /* }}} */
4690
4691 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
4692 PHP_FUNCTION(parse_str)
4693 {
4694 char *arg;
4695 zval *arrayArg = NULL;
4696 char *res = NULL;
4697 size_t arglen;
4698
4699 ZEND_PARSE_PARAMETERS_START(2, 2)
4700 Z_PARAM_STRING(arg, arglen)
4701 Z_PARAM_ZVAL(arrayArg)
4702 ZEND_PARSE_PARAMETERS_END();
4703
4704 arrayArg = zend_try_array_init(arrayArg);
4705 if (!arrayArg) {
4706 RETURN_THROWS();
4707 }
4708
4709 res = estrndup(arg, arglen);
4710 sapi_module.treat_data(PARSE_STRING, res, arrayArg);
4711 }
4712 /* }}} */
4713
/* Usable size (excluding the NUL slot) of the tag-capture buffer that
 * php_strip_tags_ex() allocates; also the amount of headroom it keeps when
 * it has to grow that buffer. */
#define PHP_TAG_BUF_SIZE 1023
4715
/* {{{ php_tag_find
 *
 * Check if tag is in a set of tags.
 *
 * `tag` is the raw tag text (`len` bytes, normally "<...>"), `set` is a
 * NUL-terminated list such as "<a><b>". The tag is lower-cased and reduced
 * to "<name>" -- attributes are dropped and a '/' directly after '<' or
 * directly before '>' is removed -- and then searched for in `set` with
 * strstr(). `set` is therefore expected to be lower case already.
 *
 * Returns 1 when the normalized tag occurs in `set`, 0 otherwise (including
 * when `len` is 0).
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 */
int php_tag_find(char *tag, size_t len, const char *set) {
	const char *t, *end;
	char *norm, *n;
	char c;
	int state = 0, done = 0;

	if (len == 0) {
		return 0;
	}

	/* Worst case every input byte is kept, plus the closing '>' and the NUL. */
	norm = emalloc(len + 2);
	n = norm;
	end = tag + len;

	/*
	   normalize the tag removing leading and trailing whitespace
	   and turn any <a whatever...> into just <a> and any </tag>
	   into <tag>
	*/
	for (t = tag; !done && t < end; t++) {
		/* <ctype.h> functions require an unsigned char value (or EOF). */
		c = (char) tolower((unsigned char) *t);
		switch (c) {
			case '<':
				*(n++) = c;
				break;
			case '>':
				done = 1;
				break;
			default:
				if (isspace((unsigned char) c)) {
					/* Whitespace after the name ends the tag name. */
					if (state == 1) {
						done = 1;
					}
					break;
				}
				if (state == 0) {
					state = 1;
				}
				if (c != '/') {
					*(n++) = c;
				} else {
					/* Neighbours are looked up without leaving [tag, end). */
					const char prev = (t > tag) ? *(t - 1) : '\0';
					const char next = (t + 1 < end) ? *(t + 1) : '\0';

					if (prev != '<' && next != '>') {
						*(n++) = c;
					}
				}
				break;
		}
	}
	*(n++) = '>';
	*n = '\0';

	done = (strstr(set, norm) != NULL);
	efree(norm);
	return done;
}
4779 /* }}} */
4780
4781 PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
4782 {
4783 return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
4784 }
4785 /* }}} */
4786
4787 /* {{{ php_strip_tags
4788
4789 A simple little state-machine to strip out html and php tags
4790
4791 State 0 is the output state, State 1 means we are inside a
4792 normal html tag and state 2 means we are inside a php tag.
4793
4794 The state variable is passed in to allow a function like fgetss
4795 to maintain state across calls to the function.
4796
4797 lc holds the last significant character read and br is a bracket
4798 counter.
4799
4800 When an allow string is passed in we keep track of the string
4801 in state 1 and when the tag is closed check it against the
4802 allow string to see if we should allow it.
4803
4804 swm: Added ability to strip <?xml tags without assuming it PHP
4805 code.
4806 */
4807 PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, zend_bool allow_tag_spaces)
4808 {
4809 char *tbuf, *tp, *rp, c, lc;
4810 const char *buf, *p, *end;
4811 int br, depth=0, in_q = 0;
4812 uint8_t state = 0;
4813 size_t pos;
4814 char *allow_free = NULL;
4815 char is_xml = 0;
4816
4817 buf = estrndup(rbuf, len);
4818 end = buf + len;
4819 lc = '\0';
4820 p = buf;
4821 rp = rbuf;
4822 br = 0;
4823 if (allow) {
4824 allow_free = zend_str_tolower_dup_ex(allow, allow_len);
4825 allow = allow_free ? allow_free : allow;
4826 tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
4827 tp = tbuf;
4828 } else {
4829 tbuf = tp = NULL;
4830 }
4831
4832 state_0:
4833 if (p >= end) {
4834 goto finish;
4835 }
4836 c = *p;
4837 switch (c) {
4838 case '\0':
4839 break;
4840 case '<':
4841 if (in_q) {
4842 break;
4843 }
4844 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4845 *(rp++) = c;
4846 break;
4847 }
4848 lc = '<';
4849 state = 1;
4850 if (allow) {
4851 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4852 pos = tp - tbuf;
4853 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4854 tp = tbuf + pos;
4855 }
4856 *(tp++) = '<';
4857 }
4858 p++;
4859 goto state_1;
4860 case '>':
4861 if (depth) {
4862 depth--;
4863 break;
4864 }
4865
4866 if (in_q) {
4867 break;
4868 }
4869
4870 *(rp++) = c;
4871 break;
4872 default:
4873 *(rp++) = c;
4874 break;
4875 }
4876 p++;
4877 goto state_0;
4878
4879 state_1:
4880 if (p >= end) {
4881 goto finish;
4882 }
4883 c = *p;
4884 switch (c) {
4885 case '\0':
4886 break;
4887 case '<':
4888 if (in_q) {
4889 break;
4890 }
4891 if (isspace(*(p + 1)) && !allow_tag_spaces) {
4892 goto reg_char_1;
4893 }
4894 depth++;
4895 break;
4896 case '>':
4897 if (depth) {
4898 depth--;
4899 break;
4900 }
4901 if (in_q) {
4902 break;
4903 }
4904
4905 lc = '>';
4906 if (is_xml && p >= buf + 1 && *(p -1) == '-') {
4907 break;
4908 }
4909 in_q = state = is_xml = 0;
4910 if (allow) {
4911 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4912 pos = tp - tbuf;
4913 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4914 tp = tbuf + pos;
4915 }
4916 *(tp++) = '>';
4917 *tp='\0';
4918 if (php_tag_find(tbuf, tp-tbuf, allow)) {
4919 memcpy(rp, tbuf, tp-tbuf);
4920 rp += tp-tbuf;
4921 }
4922 tp = tbuf;
4923 }
4924 p++;
4925 goto state_0;
4926 case '"':
4927 case '\'':
4928 if (p != buf && (!in_q || *p == in_q)) {
4929 if (in_q) {
4930 in_q = 0;
4931 } else {
4932 in_q = *p;
4933 }
4934 }
4935 goto reg_char_1;
4936 case '!':
4937 /* JavaScript & Other HTML scripting languages */
4938 if (p >= buf + 1 && *(p-1) == '<') {
4939 state = 3;
4940 lc = c;
4941 p++;
4942 goto state_3;
4943 } else {
4944 goto reg_char_1;
4945 }
4946 break;
4947 case '?':
4948 if (p >= buf + 1 && *(p-1) == '<') {
4949 br=0;
4950 state = 2;
4951 p++;
4952 goto state_2;
4953 } else {
4954 goto reg_char_1;
4955 }
4956 break;
4957 default:
4958 reg_char_1:
4959 if (allow) {
4960 if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
4961 pos = tp - tbuf;
4962 tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
4963 tp = tbuf + pos;
4964 }
4965 *(tp++) = c;
4966 }
4967 break;
4968 }
4969 p++;
4970 goto state_1;
4971
4972 state_2:
4973 if (p >= end) {
4974 goto finish;
4975 }
4976 c = *p;
4977 switch (c) {
4978 case '(':
4979 if (lc != '"' && lc != '\'') {
4980 lc = '(';
4981 br++;
4982 }
4983 break;
4984 case ')':
4985 if (lc != '"' && lc != '\'') {
4986 lc = ')';
4987 br--;
4988 }
4989 break;
4990 case '>':
4991 if (depth) {
4992 depth--;
4993 break;
4994 }
4995 if (in_q) {
4996 break;
4997 }
4998
4999 if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
5000 in_q = state = 0;
5001 tp = tbuf;
5002 p++;
5003 goto state_0;
5004 }
5005 break;
5006 case '"':
5007 case '\'':
5008 if (p >= buf + 1 && *(p-1) != '\\') {
5009 if (lc == c) {
5010 lc = '\0';
5011 } else if (lc != '\\') {
5012 lc = c;
5013 }
5014 if (p != buf && (!in_q || *p == in_q)) {
5015 if (in_q) {
5016 in_q = 0;
5017 } else {
5018 in_q = *p;
5019 }
5020 }
5021 }
5022 break;
5023 case 'l':
5024 case 'L':
5025 /* swm: If we encounter '<?xml' then we shouldn't be in
5026 * state == 2 (PHP). Switch back to HTML.
5027 */
5028 if (state == 2 && p > buf+4
5029 && (*(p-1) == 'm' || *(p-1) == 'M')
5030 && (*(p-2) == 'x' || *(p-2) == 'X')
5031 && *(p-3) == '?'
5032 && *(p-4) == '<') {
5033 state = 1; is_xml=1;
5034 p++;
5035 goto state_1;
5036 }
5037 break;
5038 default:
5039 break;
5040 }
5041 p++;
5042 goto state_2;
5043
5044 state_3:
5045 if (p >= end) {
5046 goto finish;
5047 }
5048 c = *p;
5049 switch (c) {
5050 case '>':
5051 if (depth) {
5052 depth--;
5053 break;
5054 }
5055 if (in_q) {
5056 break;
5057 }
5058 in_q = state = 0;
5059 tp = tbuf;
5060 p++;
5061 goto state_0;
5062 case '"':
5063 case '\'':
5064 if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
5065 if (in_q) {
5066 in_q = 0;
5067 } else {
5068 in_q = *p;
5069 }
5070 }
5071 break;
5072 case '-':
5073 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
5074 state = 4;
5075 p++;
5076 goto state_4;
5077 }
5078 break;
5079 case 'E':
5080 case 'e':
5081 /* !DOCTYPE exception */
5082 if (p > buf+6
5083 && (*(p-1) == 'p' || *(p-1) == 'P')
5084 && (*(p-2) == 'y' || *(p-2) == 'Y')
5085 && (*(p-3) == 't' || *(p-3) == 'T')
5086 && (*(p-4) == 'c' || *(p-4) == 'C')
5087 && (*(p-5) == 'o' || *(p-5) == 'O')
5088 && (*(p-6) == 'd' || *(p-6) == 'D')) {
5089 state = 1;
5090 p++;
5091 goto state_1;
5092 }
5093 break;
5094 default:
5095 break;
5096 }
5097 p++;
5098 goto state_3;
5099
5100 state_4:
5101 while (p < end) {
5102 c = *p;
5103 if (c == '>' && !in_q) {
5104 if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
5105 in_q = state = 0;
5106 tp = tbuf;
5107 p++;
5108 goto state_0;
5109 }
5110 }
5111 p++;
5112 }
5113
5114 finish:
5115 if (rp < rbuf + len) {
5116 *rp = '\0';
5117 }
5118 efree((void *)buf);
5119 if (tbuf) {
5120 efree(tbuf);
5121 }
5122 if (allow_free) {
5123 efree(allow_free);
5124 }
5125
5126 return (size_t)(rp - rbuf);
5127 }
5128 /* }}} */
5129
5130 /* {{{ Parse a CSV string into an array */
5131 PHP_FUNCTION(str_getcsv)
5132 {
5133 zend_string *str;
5134 char delim = ',', enc = '"';
5135 int esc = (unsigned char) '\\';
5136 char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
5137 size_t delim_len = 0, enc_len = 0, esc_len = 0;
5138
5139 ZEND_PARSE_PARAMETERS_START(1, 4)
5140 Z_PARAM_STR(str)
5141 Z_PARAM_OPTIONAL
5142 Z_PARAM_STRING(delim_str, delim_len)
5143 Z_PARAM_STRING(enc_str, enc_len)
5144 Z_PARAM_STRING(esc_str, esc_len)
5145 ZEND_PARSE_PARAMETERS_END();
5146
5147 delim = delim_len ? delim_str[0] : delim;
5148 enc = enc_len ? enc_str[0] : enc;
5149 if (esc_str != NULL) {
5150 esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
5151 }
5152
5153 php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str), return_value);
5154 }
5155 /* }}} */
5156
5157 /* {{{ Returns the input string repeat mult times */
5158 PHP_FUNCTION(str_repeat)
5159 {
5160 zend_string *input_str; /* Input string */
5161 zend_long mult; /* Multiplier */
5162 zend_string *result; /* Resulting string */
5163 size_t result_len; /* Length of the resulting string */
5164
5165 ZEND_PARSE_PARAMETERS_START(2, 2)
5166 Z_PARAM_STR(input_str)
5167 Z_PARAM_LONG(mult)
5168 ZEND_PARSE_PARAMETERS_END();
5169
5170 if (mult < 0) {
5171 zend_argument_value_error(2, "must be greater than or equal to 0");
5172 RETURN_THROWS();
5173 }
5174
5175 /* Don't waste our time if it's empty */
5176 /* ... or if the multiplier is zero */
5177 if (ZSTR_LEN(input_str) == 0 || mult == 0)
5178 RETURN_EMPTY_STRING();
5179
5180 /* Initialize the result string */
5181 result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
5182 result_len = ZSTR_LEN(input_str) * mult;
5183
5184 /* Heavy optimization for situations where input string is 1 byte long */
5185 if (ZSTR_LEN(input_str) == 1) {
5186 memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
5187 } else {
5188 const char *s, *ee;
5189 char *e;
5190 ptrdiff_t l=0;
5191 memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
5192 s = ZSTR_VAL(result);
5193 e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
5194 ee = ZSTR_VAL(result) + result_len;
5195
5196 while (e<ee) {
5197 l = (e-s) < (ee-e) ? (e-s) : (ee-e);
5198 memmove(e, s, l);
5199 e += l;
5200 }
5201 }
5202
5203 ZSTR_VAL(result)[result_len] = '\0';
5204
5205 RETURN_NEW_STR(result);
5206 }
5207 /* }}} */
5208
5209 /* {{{ Returns info about what characters are used in input */
5210 PHP_FUNCTION(count_chars)
5211 {
5212 zend_string *input;
5213 int chars[256];
5214 zend_long mymode=0;
5215 const unsigned char *buf;
5216 int inx;
5217 char retstr[256];
5218 size_t retlen=0;
5219 size_t tmp = 0;
5220
5221 ZEND_PARSE_PARAMETERS_START(1, 2)
5222 Z_PARAM_STR(input)
5223 Z_PARAM_OPTIONAL
5224 Z_PARAM_LONG(mymode)
5225 ZEND_PARSE_PARAMETERS_END();
5226
5227 if (mymode < 0 || mymode > 4) {
5228 zend_argument_value_error(2, "must be between 1 and 4 (inclusive)");
5229 RETURN_THROWS();
5230 }
5231
5232 buf = (const unsigned char *) ZSTR_VAL(input);
5233 memset((void*) chars, 0, sizeof(chars));
5234
5235 while (tmp < ZSTR_LEN(input)) {
5236 chars[*buf]++;
5237 buf++;
5238 tmp++;
5239 }
5240
5241 if (mymode < 3) {
5242 array_init(return_value);
5243 }
5244
5245 for (inx = 0; inx < 256; inx++) {
5246 switch (mymode) {
5247 case 0:
5248 add_index_long(return_value, inx, chars[inx]);
5249 break;
5250 case 1:
5251 if (chars[inx] != 0) {
5252 add_index_long(return_value, inx, chars[inx]);
5253 }
5254 break;
5255 case 2:
5256 if (chars[inx] == 0) {
5257 add_index_long(return_value, inx, chars[inx]);
5258 }
5259 break;
5260 case 3:
5261 if (chars[inx] != 0) {
5262 retstr[retlen++] = inx;
5263 }
5264 break;
5265 case 4:
5266 if (chars[inx] == 0) {
5267 retstr[retlen++] = inx;
5268 }
5269 break;
5270 }
5271 }
5272
5273 if (mymode >= 3 && mymode <= 4) {
5274 RETURN_STRINGL(retstr, retlen);
5275 }
5276 }
5277 /* }}} */
5278
5279 /* {{{ php_strnatcmp */
5280 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
5281 {
5282 zend_string *s1, *s2;
5283
5284 ZEND_PARSE_PARAMETERS_START(2, 2)
5285 Z_PARAM_STR(s1)
5286 Z_PARAM_STR(s2)
5287 ZEND_PARSE_PARAMETERS_END();
5288
5289 RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
5290 ZSTR_VAL(s2), ZSTR_LEN(s2),
5291 fold_case));
5292 }
5293 /* }}} */
5294
5295 PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2, zend_bool case_insensitive) /* {{{ */
5296 {
5297 zend_string *tmp_str1, *tmp_str2;
5298 zend_string *str1 = zval_get_tmp_string(op1, &tmp_str1);
5299 zend_string *str2 = zval_get_tmp_string(op2, &tmp_str2);
5300
5301 ZVAL_LONG(result, strnatcmp_ex(ZSTR_VAL(str1), ZSTR_LEN(str1), ZSTR_VAL(str2), ZSTR_LEN(str2), case_insensitive));
5302
5303 zend_tmp_string_release(tmp_str1);
5304 zend_tmp_string_release(tmp_str2);
5305 return SUCCESS;
5306 }
5307 /* }}} */
5308
5309 PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
5310 {
5311 return string_natural_compare_function_ex(result, op1, op2, 1);
5312 }
5313 /* }}} */
5314
5315 PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
5316 {
5317 return string_natural_compare_function_ex(result, op1, op2, 0);
5318 }
5319 /* }}} */
5320
5321 /* {{{ Returns the result of string comparison using 'natural' algorithm */
5322 PHP_FUNCTION(strnatcmp)
5323 {
5324 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
5325 }
5326 /* }}} */
5327
5328 /* {{{ Returns numeric formatting information based on the current locale */
5329 PHP_FUNCTION(localeconv)
5330 {
5331 zval grouping, mon_grouping;
5332 int len, i;
5333
5334 ZEND_PARSE_PARAMETERS_NONE();
5335
5336 array_init(return_value);
5337 array_init(&grouping);
5338 array_init(&mon_grouping);
5339
5340 {
5341 struct lconv currlocdata;
5342
5343 localeconv_r( &currlocdata );
5344
5345 /* Grab the grouping data out of the array */
5346 len = (int)strlen(currlocdata.grouping);
5347
5348 for (i = 0; i < len; i++) {
5349 add_index_long(&grouping, i, currlocdata.grouping[i]);
5350 }
5351
5352 /* Grab the monetary grouping data out of the array */
5353 len = (int)strlen(currlocdata.mon_grouping);
5354
5355 for (i = 0; i < len; i++) {
5356 add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
5357 }
5358
5359 add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
5360 add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
5361 add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
5362 add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
5363 add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
5364 add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
5365 add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
5366 add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
5367 add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
5368 add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
5369 add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
5370 add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
5371 add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
5372 add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
5373 add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
5374 add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
5375 }
5376
5377 zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
5378 zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
5379 }
5380 /* }}} */
5381
5382 /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5383 PHP_FUNCTION(strnatcasecmp)
5384 {
5385 php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5386 }
5387 /* }}} */
5388
5389 /* {{{ Returns the number of times a substring occurs in the string */
5390 PHP_FUNCTION(substr_count)
5391 {
5392 char *haystack, *needle;
5393 zend_long offset = 0, length = 0;
5394 zend_bool length_is_null = 1;
5395 zend_long count = 0;
5396 size_t haystack_len, needle_len;
5397 const char *p, *endp;
5398 char cmp;
5399
5400 ZEND_PARSE_PARAMETERS_START(2, 4)
5401 Z_PARAM_STRING(haystack, haystack_len)
5402 Z_PARAM_STRING(needle, needle_len)
5403 Z_PARAM_OPTIONAL
5404 Z_PARAM_LONG(offset)
5405 Z_PARAM_LONG_OR_NULL(length, length_is_null)
5406 ZEND_PARSE_PARAMETERS_END();
5407
5408 if (needle_len == 0) {
5409 zend_argument_value_error(2, "cannot be empty");
5410 RETURN_THROWS();
5411 }
5412
5413 p = haystack;
5414 endp = p + haystack_len;
5415
5416 if (offset < 0) {
5417 offset += (zend_long)haystack_len;
5418 }
5419 if ((offset < 0) || ((size_t)offset > haystack_len)) {
5420 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
5421 RETURN_THROWS();
5422 }
5423 p += offset;
5424
5425 if (!length_is_null) {
5426
5427 if (length < 0) {
5428 length += (haystack_len - offset);
5429 }
5430 if (length < 0 || ((size_t)length > (haystack_len - offset))) {
5431 zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
5432 RETURN_THROWS();
5433 }
5434 endp = p + length;
5435 }
5436
5437 if (needle_len == 1) {
5438 cmp = needle[0];
5439
5440 while ((p = memchr(p, cmp, endp - p))) {
5441 count++;
5442 p++;
5443 }
5444 } else {
5445 while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
5446 p += needle_len;
5447 count++;
5448 }
5449 }
5450
5451 RETURN_LONG(count);
5452 }
5453 /* }}} */
5454
5455 /* {{{ Returns input string padded on the left or right to specified length with pad_string */
5456 PHP_FUNCTION(str_pad)
5457 {
5458 /* Input arguments */
5459 zend_string *input; /* Input string */
5460 zend_long pad_length; /* Length to pad to */
5461
5462 /* Helper variables */
5463 size_t num_pad_chars; /* Number of padding characters (total - input size) */
5464 char *pad_str = " "; /* Pointer to padding string */
5465 size_t pad_str_len = 1;
5466 zend_long pad_type_val = STR_PAD_RIGHT; /* The padding type value */
5467 size_t i, left_pad=0, right_pad=0;
5468 zend_string *result = NULL; /* Resulting string */
5469
5470 ZEND_PARSE_PARAMETERS_START(2, 4)
5471 Z_PARAM_STR(input)
5472 Z_PARAM_LONG(pad_length)
5473 Z_PARAM_OPTIONAL
5474 Z_PARAM_STRING(pad_str, pad_str_len)
5475 Z_PARAM_LONG(pad_type_val)
5476 ZEND_PARSE_PARAMETERS_END();
5477
5478 /* If resulting string turns out to be shorter than input string,
5479 we simply copy the input and return. */
5480 if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
5481 RETURN_STR_COPY(input);
5482 }
5483
5484 if (pad_str_len == 0) {
5485 zend_argument_value_error(3, "must be a non-empty string");
5486 RETURN_THROWS();
5487 }
5488
5489 if (pad_type_val < STR_PAD_LEFT || pad_type_val > STR_PAD_BOTH) {
5490 zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5491 RETURN_THROWS();
5492 }
5493
5494 num_pad_chars = pad_length - ZSTR_LEN(input);
5495 result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
5496 ZSTR_LEN(result) = 0;
5497
5498 /* We need to figure out the left/right padding lengths. */
5499 switch (pad_type_val) {
5500 case STR_PAD_RIGHT:
5501 left_pad = 0;
5502 right_pad = num_pad_chars;
5503 break;
5504
5505 case STR_PAD_LEFT:
5506 left_pad = num_pad_chars;
5507 right_pad = 0;
5508 break;
5509
5510 case STR_PAD_BOTH:
5511 left_pad = num_pad_chars / 2;
5512 right_pad = num_pad_chars - left_pad;
5513 break;
5514 }
5515
5516 /* First we pad on the left. */
5517 for (i = 0; i < left_pad; i++)
5518 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5519
5520 /* Then we copy the input string. */
5521 memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
5522 ZSTR_LEN(result) += ZSTR_LEN(input);
5523
5524 /* Finally, we pad on the right. */
5525 for (i = 0; i < right_pad; i++)
5526 ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
5527
5528 ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5529
5530 RETURN_NEW_STR(result);
5531 }
5532 /* }}} */
5533
5534 /* {{{ Implements an ANSI C compatible sscanf */
5535 PHP_FUNCTION(sscanf)
5536 {
5537 zval *args = NULL;
5538 char *str, *format;
5539 size_t str_len, format_len;
5540 int result, num_args = 0;
5541
5542 ZEND_PARSE_PARAMETERS_START(2, -1)
5543 Z_PARAM_STRING(str, str_len)
5544 Z_PARAM_STRING(format, format_len)
5545 Z_PARAM_VARIADIC('*', args, num_args)
5546 ZEND_PARSE_PARAMETERS_END();
5547
5548 result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
5549
5550 if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
5551 WRONG_PARAM_COUNT;
5552 }
5553 }
5554 /* }}} */
5555
5556 /* static zend_string *php_str_rot13(zend_string *str) {{{ */
5557 #ifdef __SSE2__
5558 #include <emmintrin.h>
5559 #endif
5560 static zend_string *php_str_rot13(zend_string *str)
5561 {
5562 zend_string *ret;
5563 const char *p, *e;
5564 char *target;
5565
5566 if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
5567 return ZSTR_EMPTY_ALLOC();
5568 }
5569
5570 ret = zend_string_alloc(ZSTR_LEN(str), 0);
5571
5572 p = ZSTR_VAL(str);
5573 e = p + ZSTR_LEN(str);
5574 target = ZSTR_VAL(ret);
5575
5576 #ifdef __SSE2__
5577 if (e - p > 15) {
5578 const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
5579 const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
5580 const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
5581 const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
5582 const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
5583 const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
5584 const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
5585 const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
5586 const __m128i add = _mm_set1_epi8(13);
5587 const __m128i sub = _mm_set1_epi8(-13);
5588
5589 do {
5590 __m128i in, gt, lt, cmp, delta;
5591
5592 delta = _mm_setzero_si128();
5593 in = _mm_loadu_si128((__m128i *)p);
5594
5595 gt = _mm_cmpgt_epi8(in, a_minus_1);
5596 lt = _mm_cmplt_epi8(in, m_plus_1);
5597 cmp = _mm_and_si128(lt, gt);
5598 if (_mm_movemask_epi8(cmp)) {
5599 cmp = _mm_and_si128(cmp, add);
5600 delta = _mm_or_si128(delta, cmp);
5601 }
5602
5603 gt = _mm_cmpgt_epi8(in, n_minus_1);
5604 lt = _mm_cmplt_epi8(in, z_plus_1);
5605 cmp = _mm_and_si128(lt, gt);
5606 if (_mm_movemask_epi8(cmp)) {
5607 cmp = _mm_and_si128(cmp, sub);
5608 delta = _mm_or_si128(delta, cmp);
5609 }
5610
5611 gt = _mm_cmpgt_epi8(in, A_minus_1);
5612 lt = _mm_cmplt_epi8(in, M_plus_1);
5613 cmp = _mm_and_si128(lt, gt);
5614 if (_mm_movemask_epi8(cmp)) {
5615 cmp = _mm_and_si128(cmp, add);
5616 delta = _mm_or_si128(delta, cmp);
5617 }
5618
5619 gt = _mm_cmpgt_epi8(in, N_minus_1);
5620 lt = _mm_cmplt_epi8(in, Z_plus_1);
5621 cmp = _mm_and_si128(lt, gt);
5622 if (_mm_movemask_epi8(cmp)) {
5623 cmp = _mm_and_si128(cmp, sub);
5624 delta = _mm_or_si128(delta, cmp);
5625 }
5626
5627 in = _mm_add_epi8(in, delta);
5628 _mm_storeu_si128((__m128i *)target, in);
5629
5630 p += 16;
5631 target += 16;
5632 } while (e - p > 15);
5633 }
5634 #endif
5635
5636 while (p < e) {
5637 if (*p >= 'a' && *p <= 'z') {
5638 *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
5639 } else if (*p >= 'A' && *p <= 'Z') {
5640 *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
5641 } else {
5642 *target++ = *p++;
5643 }
5644 }
5645
5646 *target = '\0';
5647
5648 return ret;
5649 }
5650 /* }}} */
5651
5652 /* {{{ Perform the rot13 transform on a string */
5653 PHP_FUNCTION(str_rot13)
5654 {
5655 zend_string *arg;
5656
5657 ZEND_PARSE_PARAMETERS_START(1, 1)
5658 Z_PARAM_STR(arg)
5659 ZEND_PARSE_PARAMETERS_END();
5660
5661 RETURN_STR(php_str_rot13(arg));
5662 }
5663 /* }}} */
5664
5665 static void php_string_shuffle(char *str, zend_long len) /* {{{ */
5666 {
5667 zend_long n_elems, rnd_idx, n_left;
5668 char temp;
5669 /* The implementation is stolen from array_data_shuffle */
5670 /* Thus the characteristics of the randomization are the same */
5671 n_elems = len;
5672
5673 if (n_elems <= 1) {
5674 return;
5675 }
5676
5677 n_left = n_elems;
5678
5679 while (--n_left) {
5680 rnd_idx = php_mt_rand_range(0, n_left);
5681 if (rnd_idx != n_left) {
5682 temp = str[n_left];
5683 str[n_left] = str[rnd_idx];
5684 str[rnd_idx] = temp;
5685 }
5686 }
5687 }
5688 /* }}} */
5689
5690 /* {{{ Shuffles string. One permutation of all possible is created */
5691 PHP_FUNCTION(str_shuffle)
5692 {
5693 zend_string *arg;
5694
5695 ZEND_PARSE_PARAMETERS_START(1, 1)
5696 Z_PARAM_STR(arg)
5697 ZEND_PARSE_PARAMETERS_END();
5698
5699 RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
5700 if (Z_STRLEN_P(return_value) > 1) {
5701 php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
5702 }
5703 }
5704 /* }}} */
5705
5706 /* {{{ Counts the number of words inside a string. If format of 1 is specified,
5707 then the function will return an array containing all the words
5708 found inside the string. If format of 2 is specified, then the function
5709 will return an associated array where the position of the word is the key
5710 and the word itself is the value.
5711 For the purpose of this function, 'word' is defined as a locale dependent
5712 string containing alphabetic characters, which also may contain, but not start
5713 with "'" and "-" characters.
5714 */
5715 PHP_FUNCTION(str_word_count)
5716 {
5717 zend_string *str;
5718 char *char_list = NULL, ch[256];
5719 const char *p, *e, *s;
5720 size_t char_list_len = 0, word_count = 0;
5721 zend_long type = 0;
5722
5723 ZEND_PARSE_PARAMETERS_START(1, 3)
5724 Z_PARAM_STR(str)
5725 Z_PARAM_OPTIONAL
5726 Z_PARAM_LONG(type)
5727 Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
5728 ZEND_PARSE_PARAMETERS_END();
5729
5730 switch(type) {
5731 case 1:
5732 case 2:
5733 array_init(return_value);
5734 if (!ZSTR_LEN(str)) {
5735 return;
5736 }
5737 break;
5738 case 0:
5739 if (!ZSTR_LEN(str)) {
5740 RETURN_LONG(0);
5741 }
5742 /* nothing to be done */
5743 break;
5744 default:
5745 zend_argument_value_error(2, "must be a valid format value");
5746 RETURN_THROWS();
5747 }
5748
5749 if (char_list) {
5750 php_charmask((const unsigned char *) char_list, char_list_len, ch);
5751 }
5752
5753 p = ZSTR_VAL(str);
5754 e = ZSTR_VAL(str) + ZSTR_LEN(str);
5755
5756 /* first character cannot be ' or -, unless explicitly allowed by the user */
5757 if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
5758 p++;
5759 }
5760 /* last character cannot be -, unless explicitly allowed by the user */
5761 if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
5762 e--;
5763 }
5764
5765 while (p < e) {
5766 s = p;
5767 while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
5768 p++;
5769 }
5770 if (p > s) {
5771 switch (type)
5772 {
5773 case 1:
5774 add_next_index_stringl(return_value, s, p - s);
5775 break;
5776 case 2:
5777 add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
5778 break;
5779 default:
5780 word_count++;
5781 break;
5782 }
5783 }
5784 p++;
5785 }
5786
5787 if (!type) {
5788 RETURN_LONG(word_count);
5789 }
5790 }
5791
5792 /* }}} */
5793
5794 /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
5795 PHP_FUNCTION(str_split)
5796 {
5797 zend_string *str;
5798 zend_long split_length = 1;
5799 const char *p;
5800 size_t n_reg_segments;
5801
5802 ZEND_PARSE_PARAMETERS_START(1, 2)
5803 Z_PARAM_STR(str)
5804 Z_PARAM_OPTIONAL
5805 Z_PARAM_LONG(split_length)
5806 ZEND_PARSE_PARAMETERS_END();
5807
5808 if (split_length <= 0) {
5809 zend_argument_value_error(2, "must be greater than 0");
5810 RETURN_THROWS();
5811 }
5812
5813 if (0 == ZSTR_LEN(str) || (size_t)split_length >= ZSTR_LEN(str)) {
5814 array_init_size(return_value, 1);
5815 add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
5816 return;
5817 }
5818
5819 array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
5820
5821 n_reg_segments = ZSTR_LEN(str) / split_length;
5822 p = ZSTR_VAL(str);
5823
5824 while (n_reg_segments-- > 0) {
5825 add_next_index_stringl(return_value, p, split_length);
5826 p += split_length;
5827 }
5828
5829 if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
5830 add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
5831 }
5832 }
5833 /* }}} */
5834
5835 /* {{{ Search a string for any of a set of characters */
5836 PHP_FUNCTION(strpbrk)
5837 {
5838 zend_string *haystack, *char_list;
5839 const char *haystack_ptr, *cl_ptr;
5840
5841 ZEND_PARSE_PARAMETERS_START(2, 2)
5842 Z_PARAM_STR(haystack)
5843 Z_PARAM_STR(char_list)
5844 ZEND_PARSE_PARAMETERS_END();
5845
5846 if (!ZSTR_LEN(char_list)) {
5847 zend_argument_value_error(2, "must be a non-empty string");
5848 RETURN_THROWS();
5849 }
5850
5851 for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
5852 for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
5853 if (*cl_ptr == *haystack_ptr) {
5854 RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
5855 }
5856 }
5857 }
5858
5859 RETURN_FALSE;
5860 }
5861 /* }}} */
5862
5863 /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
5864 PHP_FUNCTION(substr_compare)
5865 {
5866 zend_string *s1, *s2;
5867 zend_long offset, len=0;
5868 zend_bool len_is_default=1;
5869 zend_bool cs=0;
5870 size_t cmp_len;
5871
5872 ZEND_PARSE_PARAMETERS_START(3, 5)
5873 Z_PARAM_STR(s1)
5874 Z_PARAM_STR(s2)
5875 Z_PARAM_LONG(offset)
5876 Z_PARAM_OPTIONAL
5877 Z_PARAM_LONG_OR_NULL(len, len_is_default)
5878 Z_PARAM_BOOL(cs)
5879 ZEND_PARSE_PARAMETERS_END();
5880
5881 if (!len_is_default && len <= 0) {
5882 if (len == 0) {
5883 RETURN_LONG(0L);
5884 } else {
5885 zend_argument_value_error(4, "must be greater than or equal to 0");
5886 RETURN_THROWS();
5887 }
5888 }
5889
5890 if (offset < 0) {
5891 offset = ZSTR_LEN(s1) + offset;
5892 offset = (offset < 0) ? 0 : offset;
5893 }
5894
5895 if ((size_t)offset > ZSTR_LEN(s1)) {
5896 zend_argument_value_error(3, "must be contained in argument #1 ($main_str)");
5897 RETURN_THROWS();
5898 }
5899
5900 cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
5901
5902 if (!cs) {
5903 RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5904 } else {
5905 RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
5906 }
5907 }
5908 /* }}} */
5909
5910 /* {{{ */
5911 static zend_string *php_utf8_encode(const char *s, size_t len)
5912 {
5913 size_t pos = len;
5914 zend_string *str;
5915 unsigned char c;
5916
5917 str = zend_string_safe_alloc(len, 2, 0, 0);
5918 ZSTR_LEN(str) = 0;
5919 while (pos > 0) {
5920 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5921 * so we don't need to do any mapping here. */
5922 c = (unsigned char)(*s);
5923 if (c < 0x80) {
5924 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
5925 /* We only account for the single-byte and two-byte cases because
5926 * we're only dealing with the first 256 Unicode codepoints. */
5927 } else {
5928 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
5929 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
5930 }
5931 pos--;
5932 s++;
5933 }
5934 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
5935 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5936 return str;
5937 }
5938 /* }}} */
5939
5940 /* {{{ */
5941 static zend_string *php_utf8_decode(const char *s, size_t len)
5942 {
5943 size_t pos = 0;
5944 unsigned int c;
5945 zend_string *str;
5946
5947 str = zend_string_alloc(len, 0);
5948 ZSTR_LEN(str) = 0;
5949 while (pos < len) {
5950 int status = FAILURE;
5951 c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
5952
5953 /* The lower 256 codepoints of Unicode are identical to Latin-1,
5954 * so we don't need to do any mapping here beyond replacing non-Latin-1
5955 * characters. */
5956 if (status == FAILURE || c > 0xFFU) {
5957 c = '?';
5958 }
5959
5960 ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
5961 }
5962 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
5963 if (ZSTR_LEN(str) < len) {
5964 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5965 }
5966
5967 return str;
5968 }
5969 /* }}} */
5970
5971 /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
5972 PHP_FUNCTION(utf8_encode)
5973 {
5974 char *arg;
5975 size_t arg_len;
5976
5977 ZEND_PARSE_PARAMETERS_START(1, 1)
5978 Z_PARAM_STRING(arg, arg_len)
5979 ZEND_PARSE_PARAMETERS_END();
5980
5981 RETURN_STR(php_utf8_encode(arg, arg_len));
5982 }
5983 /* }}} */
5984
5985 /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
5986 PHP_FUNCTION(utf8_decode)
5987 {
5988 char *arg;
5989 size_t arg_len;
5990
5991 ZEND_PARSE_PARAMETERS_START(1, 1)
5992 Z_PARAM_STRING(arg, arg_len)
5993 ZEND_PARSE_PARAMETERS_END();
5994
5995 RETURN_STR(php_utf8_decode(arg, arg_len));
5996 }
5997 /* }}} */
5998