1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2016 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Rasmus Lerdorf <rasmus@php.net> |
16 | Jim Winstead <jimw@php.net> |
17 | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
18 +----------------------------------------------------------------------+
19 */
20 /* $Id$ */
21
22 #include <stdio.h>
23 #include <ctype.h>
24 #include "php.h"
25 #include "ext/standard/php_string.h"
26 #include "php_ereg.h"
27 #include "ext/standard/info.h"
28
29 /* {{{ arginfo */
30 ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
31 ZEND_ARG_INFO(0, pattern)
32 ZEND_ARG_INFO(0, string)
33 ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
34 ZEND_END_ARG_INFO()
35
36 ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
37 ZEND_ARG_INFO(0, pattern)
38 ZEND_ARG_INFO(0, replacement)
39 ZEND_ARG_INFO(0, string)
40 ZEND_END_ARG_INFO()
41
42 ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
43 ZEND_ARG_INFO(0, pattern)
44 ZEND_ARG_INFO(0, string)
45 ZEND_ARG_INFO(0, limit)
46 ZEND_END_ARG_INFO()
47
48 ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
49 ZEND_ARG_INFO(0, string)
50 ZEND_END_ARG_INFO()
51 /* }}} */
52
53 /* {{{ Function table */
54 const zend_function_entry ereg_functions[] = {
55 PHP_DEP_FE(ereg, arginfo_ereg)
56 PHP_DEP_FE(ereg_replace, arginfo_ereg_replace)
57 PHP_DEP_FE(eregi, arginfo_ereg)
58 PHP_DEP_FE(eregi_replace, arginfo_ereg_replace)
59 PHP_DEP_FE(split, arginfo_split)
60 PHP_DEP_FE(spliti, arginfo_split)
61 PHP_DEP_FE(sql_regcase, arginfo_sql_regcase)
62 PHP_FE_END
63 };
64 /* }}} */
65
/* {{{ reg_cache */
/* One cached compilation, stored by value in EREG(ht_rc) under the pattern text. */
typedef struct {
	regex_t preg;           /* compiled expression, copied bitwise into and out of the cache */
	int cflags;             /* compile flags; a lookup only counts as a hit when these match */
	unsigned long lastuse;  /* EREG(lru_counter) value assigned when the entry was stored */
} reg_cache;

/* re_magic of the first successfully compiled expression; stays 0 until one is seen. */
static int reg_magic = 0;

/* Number of cached patterns at which _php_regcomp() starts evicting. */
#define EREG_CACHE_SIZE 4096
/* }}} */
75
76 ZEND_DECLARE_MODULE_GLOBALS(ereg)
77 static PHP_GINIT_FUNCTION(ereg);
78 static PHP_GSHUTDOWN_FUNCTION(ereg);
79
80 /* {{{ Module entry */
81 zend_module_entry ereg_module_entry = {
82 STANDARD_MODULE_HEADER,
83 "ereg",
84 ereg_functions,
85 NULL,
86 NULL,
87 NULL,
88 NULL,
89 PHP_MINFO(ereg),
90 NO_VERSION_YET,
91 PHP_MODULE_GLOBALS(ereg),
92 PHP_GINIT(ereg),
93 PHP_GSHUTDOWN(ereg),
94 NULL,
95 STANDARD_MODULE_PROPERTIES_EX
96 };
97 /* }}} */
98
/* {{{ COMPILE_DL_EREG */
/* Emitted only when COMPILE_DL_EREG is defined by the build. */
#ifdef COMPILE_DL_EREG
ZEND_GET_MODULE(ereg)
#endif
/* }}} */
104
105 /* {{{ ereg_lru_cmp */
106 static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC)
107 {
108 Bucket *f = *((Bucket **) a);
109 Bucket *s = *((Bucket **) b);
110
111 if (((reg_cache *)f->pData)->lastuse <
112 ((reg_cache *)s->pData)->lastuse) {
113 return -1;
114 } else if (((reg_cache *)f->pData)->lastuse ==
115 ((reg_cache *)s->pData)->lastuse) {
116 return 0;
117 } else {
118 return 1;
119 }
120 }
121 /* }}} */
122
123 /* {{{ static ereg_clean_cache */
ereg_clean_cache(void * data,void * arg TSRMLS_DC)124 static int ereg_clean_cache(void *data, void *arg TSRMLS_DC)
125 {
126 int *num_clean = (int *)arg;
127
128 if (*num_clean > 0) {
129 (*num_clean)--;
130 return ZEND_HASH_APPLY_REMOVE;
131 } else {
132 return ZEND_HASH_APPLY_STOP;
133 }
134 }
135 /* }}} */
136
137 /* {{{ _php_regcomp
138 */
_php_regcomp(regex_t * preg,const char * pattern,int cflags TSRMLS_DC)139 static int _php_regcomp(regex_t *preg, const char *pattern, int cflags TSRMLS_DC)
140 {
141 int r = 0;
142 int patlen = strlen(pattern);
143 reg_cache *rc = NULL;
144
145 if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
146 /* easier than dealing with overflow as it happens */
147 if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
148 zend_hash_clean(&EREG(ht_rc));
149 EREG(lru_counter) = 0;
150 } else {
151 int num_clean = EREG_CACHE_SIZE / 4;
152 zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
153 }
154 }
155
156 if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
157 && rc->cflags == cflags) {
158 #ifdef HAVE_REGEX_T_RE_MAGIC
159 /*
160 * We use a saved magic number to see whether cache is corrupted, and if it
161 * is, we flush it and compile the pattern from scratch.
162 */
163 if (rc->preg.re_magic != reg_magic) {
164 zend_hash_clean(&EREG(ht_rc));
165 EREG(lru_counter) = 0;
166 } else {
167 memcpy(preg, &rc->preg, sizeof(*preg));
168 return r;
169 }
170 }
171
172 r = regcomp(preg, pattern, cflags);
173 if(!r) {
174 reg_cache rcp;
175
176 rcp.cflags = cflags;
177 rcp.lastuse = ++(EREG(lru_counter));
178 memcpy(&rcp.preg, preg, sizeof(*preg));
179 /*
180 * Since we don't have access to the actual MAGIC1 definition in the private
181 * header file, we save the magic value immediately after compilation. Hopefully,
182 * it's good.
183 */
184 if (!reg_magic) reg_magic = preg->re_magic;
185 zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
186 (void *) &rcp, sizeof(rcp), NULL);
187 }
188 #else
189 memcpy(preg, &rc->preg, sizeof(*preg));
190 } else {
191 r = regcomp(preg, pattern, cflags);
192 if(!r) {
193 reg_cache rcp;
194
195 rcp.cflags = cflags;
196 rcp.lastuse = ++(EREG(lru_counter));
197 memcpy(&rcp.preg, preg, sizeof(*preg));
198 zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
199 (void *) &rcp, sizeof(rcp), NULL);
200 }
201 }
202 #endif
203 return r;
204 }
205 /* }}} */
206
/*
 * Destructor installed on the pattern cache by PHP_GINIT_FUNCTION(ereg).
 * It is defined above the point where regfree is redefined as a macro,
 * so this call reaches the real regfree().
 */
static void _free_ereg_cache(reg_cache *rc)
{
	regex_t *compiled = &rc->preg;

	regfree(compiled);
}
211
/*
 * From here on the rest of the file goes through the cache:
 *  - regfree() becomes a no-op. It expands to an expression rather than
 *    a bare ';' so that `regfree(x);` stays a single statement and is
 *    safe inside an un-braced if/else.
 *  - regcomp() is routed to _php_regcomp().
 */
#undef regfree
#define regfree(a) ((void) 0)
#undef regcomp
#define regcomp(a, b, c) _php_regcomp(a, b, c TSRMLS_CC)
216
217 /* {{{ PHP_GINIT_FUNCTION
218 */
PHP_GINIT_FUNCTION(ereg)219 static PHP_GINIT_FUNCTION(ereg)
220 {
221 zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
222 ereg_globals->lru_counter = 0;
223 }
224 /* }}} */
225
226 /* {{{ PHP_GSHUTDOWN_FUNCTION
227 */
PHP_GSHUTDOWN_FUNCTION(ereg)228 static PHP_GSHUTDOWN_FUNCTION(ereg)
229 {
230 zend_hash_destroy(&ereg_globals->ht_rc);
231 }
232 /* }}} */
233
/* Prints a one-row info table stating which regex library the build uses (selected by HSREGEX). */
PHP_MINFO_FUNCTION(ereg)
{
#if HSREGEX
	char *library = "Bundled library enabled";
#else
	char *library = "System library enabled";
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "Regex Library", library);
	php_info_print_table_end();
}
244
245
246 /* {{{ php_ereg_eprint
247 * php_ereg_eprint - convert error number to name
248 */
php_ereg_eprint(int err,regex_t * re TSRMLS_DC)249 static void php_ereg_eprint(int err, regex_t *re TSRMLS_DC) {
250 char *buf = NULL, *message = NULL;
251 size_t len;
252 size_t buf_len;
253
254 #ifdef REG_ITOA
255 /* get the length of the message */
256 buf_len = regerror(REG_ITOA | err, re, NULL, 0);
257 if (buf_len) {
258 buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
259 if (!buf) return; /* fail silently */
260 /* finally, get the error message */
261 regerror(REG_ITOA | err, re, buf, buf_len);
262 }
263 #else
264 buf_len = 0;
265 #endif
266 len = regerror(err, re, NULL, 0);
267 if (len) {
268 message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
269 if (!message) {
270 return; /* fail silently */
271 }
272 if (buf_len) {
273 snprintf(message, buf_len, "%s: ", buf);
274 buf_len += 1; /* so pointer math below works */
275 }
276 /* drop the message into place */
277 regerror(err, re, message + buf_len, len);
278
279 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
280 }
281
282 STR_FREE(buf);
283 STR_FREE(message);
284 }
285 /* }}} */
286
287 /* {{{ php_ereg
288 */
php_ereg(INTERNAL_FUNCTION_PARAMETERS,int icase)289 static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
290 {
291 zval **regex, /* Regular expression */
292 **array = NULL; /* Optional register array */
293 char *findin; /* String to apply expression to */
294 int findin_len;
295 regex_t re;
296 regmatch_t *subs;
297 int err, match_len, string_len;
298 uint i;
299 int copts = 0;
300 off_t start, end;
301 char *buf = NULL;
302 char *string = NULL;
303 int argc = ZEND_NUM_ARGS();
304
305 if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", ®ex, &findin, &findin_len, &array) == FAILURE) {
306 return;
307 }
308
309 if (icase) {
310 copts |= REG_ICASE;
311 }
312
313 if (argc == 2) {
314 copts |= REG_NOSUB;
315 }
316
317 /* compile the regular expression from the supplied regex */
318 if (Z_TYPE_PP(regex) == IS_STRING) {
319 err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
320 } else {
321 /* we convert numbers to integers and treat them as a string */
322 if (Z_TYPE_PP(regex) == IS_DOUBLE) {
323 convert_to_long_ex(regex); /* get rid of decimal places */
324 }
325 convert_to_string_ex(regex);
326 /* don't bother doing an extended regex with just a number */
327 err = regcomp(&re, Z_STRVAL_PP(regex), copts);
328 }
329
330 if (err) {
331 php_ereg_eprint(err, &re TSRMLS_CC);
332 RETURN_FALSE;
333 }
334
335 /* make a copy of the string we're looking in */
336 string = estrndup(findin, findin_len);
337
338 /* allocate storage for (sub-)expression-matches */
339 subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
340
341 /* actually execute the regular expression */
342 err = regexec(&re, string, re.re_nsub+1, subs, 0);
343 if (err && err != REG_NOMATCH) {
344 php_ereg_eprint(err, &re TSRMLS_CC);
345 regfree(&re);
346 efree(subs);
347 RETURN_FALSE;
348 }
349 match_len = 1;
350
351 if (array && err != REG_NOMATCH) {
352 match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
353 string_len = findin_len + 1;
354
355 buf = emalloc(string_len);
356
357 zval_dtor(*array); /* start with clean array */
358 array_init(*array);
359
360 for (i = 0; i <= re.re_nsub; i++) {
361 start = subs[i].rm_so;
362 end = subs[i].rm_eo;
363 if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
364 add_index_stringl(*array, i, string+start, end-start, 1);
365 } else {
366 add_index_bool(*array, i, 0);
367 }
368 }
369 efree(buf);
370 }
371
372 efree(subs);
373 efree(string);
374 if (err == REG_NOMATCH) {
375 RETVAL_FALSE;
376 } else {
377 if (match_len == 0)
378 match_len = 1;
379 RETVAL_LONG(match_len);
380 }
381 regfree(&re);
382 }
383 /* }}} */
384
385 /* {{{ proto int ereg(string pattern, string string [, array registers])
386 Regular expression match */
PHP_FUNCTION(ereg)387 PHP_FUNCTION(ereg)
388 {
389 php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
390 }
391 /* }}} */
392
393 /* {{{ proto int eregi(string pattern, string string [, array registers])
394 Case-insensitive regular expression match */
PHP_FUNCTION(eregi)395 PHP_FUNCTION(eregi)
396 {
397 php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
398 }
399 /* }}} */
400
401 /* {{{ php_ereg_replace
402 * this is the meat and potatoes of regex replacement! */
php_ereg_replace(const char * pattern,const char * replace,const char * string,int icase,int extended TSRMLS_DC)403 PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC)
404 {
405 regex_t re;
406 regmatch_t *subs;
407
408 char *buf, /* buf is where we build the replaced string */
409 *nbuf, /* nbuf is used when we grow the buffer */
410 *walkbuf; /* used to walk buf when replacing backrefs */
411 const char *walk; /* used to walk replacement string for backrefs */
412 size_t buf_len, new_l;
413 int pos, tmp, string_len;
414 int err, copts = 0;
415
416 string_len = strlen(string);
417
418 if (icase) {
419 copts = REG_ICASE;
420 }
421 if (extended) {
422 copts |= REG_EXTENDED;
423 }
424
425 err = regcomp(&re, pattern, copts);
426 if (err) {
427 php_ereg_eprint(err, &re TSRMLS_CC);
428 return ((char *) -1);
429 }
430
431
432 /* allocate storage for (sub-)expression-matches */
433 subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
434
435 /* start with a buffer that is twice the size of the stringo
436 we're doing replacements in */
437 buf = safe_emalloc(string_len, 2, 1);
438 buf_len = 2 * string_len + 1;
439
440 err = pos = 0;
441 buf[0] = '\0';
442 while (!err) {
443 err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
444
445 if (err && err != REG_NOMATCH) {
446 php_ereg_eprint(err, &re TSRMLS_CC);
447 efree(subs);
448 efree(buf);
449 regfree(&re);
450 return ((char *) -1);
451 }
452
453 if (!err) {
454 /* backref replacement is done in two passes:
455 1) find out how long the string will be, and allocate buf
456 2) copy the part before match, replacement and backrefs to buf
457
458 Jaakko Hyv�tti <Jaakko.Hyvatti@iki.fi>
459 */
460
461 new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
462 walk = replace;
463 while (*walk) {
464 if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
465 if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
466 new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
467 }
468 walk += 2;
469 } else {
470 new_l++;
471 walk++;
472 }
473 }
474 if (new_l + 1 > buf_len) {
475 nbuf = safe_emalloc(new_l + 1, 2, buf_len);
476 buf_len = 1 + buf_len + 2 * new_l;
477 strncpy(nbuf, buf, buf_len - 1);
478 nbuf[buf_len - 1] = '\0';
479 efree(buf);
480 buf = nbuf;
481 }
482 tmp = strlen(buf);
483 /* copy the part of the string before the match */
484 strncat(buf, &string[pos], subs[0].rm_so);
485
486 /* copy replacement and backrefs */
487 walkbuf = &buf[tmp + subs[0].rm_so];
488 walk = replace;
489 while (*walk) {
490 if ('\\' == *walk && isdigit((unsigned char)walk[1]) && (unsigned char)walk[1] - '0' <= (int)re.re_nsub) {
491 if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
492 /* this next case shouldn't happen. it does. */
493 && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
494
495 tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
496 memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
497 walkbuf += tmp;
498 }
499 walk += 2;
500 } else {
501 *walkbuf++ = *walk++;
502 }
503 }
504 *walkbuf = '\0';
505
506 /* and get ready to keep looking for replacements */
507 if (subs[0].rm_so == subs[0].rm_eo) {
508 if (subs[0].rm_so + pos >= string_len) {
509 break;
510 }
511 new_l = strlen (buf) + 1;
512 if (new_l + 1 > buf_len) {
513 nbuf = safe_emalloc(new_l + 1, 2, buf_len);
514 buf_len = 1 + buf_len + 2 * new_l;
515 strncpy(nbuf, buf, buf_len-1);
516 efree(buf);
517 buf = nbuf;
518 }
519 pos += subs[0].rm_eo + 1;
520 buf [new_l-1] = string [pos-1];
521 buf [new_l] = '\0';
522 } else {
523 pos += subs[0].rm_eo;
524 }
525 } else { /* REG_NOMATCH */
526 new_l = strlen(buf) + strlen(&string[pos]);
527 if (new_l + 1 > buf_len) {
528 buf_len = new_l + 1; /* now we know exactly how long it is */
529 nbuf = safe_emalloc(new_l, 1, 1);
530 strncpy(nbuf, buf, buf_len-1);
531 efree(buf);
532 buf = nbuf;
533 }
534 /* stick that last bit of string on our output */
535 strlcat(buf, &string[pos], buf_len);
536 }
537 }
538
539 /* don't want to leak memory .. */
540 efree(subs);
541 regfree(&re);
542
543 /* whew. */
544 return (buf);
545 }
546 /* }}} */
547
548 /* {{{ php_do_ereg_replace
549 */
php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS,int icase)550 static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
551 {
552 zval **arg_pattern,
553 **arg_replace;
554 char *pattern, *arg_string;
555 char *string;
556 char *replace;
557 char *ret;
558 int arg_string_len;
559
560 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
561 return;
562 }
563
564 if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
565 if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
566 pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
567 } else {
568 pattern = STR_EMPTY_ALLOC();
569 }
570 } else {
571 convert_to_long_ex(arg_pattern);
572 pattern = emalloc(2);
573 pattern[0] = (char) Z_LVAL_PP(arg_pattern);
574 pattern[1] = '\0';
575 }
576
577 if (Z_TYPE_PP(arg_replace) == IS_STRING) {
578 if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
579 replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
580 } else {
581 replace = STR_EMPTY_ALLOC();
582 }
583 } else {
584 convert_to_long_ex(arg_replace);
585 replace = emalloc(2);
586 replace[0] = (char) Z_LVAL_PP(arg_replace);
587 replace[1] = '\0';
588 }
589
590 if (arg_string && arg_string_len) {
591 string = estrndup(arg_string, arg_string_len);
592 } else {
593 string = STR_EMPTY_ALLOC();
594 }
595
596 /* do the actual work */
597 ret = php_ereg_replace(pattern, replace, string, icase, 1 TSRMLS_CC);
598 if (ret == (char *) -1) {
599 RETVAL_FALSE;
600 } else {
601 RETVAL_STRINGL_CHECK(ret, strlen(ret), 1);
602 STR_FREE(ret);
603 }
604
605 STR_FREE(string);
606 STR_FREE(replace);
607 STR_FREE(pattern);
608 }
609 /* }}} */
610
611 /* {{{ proto string ereg_replace(string pattern, string replacement, string string)
612 Replace regular expression */
PHP_FUNCTION(ereg_replace)613 PHP_FUNCTION(ereg_replace)
614 {
615 php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
616 }
617 /* }}} */
618
619 /* {{{ proto string eregi_replace(string pattern, string replacement, string string)
620 Case insensitive replace regular expression */
PHP_FUNCTION(eregi_replace)621 PHP_FUNCTION(eregi_replace)
622 {
623 php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
624 }
625 /* }}} */
626
627 /* {{{ php_split
628 */
php_split(INTERNAL_FUNCTION_PARAMETERS,int icase)629 static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
630 {
631 long count = -1;
632 regex_t re;
633 regmatch_t subs[1];
634 char *spliton, *str, *strp, *endp;
635 int spliton_len, str_len;
636 int err, size, copts = 0;
637
638 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
639 return;
640 }
641
642 if (icase) {
643 copts = REG_ICASE;
644 }
645
646 strp = str;
647 endp = strp + str_len;
648
649 err = regcomp(&re, spliton, REG_EXTENDED | copts);
650 if (err) {
651 php_ereg_eprint(err, &re TSRMLS_CC);
652 RETURN_FALSE;
653 }
654
655 array_init(return_value);
656
657 /* churn through str, generating array entries as we go */
658 while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
659 if (subs[0].rm_so == 0 && subs[0].rm_eo) {
660 /* match is at start of string, return empty string */
661 add_next_index_stringl(return_value, "", 0, 1);
662 /* skip ahead the length of the regex match */
663 strp += subs[0].rm_eo;
664 } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
665 /* No more matches */
666 regfree(&re);
667
668 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
669
670 zend_hash_destroy(Z_ARRVAL_P(return_value));
671 efree(Z_ARRVAL_P(return_value));
672 RETURN_FALSE;
673 } else {
674 /* On a real match */
675
676 /* make a copy of the substring */
677 size = subs[0].rm_so;
678
679 /* add it to the array */
680 add_next_index_stringl(return_value, strp, size, 1);
681
682 /* point at our new starting point */
683 strp = strp + subs[0].rm_eo;
684 }
685
686 /* if we're only looking for a certain number of points,
687 stop looking once we hit it */
688 if (count != -1) {
689 count--;
690 }
691 }
692
693 /* see if we encountered an error */
694 if (err && err != REG_NOMATCH) {
695 php_ereg_eprint(err, &re TSRMLS_CC);
696 regfree(&re);
697 zend_hash_destroy(Z_ARRVAL_P(return_value));
698 efree(Z_ARRVAL_P(return_value));
699 RETURN_FALSE;
700 }
701
702 /* otherwise we just have one last element to add to the array */
703 size = endp - strp;
704
705 add_next_index_stringl(return_value, strp, size, 1);
706
707 regfree(&re);
708 }
709 /* }}} */
710
711 /* {{{ proto array split(string pattern, string string [, int limit])
712 Split string into array by regular expression */
PHP_FUNCTION(split)713 PHP_FUNCTION(split)
714 {
715 php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
716 }
717 /* }}} */
718
719 /* {{{ proto array spliti(string pattern, string string [, int limit])
720 Split string into array by regular expression case-insensitive */
721
PHP_FUNCTION(spliti)722 PHP_FUNCTION(spliti)
723 {
724 php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
725 }
726
727 /* }}} */
728
729 /* {{{ proto string sql_regcase(string string)
730 Make regular expression for case insensitive match */
PHP_FUNCTION(sql_regcase)731 PHP_EREG_API PHP_FUNCTION(sql_regcase)
732 {
733 char *string, *tmp;
734 int string_len;
735 unsigned char c;
736 register int i, j;
737
738 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
739 return;
740 }
741
742 tmp = safe_emalloc(string_len, 4, 1);
743
744 for (i = j = 0; i < string_len; i++) {
745 c = (unsigned char) string[i];
746 if ( j >= INT_MAX - 1 || (isalpha(c) && j >= INT_MAX - 4)) {
747 php_error_docref(NULL TSRMLS_CC, E_WARNING, "String too long, max length is %d", INT_MAX);
748 efree(tmp);
749 RETURN_FALSE;
750 }
751 if (isalpha(c)) {
752 tmp[j++] = '[';
753 tmp[j++] = toupper(c);
754 tmp[j++] = tolower(c);
755 tmp[j++] = ']';
756 } else {
757 tmp[j++] = c;
758 }
759 }
760 tmp[j] = 0;
761
762 RETVAL_STRINGL(tmp, j, 1);
763 efree(tmp);
764 }
765 /* }}} */
766
767 /*
768 * Local variables:
769 * tab-width: 4
770 * c-basic-offset: 4
771 * End:
772 * vim600: noet sw=4 ts=4 fdm=marker
773 * vim<600: noet sw=4 ts=4
774 */
775