/* xref: /PHP-5.6/ext/ereg/ereg.c (revision 21452a54) */
/*
   +----------------------------------------------------------------------+
   | PHP Version 5                                                        |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997-2016 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Rasmus Lerdorf <rasmus@php.net>                             |
   |          Jim Winstead <jimw@php.net>                                 |
   |          Jaakko Hyvätti <jaakko@hyvatti.iki.fi>                      |
   +----------------------------------------------------------------------+
 */
/* $Id$ */
21 
22 #include <stdio.h>
23 #include <ctype.h>
24 #include "php.h"
25 #include "ext/standard/php_string.h"
26 #include "php_ereg.h"
27 #include "ext/standard/info.h"
28 
29 /* {{{ arginfo */
30 ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
31 	ZEND_ARG_INFO(0, pattern)
32 	ZEND_ARG_INFO(0, string)
33 	ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
34 ZEND_END_ARG_INFO()
35 
36 ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
37 	ZEND_ARG_INFO(0, pattern)
38 	ZEND_ARG_INFO(0, replacement)
39 	ZEND_ARG_INFO(0, string)
40 ZEND_END_ARG_INFO()
41 
42 ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
43 	ZEND_ARG_INFO(0, pattern)
44 	ZEND_ARG_INFO(0, string)
45 	ZEND_ARG_INFO(0, limit)
46 ZEND_END_ARG_INFO()
47 
48 ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
49 	ZEND_ARG_INFO(0, string)
50 ZEND_END_ARG_INFO()
51 /* }}} */
52 
53 /* {{{ Function table */
54 const zend_function_entry ereg_functions[] = {
55 	PHP_DEP_FE(ereg,			arginfo_ereg)
56 	PHP_DEP_FE(ereg_replace,	arginfo_ereg_replace)
57 	PHP_DEP_FE(eregi,			arginfo_ereg)
58 	PHP_DEP_FE(eregi_replace,	arginfo_ereg_replace)
59 	PHP_DEP_FE(split,			arginfo_split)
60 	PHP_DEP_FE(spliti,			arginfo_split)
61 	PHP_DEP_FE(sql_regcase,		arginfo_sql_regcase)
62 	PHP_FE_END
63 };
64 /* }}} */
65 
/* {{{ reg_cache */
/* One entry of the compiled-regex cache (stored by value in a hash table
 * keyed on the pattern text). */
typedef struct {
	regex_t preg;           /* the compiled expression */
	int cflags;             /* flags the expression was compiled with */
	unsigned long lastuse;  /* LRU counter value recorded when the entry was stored */
} reg_cache;

/* re_magic value remembered from the first successful compilation; 0 until then. */
static int reg_magic = 0;

/* Number of cached patterns at which the cache is pruned. */
#define EREG_CACHE_SIZE 4096
/* }}} */
75 
76 ZEND_DECLARE_MODULE_GLOBALS(ereg)
77 static PHP_GINIT_FUNCTION(ereg);
78 static PHP_GSHUTDOWN_FUNCTION(ereg);
79 
80 /* {{{ Module entry */
81 zend_module_entry ereg_module_entry = {
82 	STANDARD_MODULE_HEADER,
83 	"ereg",
84 	ereg_functions,
85 	NULL,
86 	NULL,
87 	NULL,
88 	NULL,
89 	PHP_MINFO(ereg),
90 	NO_VERSION_YET,
91 	PHP_MODULE_GLOBALS(ereg),
92 	PHP_GINIT(ereg),
93 	PHP_GSHUTDOWN(ereg),
94 	NULL,
95 	STANDARD_MODULE_PROPERTIES_EX
96 };
97 /* }}} */
98 
/* {{{ COMPILE_DL_EREG */
/* Only emitted when COMPILE_DL_EREG is defined. */
#ifdef COMPILE_DL_EREG
ZEND_GET_MODULE(ereg)
#endif /* COMPILE_DL_EREG */
/* }}} */
104 
105 /* {{{ ereg_lru_cmp */
106 static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC)
107 {
108 	Bucket *f = *((Bucket **) a);
109 	Bucket *s = *((Bucket **) b);
110 
111 	if (((reg_cache *)f->pData)->lastuse <
112 				((reg_cache *)s->pData)->lastuse) {
113 		return -1;
114 	} else if (((reg_cache *)f->pData)->lastuse ==
115 				((reg_cache *)s->pData)->lastuse) {
116 		return 0;
117 	} else {
118 		return 1;
119 	}
120 }
121 /* }}} */
122 
123 /* {{{ static ereg_clean_cache */
ereg_clean_cache(void * data,void * arg TSRMLS_DC)124 static int ereg_clean_cache(void *data, void *arg TSRMLS_DC)
125 {
126 	int *num_clean = (int *)arg;
127 
128 	if (*num_clean > 0) {
129 		(*num_clean)--;
130 		return ZEND_HASH_APPLY_REMOVE;
131 	} else {
132 		return ZEND_HASH_APPLY_STOP;
133 	}
134 }
135 /* }}} */
136 
137 /* {{{ _php_regcomp
138  */
_php_regcomp(regex_t * preg,const char * pattern,int cflags TSRMLS_DC)139 static int _php_regcomp(regex_t *preg, const char *pattern, int cflags TSRMLS_DC)
140 {
141 	int r = 0;
142 	int patlen = strlen(pattern);
143 	reg_cache *rc = NULL;
144 
145 	if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
146 		/* easier than dealing with overflow as it happens */
147 		if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
148 			zend_hash_clean(&EREG(ht_rc));
149 			EREG(lru_counter) = 0;
150 		} else {
151 			int num_clean = EREG_CACHE_SIZE / 4;
152 			zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
153 		}
154 	}
155 
156 	if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
157 	   && rc->cflags == cflags) {
158 #ifdef HAVE_REGEX_T_RE_MAGIC
159 		/*
160 		 * We use a saved magic number to see whether cache is corrupted, and if it
161 		 * is, we flush it and compile the pattern from scratch.
162 		 */
163 		if (rc->preg.re_magic != reg_magic) {
164 			zend_hash_clean(&EREG(ht_rc));
165 			EREG(lru_counter) = 0;
166 		} else {
167 			memcpy(preg, &rc->preg, sizeof(*preg));
168 			return r;
169 		}
170 	}
171 
172 	r = regcomp(preg, pattern, cflags);
173 	if(!r) {
174 		reg_cache rcp;
175 
176 		rcp.cflags = cflags;
177 		rcp.lastuse = ++(EREG(lru_counter));
178 		memcpy(&rcp.preg, preg, sizeof(*preg));
179 		/*
180 		 * Since we don't have access to the actual MAGIC1 definition in the private
181 		 * header file, we save the magic value immediately after compilation. Hopefully,
182 		 * it's good.
183 		 */
184 		if (!reg_magic) reg_magic = preg->re_magic;
185 		zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
186 						 (void *) &rcp, sizeof(rcp), NULL);
187 	}
188 #else
189 		memcpy(preg, &rc->preg, sizeof(*preg));
190 	} else {
191 		r = regcomp(preg, pattern, cflags);
192 		if(!r) {
193 			reg_cache rcp;
194 
195 			rcp.cflags = cflags;
196 			rcp.lastuse = ++(EREG(lru_counter));
197 			memcpy(&rcp.preg, preg, sizeof(*preg));
198 			zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
199 							 (void *) &rcp, sizeof(rcp), NULL);
200 		}
201 	}
202 #endif
203 	return r;
204 }
205 /* }}} */
206 
/* Hash-table destructor for cache entries: releases the compiled regex.
 * Defined before the regfree override below, so this reaches the real
 * regfree(). */
static void _free_ereg_cache(reg_cache *rc)
{
	regfree(&(rc->preg));
}

/* From here on, regfree() expands to an empty statement and regcomp() is
 * routed through the caching wrapper _php_regcomp(). */
#undef regfree
#define regfree(a);
#undef regcomp
#define regcomp(a, b, c) _php_regcomp(a, b, c TSRMLS_CC)
216 
217 /* {{{ PHP_GINIT_FUNCTION
218  */
PHP_GINIT_FUNCTION(ereg)219 static PHP_GINIT_FUNCTION(ereg)
220 {
221 	zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
222 	ereg_globals->lru_counter = 0;
223 }
224 /* }}} */
225 
226 /* {{{ PHP_GSHUTDOWN_FUNCTION
227  */
PHP_GSHUTDOWN_FUNCTION(ereg)228 static PHP_GSHUTDOWN_FUNCTION(ereg)
229 {
230 	zend_hash_destroy(&ereg_globals->ht_rc);
231 }
232 /* }}} */
233 
/* Module info callback: prints a one-row table stating whether the bundled
 * (HSREGEX) or the system regex library is in use. */
PHP_MINFO_FUNCTION(ereg)
{
#if HSREGEX
	char *library = "Bundled library enabled";
#else
	char *library = "System library enabled";
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "Regex Library", library);
	php_info_print_table_end();
}
244 
245 
246 /* {{{ php_ereg_eprint
247  * php_ereg_eprint - convert error number to name
248  */
php_ereg_eprint(int err,regex_t * re TSRMLS_DC)249 static void php_ereg_eprint(int err, regex_t *re TSRMLS_DC) {
250 	char *buf = NULL, *message = NULL;
251 	size_t len;
252 	size_t buf_len;
253 
254 #ifdef REG_ITOA
255 	/* get the length of the message */
256 	buf_len = regerror(REG_ITOA | err, re, NULL, 0);
257 	if (buf_len) {
258 		buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
259 		if (!buf) return; /* fail silently */
260 		/* finally, get the error message */
261 		regerror(REG_ITOA | err, re, buf, buf_len);
262 	}
263 #else
264 	buf_len = 0;
265 #endif
266 	len = regerror(err, re, NULL, 0);
267 	if (len) {
268 		message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
269 		if (!message) {
270 			return; /* fail silently */
271 		}
272 		if (buf_len) {
273 			snprintf(message, buf_len, "%s: ", buf);
274 			buf_len += 1; /* so pointer math below works */
275 		}
276 		/* drop the message into place */
277 		regerror(err, re, message + buf_len, len);
278 
279 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
280 	}
281 
282 	STR_FREE(buf);
283 	STR_FREE(message);
284 }
285 /* }}} */
286 
287 /* {{{ php_ereg
288  */
php_ereg(INTERNAL_FUNCTION_PARAMETERS,int icase)289 static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
290 {
291 	zval **regex,			/* Regular expression */
292 		**array = NULL;		/* Optional register array */
293 	char *findin;		/* String to apply expression to */
294 	int findin_len;
295 	regex_t re;
296 	regmatch_t *subs;
297 	int err, match_len, string_len;
298 	uint i;
299 	int copts = 0;
300 	off_t start, end;
301 	char *buf = NULL;
302 	char *string = NULL;
303 	int   argc = ZEND_NUM_ARGS();
304 
305 	if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", &regex, &findin, &findin_len, &array) == FAILURE) {
306 		return;
307 	}
308 
309 	if (icase) {
310 		copts |= REG_ICASE;
311 	}
312 
313 	if (argc == 2) {
314 		copts |= REG_NOSUB;
315 	}
316 
317 	/* compile the regular expression from the supplied regex */
318 	if (Z_TYPE_PP(regex) == IS_STRING) {
319 		err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
320 	} else {
321 		/* we convert numbers to integers and treat them as a string */
322 		if (Z_TYPE_PP(regex) == IS_DOUBLE) {
323 			convert_to_long_ex(regex);	/* get rid of decimal places */
324 		}
325 		convert_to_string_ex(regex);
326 		/* don't bother doing an extended regex with just a number */
327 		err = regcomp(&re, Z_STRVAL_PP(regex), copts);
328 	}
329 
330 	if (err) {
331 		php_ereg_eprint(err, &re TSRMLS_CC);
332 		RETURN_FALSE;
333 	}
334 
335 	/* make a copy of the string we're looking in */
336 	string = estrndup(findin, findin_len);
337 
338 	/* allocate storage for (sub-)expression-matches */
339 	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
340 
341 	/* actually execute the regular expression */
342 	err = regexec(&re, string, re.re_nsub+1, subs, 0);
343 	if (err && err != REG_NOMATCH) {
344 		php_ereg_eprint(err, &re TSRMLS_CC);
345 		regfree(&re);
346 		efree(subs);
347 		RETURN_FALSE;
348 	}
349 	match_len = 1;
350 
351 	if (array && err != REG_NOMATCH) {
352 		match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
353 		string_len = findin_len + 1;
354 
355 		buf = emalloc(string_len);
356 
357 		zval_dtor(*array);	/* start with clean array */
358 		array_init(*array);
359 
360 		for (i = 0; i <= re.re_nsub; i++) {
361 			start = subs[i].rm_so;
362 			end = subs[i].rm_eo;
363 			if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
364 				add_index_stringl(*array, i, string+start, end-start, 1);
365 			} else {
366 				add_index_bool(*array, i, 0);
367 			}
368 		}
369 		efree(buf);
370 	}
371 
372 	efree(subs);
373 	efree(string);
374 	if (err == REG_NOMATCH) {
375 		RETVAL_FALSE;
376 	} else {
377 		if (match_len == 0)
378 			match_len = 1;
379 		RETVAL_LONG(match_len);
380 	}
381 	regfree(&re);
382 }
383 /* }}} */
384 
385 /* {{{ proto int ereg(string pattern, string string [, array registers])
386    Regular expression match */
PHP_FUNCTION(ereg)387 PHP_FUNCTION(ereg)
388 {
389 	php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
390 }
391 /* }}} */
392 
393 /* {{{ proto int eregi(string pattern, string string [, array registers])
394    Case-insensitive regular expression match */
PHP_FUNCTION(eregi)395 PHP_FUNCTION(eregi)
396 {
397 	php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
398 }
399 /* }}} */
400 
401 /* {{{ php_ereg_replace
402  * this is the meat and potatoes of regex replacement! */
php_ereg_replace(const char * pattern,const char * replace,const char * string,int icase,int extended TSRMLS_DC)403 PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC)
404 {
405 	regex_t re;
406 	regmatch_t *subs;
407 
408 	char *buf,	/* buf is where we build the replaced string */
409 		 *nbuf,	/* nbuf is used when we grow the buffer */
410 		 *walkbuf; /* used to walk buf when replacing backrefs */
411 	const char *walk; /* used to walk replacement string for backrefs */
412 	size_t buf_len, new_l;
413 	int pos, tmp, string_len;
414 	int err, copts = 0;
415 
416 	string_len = strlen(string);
417 
418 	if (icase) {
419 		copts = REG_ICASE;
420 	}
421 	if (extended) {
422 		copts |= REG_EXTENDED;
423 	}
424 
425 	err = regcomp(&re, pattern, copts);
426 	if (err) {
427 		php_ereg_eprint(err, &re TSRMLS_CC);
428 		return ((char *) -1);
429 	}
430 
431 
432 	/* allocate storage for (sub-)expression-matches */
433 	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
434 
435 	/* start with a buffer that is twice the size of the stringo
436 	   we're doing replacements in */
437 	buf = safe_emalloc(string_len, 2, 1);
438 	buf_len = 2 * string_len + 1;
439 
440 	err = pos = 0;
441 	buf[0] = '\0';
442 	while (!err) {
443 		err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
444 
445 		if (err && err != REG_NOMATCH) {
446 			php_ereg_eprint(err, &re TSRMLS_CC);
447 			efree(subs);
448 			efree(buf);
449 			regfree(&re);
450 			return ((char *) -1);
451 		}
452 
453 		if (!err) {
454 			/* backref replacement is done in two passes:
455 			   1) find out how long the string will be, and allocate buf
456 			   2) copy the part before match, replacement and backrefs to buf
457 
458 			   Jaakko Hyv�tti <Jaakko.Hyvatti@iki.fi>
459 			   */
460 
461 			new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
462 			walk = replace;
463 			while (*walk) {
464 				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
465 					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
466 						new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
467 					}
468 					walk += 2;
469 				} else {
470 					new_l++;
471 					walk++;
472 				}
473 			}
474 			if (new_l + 1 > buf_len) {
475 				nbuf = safe_emalloc(new_l + 1, 2, buf_len);
476 				buf_len = 1 + buf_len + 2 * new_l;
477 				strncpy(nbuf, buf, buf_len - 1);
478 				nbuf[buf_len - 1] = '\0';
479 				efree(buf);
480 				buf = nbuf;
481 			}
482 			tmp = strlen(buf);
483 			/* copy the part of the string before the match */
484 			strncat(buf, &string[pos], subs[0].rm_so);
485 
486 			/* copy replacement and backrefs */
487 			walkbuf = &buf[tmp + subs[0].rm_so];
488 			walk = replace;
489 			while (*walk) {
490 				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && (unsigned char)walk[1] - '0' <= (int)re.re_nsub) {
491 					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
492 						/* this next case shouldn't happen. it does. */
493 						&& subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
494 
495 						tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
496 						memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
497 						walkbuf += tmp;
498 					}
499 					walk += 2;
500 				} else {
501 					*walkbuf++ = *walk++;
502 				}
503 			}
504 			*walkbuf = '\0';
505 
506 			/* and get ready to keep looking for replacements */
507 			if (subs[0].rm_so == subs[0].rm_eo) {
508 				if (subs[0].rm_so + pos >= string_len) {
509 					break;
510 				}
511 				new_l = strlen (buf) + 1;
512 				if (new_l + 1 > buf_len) {
513 					nbuf = safe_emalloc(new_l + 1, 2, buf_len);
514 					buf_len = 1 + buf_len + 2 * new_l;
515 					strncpy(nbuf, buf, buf_len-1);
516 					efree(buf);
517 					buf = nbuf;
518 				}
519 				pos += subs[0].rm_eo + 1;
520 				buf [new_l-1] = string [pos-1];
521 				buf [new_l] = '\0';
522 			} else {
523 				pos += subs[0].rm_eo;
524 			}
525 		} else { /* REG_NOMATCH */
526 			new_l = strlen(buf) + strlen(&string[pos]);
527 			if (new_l + 1 > buf_len) {
528 				buf_len = new_l + 1; /* now we know exactly how long it is */
529 				nbuf = safe_emalloc(new_l, 1, 1);
530 				strncpy(nbuf, buf, buf_len-1);
531 				efree(buf);
532 				buf = nbuf;
533 			}
534 			/* stick that last bit of string on our output */
535 			strlcat(buf, &string[pos], buf_len);
536 		}
537 	}
538 
539 	/* don't want to leak memory .. */
540 	efree(subs);
541 	regfree(&re);
542 
543 	/* whew. */
544 	return (buf);
545 }
546 /* }}} */
547 
548 /* {{{ php_do_ereg_replace
549  */
php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS,int icase)550 static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
551 {
552 	zval **arg_pattern,
553 		**arg_replace;
554 	char *pattern, *arg_string;
555 	char *string;
556 	char *replace;
557 	char *ret;
558 	int arg_string_len;
559 
560 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
561 		return;
562 	}
563 
564 	if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
565 		if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
566 			pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
567 		} else {
568 			pattern = STR_EMPTY_ALLOC();
569 		}
570 	} else {
571 		convert_to_long_ex(arg_pattern);
572 		pattern = emalloc(2);
573 		pattern[0] = (char) Z_LVAL_PP(arg_pattern);
574 		pattern[1] = '\0';
575 	}
576 
577 	if (Z_TYPE_PP(arg_replace) == IS_STRING) {
578 		if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
579 			replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
580 		} else {
581 			replace = STR_EMPTY_ALLOC();
582 		}
583 	} else {
584 		convert_to_long_ex(arg_replace);
585 		replace = emalloc(2);
586 		replace[0] = (char) Z_LVAL_PP(arg_replace);
587 		replace[1] = '\0';
588 	}
589 
590 	if (arg_string && arg_string_len) {
591 		string = estrndup(arg_string, arg_string_len);
592 	} else {
593 		string = STR_EMPTY_ALLOC();
594 	}
595 
596 	/* do the actual work */
597 	ret = php_ereg_replace(pattern, replace, string, icase, 1 TSRMLS_CC);
598 	if (ret == (char *) -1) {
599 		RETVAL_FALSE;
600 	} else {
601 		RETVAL_STRINGL_CHECK(ret, strlen(ret), 1);
602 		STR_FREE(ret);
603 	}
604 
605 	STR_FREE(string);
606 	STR_FREE(replace);
607 	STR_FREE(pattern);
608 }
609 /* }}} */
610 
611 /* {{{ proto string ereg_replace(string pattern, string replacement, string string)
612    Replace regular expression */
PHP_FUNCTION(ereg_replace)613 PHP_FUNCTION(ereg_replace)
614 {
615 	php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
616 }
617 /* }}} */
618 
619 /* {{{ proto string eregi_replace(string pattern, string replacement, string string)
620    Case insensitive replace regular expression */
PHP_FUNCTION(eregi_replace)621 PHP_FUNCTION(eregi_replace)
622 {
623 	php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
624 }
625 /* }}} */
626 
627 /* {{{ php_split
628  */
php_split(INTERNAL_FUNCTION_PARAMETERS,int icase)629 static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
630 {
631 	long count = -1;
632 	regex_t re;
633 	regmatch_t subs[1];
634 	char *spliton, *str, *strp, *endp;
635 	int spliton_len, str_len;
636 	int err, size, copts = 0;
637 
638 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
639 		return;
640 	}
641 
642 	if (icase) {
643 		copts = REG_ICASE;
644 	}
645 
646 	strp = str;
647 	endp = strp + str_len;
648 
649 	err = regcomp(&re, spliton, REG_EXTENDED | copts);
650 	if (err) {
651 		php_ereg_eprint(err, &re TSRMLS_CC);
652 		RETURN_FALSE;
653 	}
654 
655 	array_init(return_value);
656 
657 	/* churn through str, generating array entries as we go */
658 	while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
659 		if (subs[0].rm_so == 0 && subs[0].rm_eo) {
660 			/* match is at start of string, return empty string */
661 			add_next_index_stringl(return_value, "", 0, 1);
662 			/* skip ahead the length of the regex match */
663 			strp += subs[0].rm_eo;
664 		} else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
665 			/* No more matches */
666 			regfree(&re);
667 
668 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
669 
670 			zend_hash_destroy(Z_ARRVAL_P(return_value));
671 			efree(Z_ARRVAL_P(return_value));
672 			RETURN_FALSE;
673 		} else {
674 			/* On a real match */
675 
676 			/* make a copy of the substring */
677 			size = subs[0].rm_so;
678 
679 			/* add it to the array */
680 			add_next_index_stringl(return_value, strp, size, 1);
681 
682 			/* point at our new starting point */
683 			strp = strp + subs[0].rm_eo;
684 		}
685 
686 		/* if we're only looking for a certain number of points,
687 		   stop looking once we hit it */
688 		if (count != -1) {
689 			count--;
690 		}
691 	}
692 
693 	/* see if we encountered an error */
694 	if (err && err != REG_NOMATCH) {
695 		php_ereg_eprint(err, &re TSRMLS_CC);
696 		regfree(&re);
697 		zend_hash_destroy(Z_ARRVAL_P(return_value));
698 		efree(Z_ARRVAL_P(return_value));
699 		RETURN_FALSE;
700 	}
701 
702 	/* otherwise we just have one last element to add to the array */
703 	size = endp - strp;
704 
705 	add_next_index_stringl(return_value, strp, size, 1);
706 
707 	regfree(&re);
708 }
709 /* }}} */
710 
711 /* {{{ proto array split(string pattern, string string [, int limit])
712    Split string into array by regular expression */
PHP_FUNCTION(split)713 PHP_FUNCTION(split)
714 {
715 	php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
716 }
717 /* }}} */
718 
719 /* {{{ proto array spliti(string pattern, string string [, int limit])
720    Split string into array by regular expression case-insensitive */
721 
PHP_FUNCTION(spliti)722 PHP_FUNCTION(spliti)
723 {
724 	php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
725 }
726 
727 /* }}} */
728 
729 /* {{{ proto string sql_regcase(string string)
730    Make regular expression for case insensitive match */
PHP_FUNCTION(sql_regcase)731 PHP_EREG_API PHP_FUNCTION(sql_regcase)
732 {
733 	char *string, *tmp;
734 	int string_len;
735 	unsigned char c;
736 	register int i, j;
737 
738 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
739 		return;
740 	}
741 
742 	tmp = safe_emalloc(string_len, 4, 1);
743 
744 	for (i = j = 0; i < string_len; i++) {
745 		c = (unsigned char) string[i];
746 		if ( j >= INT_MAX - 1 || (isalpha(c) && j >= INT_MAX - 4)) {
747 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "String too long, max length is %d", INT_MAX);
748 			efree(tmp);
749 			RETURN_FALSE;
750 		}
751 		if (isalpha(c)) {
752 			tmp[j++] = '[';
753 			tmp[j++] = toupper(c);
754 			tmp[j++] = tolower(c);
755 			tmp[j++] = ']';
756 		} else {
757 			tmp[j++] = c;
758 		}
759 	}
760 	tmp[j] = 0;
761 
762 	RETVAL_STRINGL(tmp, j, 1);
763 	efree(tmp);
764 }
765 /* }}} */
766 
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
 */