xref: /PHP-5.3/ext/ereg/regex/main.c (revision aa3eee1d)
1 #include <stdio.h>
2 #include <string.h>
3 #include <sys/types.h>
4 #include <regex.h>
5 #include <assert.h>
6 #include <stdlib.h>
7 
8 #include "main.ih"
9 
/* run state shared by every function in this file */
char *progname;			/* argv[0], recorded at the top of main() */
int debug = 0;			/* bumped once for each -x given to main() */
int line = 0;			/* input line counter, advanced by regress() */
int status = 0;			/* handed to exit(); set to 1 when a check fails */

/* starting flag sets that options() adds to or strips from */
int copts = REG_EXTENDED;	/* flags for regcomp(); replaced by -c */
int eopts = 0;			/* flags for regexec(); replaced by -e */

/* -S and -E values; main() applies them only under REG_STARTEND */
regoff_t startoff = 0;
regoff_t endoff = 0;


/* helpers defined outside this file */
extern int split();
extern void regprint();
23 
24 /*
25  - main - do the simple case, hand off to regress() for regression
26  */
main(argc,argv)27 int main(argc, argv)
28 int argc;
29 char *argv[];
30 {
31 	regex_t re;
32 #	define	NS	10
33 	regmatch_t subs[NS];
34 	char erbuf[100];
35 	int err;
36 	size_t len;
37 	int c;
38 	int errflg = 0;
39 	register int i;
40 	extern int optind;
41 	extern char *optarg;
42 
43 	progname = argv[0];
44 
45 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
46 		switch (c) {
47 		case 'c':	/* compile options */
48 			copts = options('c', optarg);
49 			break;
50 		case 'e':	/* execute options */
51 			eopts = options('e', optarg);
52 			break;
53 		case 'S':	/* start offset */
54 			startoff = (regoff_t)atoi(optarg);
55 			break;
56 		case 'E':	/* end offset */
57 			endoff = (regoff_t)atoi(optarg);
58 			break;
59 		case 'x':	/* Debugging. */
60 			debug++;
61 			break;
62 		case '?':
63 		default:
64 			errflg++;
65 			break;
66 		}
67 	if (errflg) {
68 		fprintf(stderr, "usage: %s ", progname);
69 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
70 		exit(2);
71 	}
72 
73 	if (optind >= argc) {
74 		regress(stdin);
75 		exit(status);
76 	}
77 
78 	err = regcomp(&re, argv[optind++], copts);
79 	if (err) {
80 		len = regerror(err, &re, erbuf, sizeof(erbuf));
81 		fprintf(stderr, "error %s, %d/%d `%s'\n",
82 			eprint(err), len, sizeof(erbuf), erbuf);
83 		exit(status);
84 	}
85 	regprint(&re, stdout);
86 
87 	if (optind >= argc) {
88 		regfree(&re);
89 		exit(status);
90 	}
91 
92 	if (eopts&REG_STARTEND) {
93 		subs[0].rm_so = startoff;
94 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
95 	}
96 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
97 	if (err) {
98 		len = regerror(err, &re, erbuf, sizeof(erbuf));
99 		fprintf(stderr, "error %s, %d/%d `%s'\n",
100 			eprint(err), len, sizeof(erbuf), erbuf);
101 		exit(status);
102 	}
103 	if (!(copts&REG_NOSUB)) {
104 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
105 		if (subs[0].rm_so != -1) {
106 			if (len != 0)
107 				printf("match `%.*s'\n", (int)len,
108 					argv[optind] + subs[0].rm_so);
109 			else
110 				printf("match `'@%.1s\n",
111 					argv[optind] + subs[0].rm_so);
112 		}
113 		for (i = 1; i < NS; i++)
114 			if (subs[i].rm_so != -1)
115 				printf("(%d) `%.*s'\n", i,
116 					(int)(subs[i].rm_eo - subs[i].rm_so),
117 					argv[optind] + subs[i].rm_so);
118 	}
119 	exit(status);
120 }
121 
122 /*
123  - regress - main loop of regression test
124  == void regress(FILE *in);
125  */
126 void
regress(in)127 regress(in)
128 FILE *in;
129 {
130 	char inbuf[1000];
131 #	define	MAXF	10
132 	char *f[MAXF];
133 	int nf;
134 	int i;
135 	char erbuf[100];
136 	size_t ne;
137 	char *badpat = "invalid regular expression";
138 #	define	SHORT	10
139 	char *bpname = "REG_BADPAT";
140 	regex_t re;
141 
142 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
143 		line++;
144 		if (inbuf[0] == '#' || inbuf[0] == '\n')
145 			continue;			/* NOTE CONTINUE */
146 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
147 		if (debug)
148 			fprintf(stdout, "%d:\n", line);
149 		nf = split(inbuf, f, MAXF, "\t\t");
150 		if (nf < 3) {
151 			fprintf(stderr, "bad input, line %d\n", line);
152 			exit(1);
153 		}
154 		for (i = 0; i < nf; i++)
155 			if (strcmp(f[i], "\"\"") == 0)
156 				f[i] = "";
157 		if (nf <= 3)
158 			f[3] = NULL;
159 		if (nf <= 4)
160 			f[4] = NULL;
161 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
162 		if (opt('&', f[1]))	/* try with either type of RE */
163 			try(f[0], f[1], f[2], f[3], f[4],
164 					options('c', f[1]) &~ REG_EXTENDED);
165 	}
166 
167 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
168 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
169 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
170 							erbuf, badpat);
171 		status = 1;
172 	}
173 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
174 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
175 						ne != strlen(badpat)+1) {
176 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
177 						erbuf, SHORT-1, badpat);
178 		status = 1;
179 	}
180 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
181 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
182 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
183 						erbuf, bpname);
184 		status = 1;
185 	}
186 	re.re_endp = bpname;
187 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
188 	if (atoi(erbuf) != (int)REG_BADPAT) {
189 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
190 						erbuf, (long)REG_BADPAT);
191 		status = 1;
192 	} else if (ne != strlen(erbuf)+1) {
193 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
194 						erbuf, (long)REG_BADPAT);
195 		status = 1;
196 	}
197 }
198 
199 /*
200  - try - try it, and report on problems
201  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
202  */
203 void
try(f0,f1,f2,f3,f4,opts)204 try(f0, f1, f2, f3, f4, opts)
205 char *f0;
206 char *f1;
207 char *f2;
208 char *f3;
209 char *f4;
210 int opts;			/* may not match f1 */
211 {
212 	regex_t re;
213 #	define	NSUBS	10
214 	regmatch_t subs[NSUBS];
215 #	define	NSHOULD	15
216 	char *should[NSHOULD];
217 	int nshould;
218 	char erbuf[100];
219 	int err;
220 	int len;
221 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
222 	register int i;
223 	char *grump;
224 	char f0copy[1000];
225 	char f2copy[1000];
226 
227 	strcpy(f0copy, f0);
228 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
229 	fixstr(f0copy);
230 	err = regcomp(&re, f0copy, opts);
231 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
232 		/* unexpected error or wrong error */
233 		len = regerror(err, &re, erbuf, sizeof(erbuf));
234 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
235 					line, type, eprint(err), len,
236 					sizeof(erbuf), erbuf);
237 		status = 1;
238 	} else if (err == 0 && opt('C', f1)) {
239 		/* unexpected success */
240 		fprintf(stderr, "%d: %s should have given REG_%s\n",
241 						line, type, f2);
242 		status = 1;
243 		err = 1;	/* so we won't try regexec */
244 	}
245 
246 	if (err != 0) {
247 		regfree(&re);
248 		return;
249 	}
250 
251 	strcpy(f2copy, f2);
252 	fixstr(f2copy);
253 
254 	if (options('e', f1)&REG_STARTEND) {
255 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
256 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
257 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
258 		subs[0].rm_eo = strchr(f2, ')') - f2;
259 	}
260 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
261 
262 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
263 		/* unexpected error or wrong error */
264 		len = regerror(err, &re, erbuf, sizeof(erbuf));
265 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
266 					line, type, eprint(err), len,
267 					sizeof(erbuf), erbuf);
268 		status = 1;
269 	} else if (err != 0) {
270 		/* nothing more to check */
271 	} else if (f3 == NULL) {
272 		/* unexpected success */
273 		fprintf(stderr, "%d: %s exec should have failed\n",
274 						line, type);
275 		status = 1;
276 		err = 1;		/* just on principle */
277 	} else if (opts&REG_NOSUB) {
278 		/* nothing more to check */
279 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
280 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
281 		status = 1;
282 		err = 1;
283 	}
284 
285 	if (err != 0 || f4 == NULL) {
286 		regfree(&re);
287 		return;
288 	}
289 
290 	for (i = 1; i < NSHOULD; i++)
291 		should[i] = NULL;
292 	nshould = split(f4, should+1, NSHOULD-1, ",");
293 	if (nshould == 0) {
294 		nshould = 1;
295 		should[1] = "";
296 	}
297 	for (i = 1; i < NSUBS; i++) {
298 		grump = check(f2, subs[i], should[i]);
299 		if (grump != NULL) {
300 			fprintf(stderr, "%d: %s $%d %s\n", line,
301 							type, i, grump);
302 			status = 1;
303 			err = 1;
304 		}
305 	}
306 
307 	regfree(&re);
308 }
309 
310 /*
311  - options - pick options out of a regression-test string
312  == int options(int type, char *s);
313  */
314 int
options(type,s)315 options(type, s)
316 int type;			/* 'c' compile, 'e' exec */
317 char *s;
318 {
319 	register char *p;
320 	register int o = (type == 'c') ? copts : eopts;
321 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
322 
323 	for (p = s; *p != '\0'; p++)
324 		if (strchr(legal, *p) != NULL)
325 			switch (*p) {
326 			case 'b':
327 				o &= ~REG_EXTENDED;
328 				break;
329 			case 'i':
330 				o |= REG_ICASE;
331 				break;
332 			case 's':
333 				o |= REG_NOSUB;
334 				break;
335 			case 'n':
336 				o |= REG_NEWLINE;
337 				break;
338 			case 'm':
339 				o &= ~REG_EXTENDED;
340 				o |= REG_NOSPEC;
341 				break;
342 			case 'p':
343 				o |= REG_PEND;
344 				break;
345 			case '^':
346 				o |= REG_NOTBOL;
347 				break;
348 			case '$':
349 				o |= REG_NOTEOL;
350 				break;
351 			case '#':
352 				o |= REG_STARTEND;
353 				break;
354 			case 't':	/* trace */
355 				o |= REG_TRACE;
356 				break;
357 			case 'l':	/* force long representation */
358 				o |= REG_LARGE;
359 				break;
360 			case 'r':	/* force backref use */
361 				o |= REG_BACKR;
362 				break;
363 			}
364 	return(o);
365 }
366 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in s and 0 if it does not.  The
 * search is done by strchr(), so c == '\0' finds the terminator and
 * yields 1.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	return(strchr(s, c) != NULL);
}
378 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  A NULL pointer is accepted and ignored.  The scan
 * stops at the first NUL that was already in the input; a NUL planted for
 * a 'Z' does not stop it, because the pointer has moved on before the
 * next test.
 */
void
fixstr(p)
register char *p;
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++)
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
}
400 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the span described by sub within str against `should':
 *   NULL or "-"   no match is expected
 *   "@text"       an empty match is expected, positioned where `text'
 *                 begins ("@" alone means at the end of the string)
 *   anything else the exact text expected to have matched
 * Returns NULL when all is well, otherwise a complaint.  The complaint is
 * either a string literal or a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register int show;
	register char *p;
	static char grump[500];
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* quote no more of the match than grump[] can hold with its wording */
	show = len;
	if (show > (int)sizeof(grump) - 32)
		show = (int)sizeof(grump) - 32;

	/* check for not supposed to match; must precede strlen(should) */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", show, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", show, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
477 
478 /*
479  - eprint - convert error number to name
480  == static char *eprint(int err);
481  */
482 static char *
eprint(err)483 eprint(err)
484 int err;
485 {
486 	static char epbuf[100];
487 	size_t len;
488 
489 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
490 	assert(len <= sizeof(epbuf));
491 	return(epbuf);
492 }
493 
494 /*
495  - efind - convert error name to number
496  == static int efind(char *name);
497  */
498 static int
efind(name)499 efind(name)
500 char *name;
501 {
502 	static char efbuf[100];
503 	regex_t re;
504 
505 	sprintf(efbuf, "REG_%s", name);
506 	assert(strlen(efbuf) < sizeof(efbuf));
507 	re.re_endp = efbuf;
508 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509 	return(atoi(efbuf));
510 }
511