1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include "regint.h"
30
/*
 * Word-character test with a selectable mode: when (mode) is 0 the
 * encoding's ONIGENC_IS_MBC_WORD() test is used, otherwise
 * ONIGENC_IS_MBC_WORD_ASCII() is used.
 */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * True when the character at p has code 13 (CR) and the character that
 * starts right after it satisfies ONIGENC_IS_MBC_NEWLINE().
 */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif

/*
 * Feature switch, defined with no value.
 * NOTE(review): its consumers are outside this part of the file --
 * presumably it enables interrupt checks inside the matcher; confirm.
 */
#define CHECK_INTERRUPT_IN_MATCH

/*
 * Start position of capture group i.
 * Both macros below expect the arrays mem_start_stk / mem_end_stk to be
 * in scope at the expansion site.  When bit i is set in the regex's
 * push_mem_start (resp. push_mem_end) status word, the array slot is
 * treated as a stack index and the position is read from that stack
 * entry's u.mem.pstr; otherwise the slot value itself is reinterpreted
 * as the UChar* position.
 */
#define STACK_MEM_START(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
   STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))

/* End position of capture group i; see STACK_MEM_START for the encoding. */
#define STACK_MEM_END(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
   STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
49
/*
 * Forward declarations of two file-local search helpers.  Their
 * definitions are not in this part of the file.
 */
static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);

static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
54
55
#ifdef USE_CALLOUT
/*
 * Data record kept per callout: a call-counter stamp plus
 * ONIG_CALLOUT_DATA_SLOT_NUM typed value slots.
 */
typedef struct {
  /* NOTE(review): presumably the match_at_call_counter value seen when
     this record was last used -- confirm against the accessor code. */
  int last_match_at_call_counter;
  struct {
    OnigType  type;   /* type tag of val */
    OnigValue val;    /* stored value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
65
/*
 * Per-match tuning parameters.  The fields are written by the
 * onig_set_*_of_match_param() setters below; MATCH_ARG_INIT copies the
 * two limits into the MatchArg used for a match.
 */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;     /* set by onig_set_match_stack_limit_size_of_match_param() */
  unsigned long   retry_limit_in_match;  /* set by onig_set_retry_limit_in_match_of_match_param() */
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;    /* set by onig_set_progress_callout_of_match_param() */
  OnigCalloutFunc retraction_callout_of_contents;  /* set by onig_set_retraction_callout_of_match_param() */
  int             match_at_call_counter;
  void*           callout_user_data;     /* set by onig_set_callout_user_data_of_match_param() */
  CalloutData*    callout_data;
  int             callout_data_alloc_num;  /* NOTE(review): presumably the element count of callout_data -- confirm */
#endif
};
78
79 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)80 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
81 unsigned int limit)
82 {
83 param->match_stack_limit = limit;
84 return ONIG_NORMAL;
85 }
86
87 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)88 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
89 unsigned long limit)
90 {
91 param->retry_limit_in_match = limit;
92 return ONIG_NORMAL;
93 }
94
95 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)96 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
97 {
98 #ifdef USE_CALLOUT
99 param->progress_callout_of_contents = f;
100 return ONIG_NORMAL;
101 #else
102 return ONIG_NO_SUPPORT_CONFIG;
103 #endif
104 }
105
106 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)107 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
108 {
109 #ifdef USE_CALLOUT
110 param->retraction_callout_of_contents = f;
111 return ONIG_NORMAL;
112 #else
113 return ONIG_NO_SUPPORT_CONFIG;
114 #endif
115 }
116
117 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)118 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
119 {
120 #ifdef USE_CALLOUT
121 param->callout_user_data = user_data;
122 return ONIG_NORMAL;
123 #else
124 return ONIG_NO_SUPPORT_CONFIG;
125 #endif
126 }
127
128
/*
 * State shared across the match attempts of one search/match call.
 * Filled in by MATCH_ARG_INIT; stack_p is released by MATCH_ARG_FREE.
 */
typedef struct {
  void* stack_p;            /* saved stack allocation; when non-NULL, STACK_INIT reuses it instead of allocating */
  int   stack_n;            /* number of stack entries in stack_p (STACK_INIT sets stk_end = stk_base + stack_n) */
  OnigOptionType options;   /* match options passed to MATCH_ARG_INIT */
  OnigRegion*    region;    /* region passed to MATCH_ARG_INIT */
  int            ptr_num;   /* PTR_NUM_SIZE(reg): count of StackIndex slots placed before the stack */
  const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */
  unsigned int   match_stack_limit;     /* copied from the OnigMatchParam */
  unsigned long  retry_limit_in_match;  /* copied from the OnigMatchParam */
  OnigMatchParam* mp;       /* the match parameter object itself */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
  UChar* best_s;        /* not initialized by MATCH_ARG_INIT */
#endif
} MatchArg;
144
145
146 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
147
/*
 * Argument type tags (compiled only in debug builds).
 * Only ARG_SPECIAL needs an explicit value; the rest count up from 0.
 */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON,        /* 0 */
  ARG_RELADDR,    /* 1 */
  ARG_ABSADDR,    /* 2 */
  ARG_LENGTH,     /* 3 */
  ARG_MEMNUM,     /* 4 */
  ARG_OPTION,     /* 5 */
  ARG_MODE        /* 6 */
} OpArgType;
159
/* One row of the opcode-to-name table used by the debug printers. */
typedef struct {
  short opcode;   /* opcode value; a negative value terminates the table */
  char *name;     /* printable opcode name */
} OpInfoType;
164
165 static OpInfoType OpInfo[] = {
166 { OP_FINISH, "finish"},
167 { OP_END, "end"},
168 { OP_STR_1, "str_1"},
169 { OP_STR_2, "str_2"},
170 { OP_STR_3, "str_3"},
171 { OP_STR_4, "str_4"},
172 { OP_STR_5, "str_5"},
173 { OP_STR_N, "str_n"},
174 { OP_STR_MB2N1, "str_mb2-n1"},
175 { OP_STR_MB2N2, "str_mb2-n2"},
176 { OP_STR_MB2N3, "str_mb2-n3"},
177 { OP_STR_MB2N, "str_mb2-n"},
178 { OP_STR_MB3N, "str_mb3n"},
179 { OP_STR_MBN, "str_mbn"},
180 { OP_STR_1_IC, "str_1-ic"},
181 { OP_STR_N_IC, "str_n-ic"},
182 { OP_CCLASS, "cclass"},
183 { OP_CCLASS_MB, "cclass-mb"},
184 { OP_CCLASS_MIX, "cclass-mix"},
185 { OP_CCLASS_NOT, "cclass-not"},
186 { OP_CCLASS_MB_NOT, "cclass-mb-not"},
187 { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
188 { OP_ANYCHAR, "anychar"},
189 { OP_ANYCHAR_ML, "anychar-ml"},
190 { OP_ANYCHAR_STAR, "anychar*"},
191 { OP_ANYCHAR_ML_STAR, "anychar-ml*"},
192 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next"},
193 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
194 { OP_WORD, "word"},
195 { OP_WORD_ASCII, "word-ascii"},
196 { OP_NO_WORD, "not-word"},
197 { OP_NO_WORD_ASCII, "not-word-ascii"},
198 { OP_WORD_BOUNDARY, "word-boundary"},
199 { OP_NO_WORD_BOUNDARY, "not-word-boundary"},
200 { OP_WORD_BEGIN, "word-begin"},
201 { OP_WORD_END, "word-end"},
202 { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
203 { OP_BEGIN_BUF, "begin-buf"},
204 { OP_END_BUF, "end-buf"},
205 { OP_BEGIN_LINE, "begin-line"},
206 { OP_END_LINE, "end-line"},
207 { OP_SEMI_END_BUF, "semi-end-buf"},
208 { OP_BEGIN_POSITION, "begin-position"},
209 { OP_BACKREF1, "backref1"},
210 { OP_BACKREF2, "backref2"},
211 { OP_BACKREF_N, "backref-n"},
212 { OP_BACKREF_N_IC, "backref-n-ic"},
213 { OP_BACKREF_MULTI, "backref_multi"},
214 { OP_BACKREF_MULTI_IC, "backref_multi-ic"},
215 { OP_BACKREF_WITH_LEVEL, "backref_with_level"},
216 { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
217 { OP_BACKREF_CHECK, "backref_check"},
218 { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
219 { OP_MEM_START_PUSH, "mem-start-push"},
220 { OP_MEM_START, "mem-start"},
221 { OP_MEM_END_PUSH, "mem-end-push"},
222 #ifdef USE_CALL
223 { OP_MEM_END_PUSH_REC, "mem-end-push-rec"},
224 #endif
225 { OP_MEM_END, "mem-end"},
226 #ifdef USE_CALL
227 { OP_MEM_END_REC, "mem-end-rec"},
228 #endif
229 { OP_FAIL, "fail"},
230 { OP_JUMP, "jump"},
231 { OP_PUSH, "push"},
232 { OP_PUSH_SUPER, "push-super"},
233 { OP_POP_OUT, "pop-out"},
234 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
235 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"},
236 #endif
237 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next"},
238 { OP_REPEAT, "repeat"},
239 { OP_REPEAT_NG, "repeat-ng"},
240 { OP_REPEAT_INC, "repeat-inc"},
241 { OP_REPEAT_INC_NG, "repeat-inc-ng"},
242 { OP_EMPTY_CHECK_START, "empty-check-start"},
243 { OP_EMPTY_CHECK_END, "empty-check-end"},
244 { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
245 #ifdef USE_CALL
246 { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
247 #endif
248 { OP_PREC_READ_START, "push-pos"},
249 { OP_PREC_READ_END, "pop-pos"},
250 { OP_PREC_READ_NOT_START, "prec-read-not-start"},
251 { OP_PREC_READ_NOT_END, "prec-read-not-end"},
252 { OP_ATOMIC_START, "atomic-start"},
253 { OP_ATOMIC_END, "atomic-end"},
254 { OP_LOOK_BEHIND, "look-behind"},
255 { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
256 { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"},
257 { OP_PUSH_SAVE_VAL, "push-save-val"},
258 { OP_UPDATE_VAR, "update-var"},
259 #ifdef USE_CALL
260 { OP_CALL, "call"},
261 { OP_RETURN, "return"},
262 #endif
263 #ifdef USE_CALLOUT
264 { OP_CALLOUT_CONTENTS, "callout-contents"},
265 { OP_CALLOUT_NAME, "callout-name"},
266 #endif
267 { -1, ""}
268 };
269
270 static char*
op2name(int opcode)271 op2name(int opcode)
272 {
273 int i;
274
275 for (i = 0; OpInfo[i].opcode >= 0; i++) {
276 if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
277 }
278
279 return "";
280 }
281
282 static void
p_string(FILE * f,int len,UChar * s)283 p_string(FILE* f, int len, UChar* s)
284 {
285 fputs(":", f);
286 while (len-- > 0) { fputc(*s++, f); }
287 }
288
289 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)290 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
291 {
292 int x = len * mb_len;
293
294 fprintf(f, ":%d:", len);
295 while (x-- > 0) { fputc(*s++, f); }
296 }
297
298 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)299 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
300 {
301 RelAddrType curr = (RelAddrType )(p - start);
302
303 fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
304 }
305
306 static int
bitset_on_num(BitSetRef bs)307 bitset_on_num(BitSetRef bs)
308 {
309 int i, n;
310
311 n = 0;
312 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
313 if (BITSET_AT(bs, i)) n++;
314 }
315
316 return n;
317 }
318
319 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)320 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
321 Operation* start, OnigEncoding enc)
322 {
323 int i, n;
324 RelAddrType addr;
325 LengthType len;
326 MemNumType mem;
327 OnigCodePoint code;
328 ModeType mode;
329 UChar *q;
330 Operation* p;
331 enum OpCode opcode;
332
333 p = reg->ops + index;
334
335 #ifdef USE_DIRECT_THREADED_CODE
336 opcode = reg->ocs[index];
337 #else
338 opcode = p->opcode;
339 #endif
340
341 fprintf(f, "%s", op2name(opcode));
342 switch (opcode) {
343 case OP_STR_1:
344 p_string(f, 1, p->exact.s); break;
345 case OP_STR_2:
346 p_string(f, 2, p->exact.s); break;
347 case OP_STR_3:
348 p_string(f, 3, p->exact.s); break;
349 case OP_STR_4:
350 p_string(f, 4, p->exact.s); break;
351 case OP_STR_5:
352 p_string(f, 5, p->exact.s); break;
353 case OP_STR_N:
354 len = p->exact_n.n;
355 p_string(f, len, p->exact_n.s); break;
356 case OP_STR_MB2N1:
357 p_string(f, 2, p->exact.s); break;
358 case OP_STR_MB2N2:
359 p_string(f, 4, p->exact.s); break;
360 case OP_STR_MB2N3:
361 p_string(f, 3, p->exact.s); break;
362 case OP_STR_MB2N:
363 len = p->exact_n.n;
364 p_len_string(f, len, 2, p->exact_n.s); break;
365 case OP_STR_MB3N:
366 len = p->exact_n.n;
367 p_len_string(f, len, 3, p->exact_n.s); break;
368 case OP_STR_MBN:
369 {
370 int mb_len;
371
372 mb_len = p->exact_len_n.len;
373 len = p->exact_len_n.n;
374 q = p->exact_len_n.s;
375 fprintf(f, ":%d:%d:", mb_len, len);
376 n = len * mb_len;
377 while (n-- > 0) { fputc(*q++, f); }
378 }
379 break;
380 case OP_STR_1_IC:
381 len = enclen(enc, p->exact.s);
382 p_string(f, len, p->exact.s);
383 break;
384 case OP_STR_N_IC:
385 len = p->exact_n.n;
386 p_len_string(f, len, 1, p->exact_n.s);
387 break;
388
389 case OP_CCLASS:
390 case OP_CCLASS_NOT:
391 n = bitset_on_num(p->cclass.bsp);
392 fprintf(f, ":%d", n);
393 break;
394 case OP_CCLASS_MB:
395 case OP_CCLASS_MB_NOT:
396 {
397 OnigCodePoint ncode;
398 OnigCodePoint* codes;
399
400 codes = (OnigCodePoint* )p->cclass_mb.mb;
401 GET_CODE_POINT(ncode, codes);
402 codes++;
403 GET_CODE_POINT(code, codes);
404 fprintf(f, ":%d:0x%x", ncode, code);
405 }
406 break;
407 case OP_CCLASS_MIX:
408 case OP_CCLASS_MIX_NOT:
409 {
410 OnigCodePoint ncode;
411 OnigCodePoint* codes;
412
413 codes = (OnigCodePoint* )p->cclass_mix.mb;
414 n = bitset_on_num(p->cclass_mix.bsp);
415
416 GET_CODE_POINT(ncode, codes);
417 codes++;
418 GET_CODE_POINT(code, codes);
419 fprintf(f, ":%d:%u:%u", n, code, ncode);
420 }
421 break;
422
423 case OP_ANYCHAR_STAR_PEEK_NEXT:
424 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
425 p_string(f, 1, &(p->anychar_star_peek_next.c));
426 break;
427
428 case OP_WORD_BOUNDARY:
429 case OP_NO_WORD_BOUNDARY:
430 case OP_WORD_BEGIN:
431 case OP_WORD_END:
432 mode = p->word_boundary.mode;
433 fprintf(f, ":%d", mode);
434 break;
435
436 case OP_BACKREF_N:
437 case OP_BACKREF_N_IC:
438 mem = p->backref_n.n1;
439 fprintf(f, ":%d", mem);
440 break;
441 case OP_BACKREF_MULTI_IC:
442 case OP_BACKREF_MULTI:
443 case OP_BACKREF_CHECK:
444 fputs(" ", f);
445 n = p->backref_general.num;
446 for (i = 0; i < n; i++) {
447 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
448 if (i > 0) fputs(", ", f);
449 fprintf(f, "%d", mem);
450 }
451 break;
452 case OP_BACKREF_WITH_LEVEL:
453 case OP_BACKREF_WITH_LEVEL_IC:
454 case OP_BACKREF_CHECK_WITH_LEVEL:
455 {
456 LengthType level;
457
458 level = p->backref_general.nest_level;
459 fprintf(f, ":%d", level);
460 fputs(" ", f);
461 n = p->backref_general.num;
462 for (i = 0; i < n; i++) {
463 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
464 if (i > 0) fputs(", ", f);
465 fprintf(f, "%d", mem);
466 }
467 }
468 break;
469
470 case OP_MEM_START:
471 case OP_MEM_START_PUSH:
472 mem = p->memory_start.num;
473 fprintf(f, ":%d", mem);
474 break;
475
476 case OP_MEM_END:
477 case OP_MEM_END_PUSH:
478 #ifdef USE_CALL
479 case OP_MEM_END_REC:
480 case OP_MEM_END_PUSH_REC:
481 #endif
482 mem = p->memory_end.num;
483 fprintf(f, ":%d", mem);
484 break;
485
486 case OP_JUMP:
487 addr = p->jump.addr;
488 fputc(':', f);
489 p_rel_addr(f, addr, p, start);
490 break;
491
492 case OP_PUSH:
493 case OP_PUSH_SUPER:
494 addr = p->push.addr;
495 fputc(':', f);
496 p_rel_addr(f, addr, p, start);
497 break;
498
499 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
500 case OP_PUSH_OR_JUMP_EXACT1:
501 addr = p->push_or_jump_exact1.addr;
502 fputc(':', f);
503 p_rel_addr(f, addr, p, start);
504 p_string(f, 1, &(p->push_or_jump_exact1.c));
505 break;
506 #endif
507
508 case OP_PUSH_IF_PEEK_NEXT:
509 addr = p->push_if_peek_next.addr;
510 fputc(':', f);
511 p_rel_addr(f, addr, p, start);
512 p_string(f, 1, &(p->push_if_peek_next.c));
513 break;
514
515 case OP_REPEAT:
516 case OP_REPEAT_NG:
517 mem = p->repeat.id;
518 addr = p->repeat.addr;
519 fprintf(f, ":%d:", mem);
520 p_rel_addr(f, addr, p, start);
521 break;
522
523 case OP_REPEAT_INC:
524 case OP_REPEAT_INC_NG:
525 mem = p->repeat.id;
526 fprintf(f, ":%d", mem);
527 break;
528
529 case OP_EMPTY_CHECK_START:
530 mem = p->empty_check_start.mem;
531 fprintf(f, ":%d", mem);
532 break;
533 case OP_EMPTY_CHECK_END:
534 case OP_EMPTY_CHECK_END_MEMST:
535 #ifdef USE_CALL
536 case OP_EMPTY_CHECK_END_MEMST_PUSH:
537 #endif
538 mem = p->empty_check_end.mem;
539 fprintf(f, ":%d", mem);
540 break;
541
542 case OP_PREC_READ_NOT_START:
543 addr = p->prec_read_not_start.addr;
544 fputc(':', f);
545 p_rel_addr(f, addr, p, start);
546 break;
547
548 case OP_LOOK_BEHIND:
549 len = p->look_behind.len;
550 fprintf(f, ":%d", len);
551 break;
552
553 case OP_LOOK_BEHIND_NOT_START:
554 addr = p->look_behind_not_start.addr;
555 len = p->look_behind_not_start.len;
556 fprintf(f, ":%d:", len);
557 p_rel_addr(f, addr, p, start);
558 break;
559
560 #ifdef USE_CALL
561 case OP_CALL:
562 addr = p->call.addr;
563 fprintf(f, ":{/%d}", addr);
564 break;
565 #endif
566
567 case OP_PUSH_SAVE_VAL:
568 {
569 SaveType type;
570
571 type = p->push_save_val.type;
572 mem = p->push_save_val.id;
573 fprintf(f, ":%d:%d", type, mem);
574 }
575 break;
576
577 case OP_UPDATE_VAR:
578 {
579 UpdateVarType type;
580
581 type = p->update_var.type;
582 mem = p->update_var.id;
583 fprintf(f, ":%d:%d", type, mem);
584 }
585 break;
586
587 #ifdef USE_CALLOUT
588 case OP_CALLOUT_CONTENTS:
589 mem = p->callout_contents.num;
590 fprintf(f, ":%d", mem);
591 break;
592
593 case OP_CALLOUT_NAME:
594 {
595 int id;
596
597 id = p->callout_name.id;
598 mem = p->callout_name.num;
599 fprintf(f, ":%d:%d", id, mem);
600 }
601 break;
602 #endif
603
604 case OP_TEXT_SEGMENT_BOUNDARY:
605 if (p->text_segment_boundary.not != 0)
606 fprintf(f, ":not");
607 break;
608
609 case OP_FINISH:
610 case OP_END:
611 case OP_ANYCHAR:
612 case OP_ANYCHAR_ML:
613 case OP_ANYCHAR_STAR:
614 case OP_ANYCHAR_ML_STAR:
615 case OP_WORD:
616 case OP_WORD_ASCII:
617 case OP_NO_WORD:
618 case OP_NO_WORD_ASCII:
619 case OP_BEGIN_BUF:
620 case OP_END_BUF:
621 case OP_BEGIN_LINE:
622 case OP_END_LINE:
623 case OP_SEMI_END_BUF:
624 case OP_BEGIN_POSITION:
625 case OP_BACKREF1:
626 case OP_BACKREF2:
627 case OP_FAIL:
628 case OP_POP_OUT:
629 case OP_PREC_READ_START:
630 case OP_PREC_READ_END:
631 case OP_PREC_READ_NOT_END:
632 case OP_ATOMIC_START:
633 case OP_ATOMIC_END:
634 case OP_LOOK_BEHIND_NOT_END:
635 #ifdef USE_CALL
636 case OP_RETURN:
637 #endif
638 break;
639
640 default:
641 fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
642 break;
643 }
644 }
645 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
646
#ifdef ONIG_DEBUG_COMPILE
/*
 * Dump the whole compiled program of `reg` to `f`: a header with the
 * push_mem_start / push_mem_end status words and the operation count,
 * then one "<index>: <operation>" line per operation.
 */
extern void
onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
  Operation* first = reg->ops;
  Operation* limit = reg->ops + reg->ops_used;
  Operation* cur;

  fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
          reg->push_mem_start, reg->push_mem_end);
  fprintf(f, "code-length: %d\n", reg->ops_used);

  for (cur = first; cur < limit; cur++) {
    int idx = cur - first;

    fprintf(f, "%4d: ", idx);
    print_compiled_byte_code(f, reg, idx, first, reg->enc);
    fprintf(f, "\n");
  }
  fprintf(f, "\n");
}
#endif
671
672
673 #ifdef USE_CAPTURE_HISTORY
674 static void history_tree_free(OnigCaptureTreeNode* node);
675
676 static void
history_tree_clear(OnigCaptureTreeNode * node)677 history_tree_clear(OnigCaptureTreeNode* node)
678 {
679 int i;
680
681 if (IS_NULL(node)) return ;
682
683 for (i = 0; i < node->num_childs; i++) {
684 if (IS_NOT_NULL(node->childs[i])) {
685 history_tree_free(node->childs[i]);
686 }
687 }
688 for (i = 0; i < node->allocated; i++) {
689 node->childs[i] = (OnigCaptureTreeNode* )0;
690 }
691 node->num_childs = 0;
692 node->beg = ONIG_REGION_NOTPOS;
693 node->end = ONIG_REGION_NOTPOS;
694 node->group = -1;
695 }
696
697 static void
history_tree_free(OnigCaptureTreeNode * node)698 history_tree_free(OnigCaptureTreeNode* node)
699 {
700 history_tree_clear(node);
701 if (IS_NOT_NULL(node->childs)) xfree(node->childs);
702
703 xfree(node);
704 }
705
706 static void
history_root_free(OnigRegion * r)707 history_root_free(OnigRegion* r)
708 {
709 if (IS_NULL(r->history_root)) return ;
710
711 history_tree_free(r->history_root);
712 r->history_root = (OnigCaptureTreeNode* )0;
713 }
714
715 static OnigCaptureTreeNode*
history_node_new(void)716 history_node_new(void)
717 {
718 OnigCaptureTreeNode* node;
719
720 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
721 CHECK_NULL_RETURN(node);
722
723 node->childs = (OnigCaptureTreeNode** )0;
724 node->allocated = 0;
725 node->num_childs = 0;
726 node->group = -1;
727 node->beg = ONIG_REGION_NOTPOS;
728 node->end = ONIG_REGION_NOTPOS;
729
730 return node;
731 }
732
733 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)734 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
735 {
736 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
737
738 if (parent->num_childs >= parent->allocated) {
739 int n, i;
740
741 if (IS_NULL(parent->childs)) {
742 n = HISTORY_TREE_INIT_ALLOC_SIZE;
743 parent->childs =
744 (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
745 }
746 else {
747 n = parent->allocated * 2;
748 parent->childs =
749 (OnigCaptureTreeNode** )xrealloc(parent->childs,
750 sizeof(parent->childs[0]) * n);
751 }
752 CHECK_NULL_RETURN_MEMERR(parent->childs);
753 for (i = parent->allocated; i < n; i++) {
754 parent->childs[i] = (OnigCaptureTreeNode* )0;
755 }
756 parent->allocated = n;
757 }
758
759 parent->childs[parent->num_childs] = child;
760 parent->num_childs++;
761 return 0;
762 }
763
764 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)765 history_tree_clone(OnigCaptureTreeNode* node)
766 {
767 int i;
768 OnigCaptureTreeNode *clone, *child;
769
770 clone = history_node_new();
771 CHECK_NULL_RETURN(clone);
772
773 clone->beg = node->beg;
774 clone->end = node->end;
775 for (i = 0; i < node->num_childs; i++) {
776 child = history_tree_clone(node->childs[i]);
777 if (IS_NULL(child)) {
778 history_tree_free(clone);
779 return (OnigCaptureTreeNode* )0;
780 }
781 history_tree_add_child(clone, child);
782 }
783
784 return clone;
785 }
786
787 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)788 onig_get_capture_tree(OnigRegion* region)
789 {
790 return region->history_root;
791 }
792 #endif /* USE_CAPTURE_HISTORY */
793
794 extern void
onig_region_clear(OnigRegion * region)795 onig_region_clear(OnigRegion* region)
796 {
797 int i;
798
799 for (i = 0; i < region->num_regs; i++) {
800 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
801 }
802 #ifdef USE_CAPTURE_HISTORY
803 history_root_free(region);
804 #endif
805 }
806
807 extern int
onig_region_resize(OnigRegion * region,int n)808 onig_region_resize(OnigRegion* region, int n)
809 {
810 region->num_regs = n;
811
812 if (n < ONIG_NREGION)
813 n = ONIG_NREGION;
814
815 if (region->allocated == 0) {
816 region->beg = (int* )xmalloc(n * sizeof(int));
817 region->end = (int* )xmalloc(n * sizeof(int));
818
819 if (region->beg == 0 || region->end == 0)
820 return ONIGERR_MEMORY;
821
822 region->allocated = n;
823 }
824 else if (region->allocated < n) {
825 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
826 region->end = (int* )xrealloc(region->end, n * sizeof(int));
827
828 if (region->beg == 0 || region->end == 0)
829 return ONIGERR_MEMORY;
830
831 region->allocated = n;
832 }
833
834 return 0;
835 }
836
837 static int
onig_region_resize_clear(OnigRegion * region,int n)838 onig_region_resize_clear(OnigRegion* region, int n)
839 {
840 int r;
841
842 r = onig_region_resize(region, n);
843 if (r != 0) return r;
844 onig_region_clear(region);
845 return 0;
846 }
847
848 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)849 onig_region_set(OnigRegion* region, int at, int beg, int end)
850 {
851 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
852
853 if (at >= region->allocated) {
854 int r = onig_region_resize(region, at + 1);
855 if (r < 0) return r;
856 }
857
858 region->beg[at] = beg;
859 region->end[at] = end;
860 return 0;
861 }
862
863 extern void
onig_region_init(OnigRegion * region)864 onig_region_init(OnigRegion* region)
865 {
866 region->num_regs = 0;
867 region->allocated = 0;
868 region->beg = (int* )0;
869 region->end = (int* )0;
870 region->history_root = (OnigCaptureTreeNode* )0;
871 }
872
873 extern OnigRegion*
onig_region_new(void)874 onig_region_new(void)
875 {
876 OnigRegion* r;
877
878 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
879 CHECK_NULL_RETURN(r);
880 onig_region_init(r);
881 return r;
882 }
883
884 extern void
onig_region_free(OnigRegion * r,int free_self)885 onig_region_free(OnigRegion* r, int free_self)
886 {
887 if (r != 0) {
888 if (r->allocated > 0) {
889 if (r->beg) xfree(r->beg);
890 if (r->end) xfree(r->end);
891 r->allocated = 0;
892 }
893 #ifdef USE_CAPTURE_HISTORY
894 history_root_free(r);
895 #endif
896 if (free_self) xfree(r);
897 }
898 }
899
900 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)901 onig_region_copy(OnigRegion* to, OnigRegion* from)
902 {
903 #define RREGC_SIZE (sizeof(int) * from->num_regs)
904 int i;
905
906 if (to == from) return;
907
908 if (to->allocated == 0) {
909 if (from->num_regs > 0) {
910 to->beg = (int* )xmalloc(RREGC_SIZE);
911 if (IS_NULL(to->beg)) return;
912 to->end = (int* )xmalloc(RREGC_SIZE);
913 if (IS_NULL(to->end)) return;
914 to->allocated = from->num_regs;
915 }
916 }
917 else if (to->allocated < from->num_regs) {
918 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
919 if (IS_NULL(to->beg)) return;
920 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
921 if (IS_NULL(to->end)) return;
922 to->allocated = from->num_regs;
923 }
924
925 for (i = 0; i < from->num_regs; i++) {
926 to->beg[i] = from->beg[i];
927 to->end[i] = from->end[i];
928 }
929 to->num_regs = from->num_regs;
930
931 #ifdef USE_CAPTURE_HISTORY
932 history_root_free(to);
933
934 if (IS_NOT_NULL(from->history_root)) {
935 to->history_root = history_tree_clone(from->history_root);
936 }
937 #endif
938 }
939
#ifdef USE_CALLOUT
/*
 * Fill an OnigCalloutArgs object and invoke a callout function.
 *
 *   func     - callout function to call
 *   ain      - value stored in args.in
 *   aname_id - value stored in args.name_id
 *   anum     - value stored in args.num
 *   user     - user data pointer passed as the second argument of func
 *   args     - OnigCalloutArgs lvalue to fill
 *   result   - lvalue receiving the return value of func
 *
 * The remaining fields are taken from names that must be in scope at
 * the expansion site: reg, str, end, sstart, right_range, s,
 * retry_in_match_counter, msa, stk_base, stk, mem_start_stk and
 * mem_end_stk.
 */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/*
 * Run a callout in retraction mode (ONIG_CALLOUT_IN_RETRACTION).
 * ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS let execution continue
 * after the macro.  Any other result ends the match: a positive value
 * is first replaced by ONIGERR_INVALID_ARGUMENT, the value is stored in
 * best_len, and control jumps to the match_at_end label.  Both best_len
 * and that label must exist at the expansion site.
 */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto match_at_end;\
    break;\
  }\
} while(0)
#endif
978
979
/** stack **/
/* Marker for "no stack entry"; parenthesized so it is safe in any expression. */
#define INVALID_STACK_INDEX   (-1)

/* Bit shared by every STK_*ALT* type; also used as STK_MASK_POP_USED. */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT             STK_ALT_FLAG
#define STK_ALT                   (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG)

/* handled by normal-POP */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
/* STK_MASK_POP_HANDLED is defined further down; that is fine because a
   macro body is only expanded where the macro is used. */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
#else
#define STK_REPEAT_INC             0x0040
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* for fill a blank */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
#else
#define STK_EMPTY_CHECK_START      0x3000
#endif
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */
/* #define STK_REPEAT                 0x0300 */
#define STK_CALL_FRAME             0x0400
#define STK_RETURN                 0x0500
#define STK_SAVE_VAL               0x0600
#define STK_PREC_READ_START        0x0700
#define STK_PREC_READ_END          0x0800

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1027
/* Index of an entry in the match stack (see INVALID_STACK_INDEX). */
typedef intptr_t StackIndex;

/*
 * One entry of the match stack.  `type` is compared against the STK_*
 * constants; the member of `u` that is valid depends on it.
 */
typedef struct _StackType {
  unsigned int type;   /* STK_* value */
  int zid;             /* id associated with the entry; e.g. used as the index
                          into repeat_stk / empty_check_stk by the POP_* macros */
  union {
    struct {
      Operation* pcode;     /* byte code position */
      UChar*     pstr;      /* string position */
      UChar*     pstr_prev; /* previous char position of pstr */
    } state;
    struct {
      int        count;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
      StackIndex prev_index;  /* index of stack */
#endif
    } repeat_inc;
    struct {
      UChar *pstr;        /* start/end position */
      /* Following information is set, if this stack type is MEM-START */
      StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */
      StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */
    } mem;
    struct {
      UChar *pstr;            /* start position */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
      StackIndex prev_index;  /* index of stack */
#endif
    } empty_check;
#ifdef USE_CALL
    struct {
      Operation *ret_addr; /* byte code position */
      UChar *pstr;         /* string position */
    } call_frame;
#endif
    struct {
      enum SaveType type;  /* kind of saved value */
      UChar* v;            /* saved value */
      UChar* v2;           /* second saved value */
    } val;
#ifdef USE_CALLOUT
    struct {
      int num;               /* callout number */
      OnigCalloutFunc func;  /* callout function */
    } callout;
#endif
  } u;
} StackType;
1076
#ifdef USE_CALLOUT

/*
 * Argument block handed to a callout function.  Every field is filled
 * in by the CALLOUT_BODY macro right before the call.
 */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;            /* callout direction, e.g. ONIG_CALLOUT_IN_RETRACTION */
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;           /* callout number */
  OnigRegex        regex;         /* regex being matched */
  const OnigUChar* string;        /* subject string */
  const OnigUChar* string_end;    /* end of the subject string */
  const OnigUChar* start;         /* match start position (sstart at the call site) */
  const OnigUChar* right_range;   /* right range of the match */
  const OnigUChar* current;  /* current matching position */
  unsigned long    retry_in_match_counter;  /* retry counter value at the time of the call */

  /* invisible to users */
  MatchArg*   msa;            /* match state of the running match */
  StackType*  stk_base;       /* bottom of the match stack */
  StackType*  stk;            /* current top of the match stack */
  StackIndex* mem_start_stk;  /* capture start slots (see STACK_MEM_START) */
  StackIndex* mem_end_stk;    /* capture end slots (see STACK_MEM_END) */
};

#endif
1100
/*
 * Layout of the StackIndex slot area that precedes the match stack.
 *
 * With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the area holds, in order:
 * num_repeat repeat slots, num_empty_check empty-check slots, then
 * (num_mem + 1) capture-start slots and (num_mem + 1) capture-end
 * slots.  Without it only the capture slots exist and the SAVE_/LOAD_/
 * POP_ helpers expand to nothing.
 *
 * UPDATE_FOR_STACK_REALLOC recomputes the slot pointers from
 * alloc_base; it expects alloc_base, reg, num_mem and the slot pointer
 * variables to be in scope.
 *
 * POP_REPEAT_INC and POP_EMPTY_CHECK_START each expand to an
 * "else if (...) {...}" fragment, so they can only be used directly
 * after an if / else-if body.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk      = (StackIndex* )alloc_base;\
  empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_start_stk   = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
  mem_end_stk     = mem_start_stk + num_mem + 1;\
} while(0)

/* save the current slot into the entry being pushed / point the slot at it / restore on pop */
#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk)
#define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}

/* same three operations for the empty-check slots */
#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}

#else

#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
  mem_start_stk = (StackIndex* )alloc_base;\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)

/* no repeat / empty-check slots in this configuration */
#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC

#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START

#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1136
/* Initialize a MatchArg (passed by value expression `msa`) for one match or
   search call: no saved stack yet (stack_p = 0), the caller's options, region
   and start position, the limits copied from the match param `mpv`, and the
   number of StackIndex slots required by `reg`.  The
   USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant additionally resets best_len
   to ONIG_MISMATCH. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
  (msa).mp = mpv;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
  (msa).mp = mpv;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#endif
1161
/* Release the stack buffer saved in msa.stack_p, if there is one.
   Wrapped in do { } while(0) so the expansion is a single statement: a bare
   `if` would silently capture an `else` that follows the macro call. */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
1163
1164
/* Above this many StackIndex slots STACK_INIT uses xmalloc instead of
   xalloca for the initial buffer. */
#define ALLOCA_PTR_NUM_LIMIT   50

/* Set up the match stack for the enclosing function.  Assigns the caller's
   is_alloca, alloc_base, stk_base, stk and stk_end.  The buffer holds
   msa->ptr_num StackIndex slots followed by the StackType entries.
     - msa->stack_p set:   reuse the saved buffer (msa->stack_n entries).
     - ptr_num over limit: xmalloc a buffer with stack_num entries.
     - otherwise:          xalloca a buffer with stack_num entries.
   Returns from the enclosing function via CHECK_NULL_RETURN_MEMERR when the
   allocation yields NULL.
   NOTE(review): the definition ends in `while(0);` so a call written with its
   own `;` expands to an extra empty statement. */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1197
1198
/* Hand the current stack buffer over to msa so that a later STACK_INIT can
   reuse it.  Records the entry count in msa->stack_n.  When the buffer came
   from xalloca (is_alloca != 0) it is copied into a fresh xmalloc block first;
   otherwise the existing block is stored as is.  Returns from the enclosing
   function via CHECK_NULL_RETURN_MEMERR if that xmalloc fails.  Uses
   stk_base, stk_end, alloc_base, is_alloca and msa from the enclosing scope. */
#define STACK_SAVE do{\
    msa->stack_n = (int )(stk_end - stk_base);\
  if (is_alloca != 0) {\
    size_t size = sizeof(StackIndex) * msa->ptr_num \
                + sizeof(StackType) * msa->stack_n;\
    msa->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
    xmemcpy(msa->stack_p, alloc_base, size);\
  }\
  else {\
    msa->stack_p = alloc_base;\
  };\
} while(0)
1212
1213 static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1214
1215 extern unsigned int
onig_get_match_stack_limit_size(void)1216 onig_get_match_stack_limit_size(void)
1217 {
1218 return MatchStackLimit;
1219 }
1220
1221 extern int
onig_set_match_stack_limit_size(unsigned int size)1222 onig_set_match_stack_limit_size(unsigned int size)
1223 {
1224 MatchStackLimit = size;
1225 return 0;
1226 }
1227
#ifdef USE_RETRY_LIMIT_IN_MATCH

/* Default retry limit; copied into each OnigMatchParam by
   onig_initialize_match_param(). */
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;

/* Count one retry and abort the match with ONIGERR_RETRY_LIMIT_IN_MATCH_OVER
   once the counter value before the increment is greater than the limit.
   Uses retry_in_match_counter and retry_limit_in_match from the enclosing
   scope. */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (retry_in_match_counter++ > retry_limit_in_match) {\
    MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
  }\
} while (0)

#else

/* Retry limiting is compiled out. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT_IN_MATCH */
1243
/* Return the default retry limit, or 0 when the library is built without
   USE_RETRY_LIMIT_IN_MATCH. */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
  unsigned long limit = 0;

#ifdef USE_RETRY_LIMIT_IN_MATCH
  limit = RetryLimitInMatch;
#endif

  return limit;
}
1254
1255 extern int
onig_set_retry_limit_in_match(unsigned long size)1256 onig_set_retry_limit_in_match(unsigned long size)
1257 {
1258 #ifdef USE_RETRY_LIMIT_IN_MATCH
1259 RetryLimitInMatch = size;
1260 return 0;
1261 #else
1262 return ONIG_NO_SUPPORT_CONFIG;
1263 #endif
1264 }
1265
#ifdef USE_CALLOUT
/* Callout functions that onig_initialize_match_param() installs into a new
   OnigMatchParam as progress_callout_of_contents /
   retraction_callout_of_contents. */
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
#endif
1270
1271 extern OnigMatchParam*
onig_new_match_param(void)1272 onig_new_match_param(void)
1273 {
1274 OnigMatchParam* p;
1275
1276 p = (OnigMatchParam* )xmalloc(sizeof(*p));
1277 if (IS_NOT_NULL(p)) {
1278 onig_initialize_match_param(p);
1279 }
1280
1281 return p;
1282 }
1283
1284 extern void
onig_free_match_param_content(OnigMatchParam * p)1285 onig_free_match_param_content(OnigMatchParam* p)
1286 {
1287 #ifdef USE_CALLOUT
1288 if (IS_NOT_NULL(p->callout_data)) {
1289 xfree(p->callout_data);
1290 p->callout_data = 0;
1291 }
1292 #endif
1293 }
1294
1295 extern void
onig_free_match_param(OnigMatchParam * p)1296 onig_free_match_param(OnigMatchParam* p)
1297 {
1298 if (IS_NOT_NULL(p)) {
1299 onig_free_match_param_content(p);
1300 xfree(p);
1301 }
1302 }
1303
1304 extern int
onig_initialize_match_param(OnigMatchParam * mp)1305 onig_initialize_match_param(OnigMatchParam* mp)
1306 {
1307 mp->match_stack_limit = MatchStackLimit;
1308 #ifdef USE_RETRY_LIMIT_IN_MATCH
1309 mp->retry_limit_in_match = RetryLimitInMatch;
1310 #endif
1311
1312 #ifdef USE_CALLOUT
1313 mp->progress_callout_of_contents = DefaultProgressCallout;
1314 mp->retraction_callout_of_contents = DefaultRetractionCallout;
1315 mp->match_at_call_counter = 0;
1316 mp->callout_user_data = 0;
1317 mp->callout_data = 0;
1318 mp->callout_data_alloc_num = 0;
1319 #endif
1320
1321 return ONIG_NORMAL;
1322 }
1323
1324 #ifdef USE_CALLOUT
1325
1326 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1327 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1328 {
1329 RegexExt* ext = reg->extp;
1330
1331 mp->match_at_call_counter = 0;
1332
1333 if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1334
1335 if (ext->callout_num > mp->callout_data_alloc_num) {
1336 CalloutData* d;
1337 size_t n = ext->callout_num * sizeof(*d);
1338 if (IS_NOT_NULL(mp->callout_data))
1339 d = (CalloutData* )xrealloc(mp->callout_data, n);
1340 else
1341 d = (CalloutData* )xmalloc(n);
1342 CHECK_NULL_RETURN_MEMERR(d);
1343
1344 mp->callout_data = d;
1345 mp->callout_data_alloc_num = ext->callout_num;
1346 }
1347
1348 xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1349 return ONIG_NORMAL;
1350 }
1351
/* Run adjust_match_param() and return its result from the enclosing function
   on failure.  Assigns the caller's `r`.  Expands to two statements, so it
   must not be used as the unbraced body of an if/else or loop. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;

/* Address of the CalloutData entry for callout number `num`; numbers start
   at 1, so entry (num - 1) is used. */
#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1357
1358 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1359 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1360 {
1361 OnigMatchParam* mp;
1362 int num;
1363 CalloutData* d;
1364
1365 mp = args->msa->mp;
1366 num = args->num;
1367
1368 d = CALLOUT_DATA_AT_NUM(mp, num);
1369 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1370 xmemset(d, 0, sizeof(*d));
1371 d->last_match_at_call_counter = mp->match_at_call_counter;
1372 return d->last_match_at_call_counter;
1373 }
1374
1375 return 0;
1376 }
1377
1378 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1379 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1380 int callout_num, int slot,
1381 OnigType* type, OnigValue* val)
1382 {
1383 OnigType t;
1384 CalloutData* d;
1385
1386 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1387
1388 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1389 t = d->slot[slot].type;
1390 if (IS_NOT_NULL(type)) *type = t;
1391 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1392 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1393 }
1394
1395 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1396 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1397 int slot, OnigType* type,
1398 OnigValue* val)
1399 {
1400 return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1401 args->num, slot, type, val);
1402 }
1403
1404 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1405 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1406 int callout_num, int slot,
1407 OnigType* type, OnigValue* val)
1408 {
1409 OnigType t;
1410 CalloutData* d;
1411
1412 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1413
1414 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1415 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1416 xmemset(d, 0, sizeof(*d));
1417 d->last_match_at_call_counter = mp->match_at_call_counter;
1418 }
1419
1420 t = d->slot[slot].type;
1421 if (IS_NOT_NULL(type)) *type = t;
1422 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1423 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1424 }
1425
1426 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1427 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1428 const UChar* tag, const UChar* tag_end, int slot,
1429 OnigType* type, OnigValue* val)
1430 {
1431 int num;
1432
1433 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1434 if (num < 0) return num;
1435 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1436
1437 return onig_get_callout_data(reg, mp, num, slot, type, val);
1438 }
1439
1440 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1441 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1442 int callout_num, int slot,
1443 OnigType* type, OnigValue* val)
1444 {
1445 return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1446 type, val);
1447 }
1448
1449 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1450 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1451 int slot, OnigType* type, OnigValue* val)
1452 {
1453 return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1454 type, val);
1455 }
1456
1457 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1458 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1459 int callout_num, int slot,
1460 OnigType type, OnigValue* val)
1461 {
1462 CalloutData* d;
1463
1464 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1465
1466 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1467 d->slot[slot].type = type;
1468 d->slot[slot].val = *val;
1469 d->last_match_at_call_counter = mp->match_at_call_counter;
1470
1471 return ONIG_NORMAL;
1472 }
1473
1474 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1475 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1476 const UChar* tag, const UChar* tag_end, int slot,
1477 OnigType type, OnigValue* val)
1478 {
1479 int num;
1480
1481 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1482 if (num < 0) return num;
1483 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1484
1485 return onig_set_callout_data(reg, mp, num, slot, type, val);
1486 }
1487
1488 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1489 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1490 int callout_num, int slot,
1491 OnigType type, OnigValue* val)
1492 {
1493 return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1494 type, val);
1495 }
1496
1497 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1498 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1499 int slot, OnigType type, OnigValue* val)
1500 {
1501 return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1502 type, val);
1503 }
1504
1505 #else
/* Without USE_CALLOUT there is nothing to adjust: expands to nothing. */
#define ADJUST_MATCH_PARAM(reg, mp)
1507 #endif /* USE_CALLOUT */
1508
1509
/*
 * Grow the match stack to twice its current number of entries.
 *
 *   is_alloca       non-zero when the current buffer must not be handed to
 *                   xrealloc; a new block is xmalloc'ed and the old contents
 *                   are copied into it instead.
 *   arg_alloc_base  in/out: start of the buffer (msa->ptr_num StackIndex
 *                   slots followed by the StackType entries).
 *   arg_stk_base    in/out: first stack entry.
 *   arg_stk_end     in/out: one past the last usable entry.
 *   arg_stk         in/out: current stack top; its offset from the base is
 *                   preserved.
 *   msa             supplies ptr_num and match_stack_limit, and receives the
 *                   buffer through STACK_SAVE on allocation failure.
 *
 * Returns 0 on success, ONIGERR_MEMORY when allocation fails, or
 * ONIGERR_MATCH_STACK_LIMIT_OVER when the stack already has
 * msa->match_stack_limit entries.  The limit is only consulted on the
 * xrealloc path.
 *
 * The local names alloc_base, stk_base, stk_end, is_alloca and msa are
 * referenced by the STACK_SAVE macro and must keep these names.
 */
static int
stack_double(int is_alloca, char** arg_alloc_base,
             StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
             MatchArg* msa)
{
  unsigned int n;
  int used;
  size_t size;
  size_t new_size;
  char* alloc_base;
  char* new_alloc_base;
  StackType *stk_base, *stk_end, *stk;

  alloc_base = *arg_alloc_base;
  stk_base = *arg_stk_base;
  stk_end  = *arg_stk_end;
  stk      = *arg_stk;

  /* size: bytes currently in use by the buffer; new_size: bytes after doubling */
  n = (unsigned int )(stk_end - stk_base);
  size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
  n *= 2;
  new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
  if (is_alloca != 0) {
    new_alloc_base = (char* )xmalloc(new_size);
    if (IS_NULL(new_alloc_base)) {
      /* NOTE(review): STACK_ENSURE runs STACK_SAVE again when this function
         fails; on this path that looks like a second xmalloc overwriting
         msa->stack_p -- confirm. */
      STACK_SAVE;
      return ONIGERR_MEMORY;
    }
    xmemcpy(new_alloc_base, alloc_base, size);
  }
  else {
    if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
      if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
        return ONIGERR_MATCH_STACK_LIMIT_OVER;
      else
        n = msa->match_stack_limit;
        /* new_size is not recomputed after this clamp, so the block requested
           below can be larger than n entries. */
    }
    new_alloc_base = (char* )xrealloc(alloc_base, new_size);
    if (IS_NULL(new_alloc_base)) {
      /* alloc_base was not reassigned, so the old block is what gets saved */
      STACK_SAVE;
      return ONIGERR_MEMORY;
    }
  }

  /* publish the new buffer and rebase the caller's pointers onto it */
  alloc_base = new_alloc_base;
  used = (int )(stk - stk_base);
  *arg_alloc_base = alloc_base;
  *arg_stk_base   = (StackType* )(alloc_base
                                  + (sizeof(StackIndex) * msa->ptr_num));
  *arg_stk      = *arg_stk_base + used;
  *arg_stk_end  = *arg_stk_base + n;
  return 0;
}
1563
/* Make sure at least `n` free entries remain above stk.  When they do not,
   the stack is doubled once through stack_double(); on failure the stack is
   saved into msa and the error is returned from the enclosing function.
   After a successful grow the buffer is no longer the alloca one and the
   index-table pointers are re-derived. */
#define STACK_ENSURE(n) do {\
    if ((int )(stk_end - stk) < (n)) {\
    int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
    if (r != 0) { STACK_SAVE; return r; } \
    is_alloca = 0;\
    UPDATE_FOR_STACK_REALLOC;\
  }\
} while(0)

/* Convert between a stack index and an entry pointer (relative to stk_base). */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1575
/* Push an entry that carries only a type tag. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* Non-zero when the entry's type has any STK_MASK_TO_VOID_TARGET bit set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)

/* Push a state entry: opcode pointer, string position and previous string
   position. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  STACK_INC;\
} while(0)

/* Push type and opcode pointer only.  Does not call STACK_ENSURE, so the
   caller must already have made room. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH_ENSURED (no STACK_ENSURE); the ONIG_DEBUG_MATCH build also
   records the caller's s and sprev in the entry. */
#ifdef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr      = s;\
  stk->u.state.pstr_prev = sprev;\
  STACK_INC;\
} while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
#endif

/* Typed shorthands for STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev)       STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
#define STACK_PUSH_PREC_READ_START(s,sprev) \
  STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START        STACK_PUSH_TYPE(STK_TO_VOID_START)
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
1624
/* Disabled: push of a STK_REPEAT entry. */
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->zid  = (sid);\
  stk->u.repeat.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif

/* Push the current count `ct` of repeat `sid`.  SAVE_/LOAD_TO_REPEAT_STK_VAR
   keep the repeat index table in step when it is enabled. */
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->zid  = (sid);\
  stk->u.repeat_inc.count = (ct);\
  SAVE_REPEAT_STK_VAR(sid);\
  LOAD_TO_REPEAT_STK_VAR(sid);\
  STACK_INC;\
} while(0)

/* Push the start of capture group `mnum` at string position `s`.  The
   previous start/end table values are saved in the entry so they can be
   restored on pop; the table's start slot then refers to this entry and its
   end slot is invalidated. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_start_stk[mnum]   = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]     = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push the end of capture group `mnum` at string position `s`; saves the
   previous table values and points the end slot at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a marker for the end of group `mnum` that carries no position. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->zid  = (mnum);\
  STACK_INC;\
} while(0)
1674
/* Scan down from the stack top for the STK_MEM_START entry of group `mnum`
   that is not paired with an end (or end mark) entry seen above it, and leave
   `k` pointing at it.  If no such entry exists the scan stops with k at
   stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->zid == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1690
/* Scan upward from `k` (towards stk) for the outermost start/end pair of
   capture group `mnum` and store their string positions in `start` and `end`.
   `k` is advanced by the scan; it stops on the matching STK_MEM_END entry, or
   at stk when the pair is incomplete (in which case `end` is left untouched).
   The group number is read from zid, which is where STACK_PUSH_MEM_START and
   STACK_PUSH_MEM_END store it. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1708
/* Push the start of empty check `cnum` at string position `s`.
   SAVE_/LOAD_TO_EMPTY_CHECK_STK_VAR keep the empty-check index table in step
   when it is enabled. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_START;\
  stk->zid  = (cnum);\
  stk->u.empty_check.pstr = (s);\
  SAVE_EMPTY_CHECK_STK_VAR(cnum);\
  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
  STACK_INC;\
} while(0)

/* Push the end marker of empty check `cnum`. */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_END;\
  stk->zid  = (cnum);\
  STACK_INC;\
} while(0)

/* Push a call frame whose return address is the opcode pointer `pat`. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a marker recording that a call has returned. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)

/* Push a saved value `sval` of kind `stype` under id `sid`. */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* As STACK_PUSH_SAVE_VAL, and also stores the caller's sprev in v2. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  stk->u.val.v2   = sprev;\
  STACK_INC;\
} while(0)
1757
/* Scan down from the stack top for the nearest STK_SAVE_VAL entry of kind
   `stype` and copy its value into `sval`.  `sval` is left untouched when no
   such entry is found. */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
      (sval) = k->u.val.v;\
      break;\
    }\
  }\
} while (0)

/* Same scan, additionally requiring id `sid`.  `level` is raised by
   STK_RETURN and lowered by STK_CALL_FRAME entries passed on the way down;
   a matching entry is only accepted while level is 0. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1789
/* Scan down from the stack top for the nearest STK_SAVE_VAL entry of kind
   `stype` with id `sid`; copy its value into `sval` and its v2 field into the
   caller's sprev.  `level` is raised by STK_RETURN and lowered by
   STK_CALL_FRAME entries passed on the way down; a matching entry is only
   accepted while level is 0.  Nothing is assigned when no entry qualifies.
   The STACK_BASE_CHECK label names this macro so that debug output points at
   the right place. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        sprev  = k->u.val.v2;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1810
/* Like STACK_GET_SAVE_VAL_TYPE_LAST_ID, but the downward scan starts at the
   entry `stk_from` itself (which is examined first) instead of just below the
   stack top.  The entry at stk_base is never examined.  The id is read from
   zid, which is where STACK_PUSH_SAVE_VAL stores it. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
  int level = 0;\
  StackType *k = (stk_from);\
  while (k > stk_base) {\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
    k--;\
  }\
} while (0)
1830
/* Push a callout entry for a callout of contents: the id is set to
   ONIG_NON_NAME_ID, and the callout number and function are recorded. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = ONIG_NON_NAME_ID;\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)

/* Push a callout entry for a callout of name with id `aid`. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = (aid);\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)
1848
/* Debug-only guard: when pointer `p` has dropped below stk_base, print the
   location label `at` to stderr and abort the match with ONIGERR_STACK_BUG.
   Expands to nothing unless ONIG_DEBUG is defined. */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop exactly one entry from the stack top. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
1863
1864
/* Expands to an `else if` arm (it must follow an if/else-if chain, as in
   STACK_POP): runs the retraction callout for a popped STK_CALLOUT entry.
   Empty without USE_CALLOUT. */
#ifdef USE_CALLOUT
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) {\
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
  }
#else
#define POP_CALLOUT_CASE
#endif
1873
/* Pop entries until one whose type has a STK_MASK_POP_USED bit is on top,
   leaving stk pointing at that entry.  How much bookkeeping is undone for the
   entries skipped on the way depends on the caller's pop_level:
     STACK_POP_LEVEL_FREE       nothing is restored;
     STACK_POP_LEVEL_MEM_START  capture tables are restored from
                                STK_MEM_START entries;
     any other value            capture tables are restored from
                                STK_MEM_START and STK_MEM_END entries, and
                                the repeat, empty-check and callout arms
                                (POP_REPEAT_INC, POP_EMPTY_CHECK_START,
                                POP_CALLOUT_CASE) are applied. */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
        mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
      }\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALLOUT_CASE\
      }\
    }\
    break;\
  }\
} while(0)
1916
/* Pop entries until one of type `til_type` is on top, leaving stk pointing at
   it.  Entries whose type has a STK_MASK_POP_HANDLED_TIL bit have their
   capture, repeat and empty-check bookkeeping restored on the way; callouts
   are deliberately not run.  `aname` is the label used by STACK_BASE_CHECK. */
#define POP_TIL_BODY(aname, til_type) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == (til_type)) break;\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)

/* Pop down to the nearest STK_ALT_PREC_READ_NOT entry. */
#define STACK_POP_TIL_ALT_PREC_READ_NOT  do {\
  POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
} while(0)

/* Pop down to the nearest STK_ALT_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT  do {\
  POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
} while(0)
1947
1948
/* Walk down from the stack top and retype every to-void target entry as
   STK_VOID, stopping after the nearest STK_TO_VOID_START entry (which is
   itself retyped).  `k` ends up on that entry; stk is not moved. */
#define STACK_EXEC_TO_VOID(k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
    if (IS_TO_VOID_TARGET(k)) {\
      if (k->type == STK_TO_VOID_START) {\
        k->type = STK_VOID;\
        break;\
      }\
      k->type = STK_VOID;\
    }\
  }\
} while(0)

/* Walk down from the stack top to the STK_PREC_READ_START entry that is not
   paired with a STK_PREC_READ_END entry seen above it, retyping every to-void
   target entry passed on the way as STK_VOID.  `k` ends up on the start
   entry; stk is not moved. */
#define STACK_GET_PREC_READ_START(k) do {\
  int level = 0;\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_PREC_READ_START) {\
      if (level == 0) {\
        break;\
      }\
      level--;\
    }\
    else if (k->type == STK_PREC_READ_END) {\
      level++;\
    }\
  }\
} while(0)
1984
1985
/* Walk down from the stack top to the nearest STK_EMPTY_CHECK_START entry
   with id `sid` and leave `k` on it.  The loop has no lower bound of its own;
   it relies on such an entry being present (or on STACK_BASE_CHECK in debug
   builds). */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) break;\
    }\
  }\
} while(0)

#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Locate the start entry of empty check `sid`: read it directly from the
   empty-check index table when the regex has no calls (reg->num_call == 0),
   otherwise search the stack. */
#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call == 0) {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
  else {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
} while(0)
#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif


/* Set `isnull` to non-zero when the string position `s` equals the position
   recorded at the start of empty check `sid`. */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  (isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
2019
/* For a STK_MEM_START entry `k`, compute in `addr` the string position of the
   group's previous end: 0 when prev_end is INVALID_STACK_INDEX; the pstr of
   the stack entry prev_end refers to when the group is flagged in
   reg->push_mem_end; otherwise prev_end itself reinterpreted as a pointer. */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else {\
    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
      (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
    else\
      (addr) = (UChar* )k->u.mem.prev_end;\
  }\
} while (0)
2031
2032 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/* Empty-loop check that also looks at capture groups.
   isnull = 0   the position moved since the start of empty check `sid`, or a
                group flagged in reg->empty_status_mem that was started since
                then has no previous end or a previous start that differs
                from its previous end;
   isnull = 1   the position did not move and no such group was found;
   isnull = -1  as 1, but some flagged group's previous end differs from `s`
                ("empty, but position changed").
   The scan over the entries pushed since the start entry stops at the first
   entry that forces 0. */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr != (s)) {\
    (isnull) = 0;\
  }\
  else {\
    UChar* endp;\
    (isnull) = 1;\
    while (k < stk) {\
      if (k->type == STK_MEM_START &&\
        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
        if (endp == 0) {\
          (isnull) = 0; break;\
        }\
        else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        else if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
      k++;\
    }\
  }\
} while(0)
2060
/* Variant of STACK_EMPTY_CHECK_MEM that always searches the stack and keeps a
   nesting level for empty check `sid`.  On the way down, end entries with
   this id raise the level and start entries with this id lower it; the start
   entry reached at level 0 is the one compared against `s`.  During the
   upward scan from that entry, nested start/end entries with the same id
   adjust the level again, and capture groups are only examined while the
   level is 0.  The resulting isnull values (0, 1, -1) have the same meaning
   as in STACK_EMPTY_CHECK_MEM. */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) {\
        if (level == 0) {\
          if (k->u.empty_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (level == 0 && \
                  MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
                  STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
                  if (endp == 0) {\
                    (isnull) = 0; break;\
                  }\
                  else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
                    (isnull) = 0; break;\
                  }\
                  else if (endp != s) {\
                    (isnull) = -1; /* empty, but position changed */\
                  }\
                }\
              }\
              else if (k->type == STK_EMPTY_CHECK_START) {\
                if (k->zid == (sid)) level++;\
              }\
              else if (k->type == STK_EMPTY_CHECK_END) {\
                if (k->zid == (sid)) level--;\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      if (k->zid == (sid)) level++;\
    }\
  }\
} while(0)
2114 #else
/* Empty-loop check that always searches the stack.  Walking down from the
   stack top, every STK_EMPTY_CHECK_END entry raises `level`; a
   STK_EMPTY_CHECK_START entry with id `id` reached at level 0 ends the search
   and sets `isnull` to non-zero when its recorded position equals `s`; any
   other start entry lowers the level.  The id is read from zid, which is
   where STACK_PUSH_EMPTY_CHECK_START stores it.  The loop has no lower bound
   of its own; it relies on a matching start entry being present. */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.empty_check.pstr == (s));\
          break;\
        }\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2135 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2136
/* Walk down from the stack top to the nearest STK_REPEAT_INC entry with id
   `sid` and copy its count into `c`.  When a STK_RETURN entry is met, the
   scan first skips down to the STK_CALL_FRAME entry that balances it (nested
   return/frame pairs are counted), so entries pushed inside a completed call
   are ignored.  Neither loop has a lower bound of its own. */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  while (1) {\
    (k)--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if ((k)->type == STK_REPEAT_INC) {\
      if ((k)->zid == (sid)) {\
        (c) = (k)->u.repeat_inc.count;\
        break;\
      }\
    }\
    else if ((k)->type == STK_RETURN) {\
      int level = -1;\
      while (1) {\
        (k)--;\
        if ((k)->type == STK_CALL_FRAME) {\
          level++;\
          if (level == 0) break;\
        }\
        else if ((k)->type == STK_RETURN) level--;\
      }\
    }\
  }\
} while(0)

#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Fetch the current count of repeat `sid`: read it through the repeat index
   table when the regex has no calls (reg->num_call == 0), otherwise search
   the stack. */
#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call == 0) {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
  else {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
} while(0)
#else
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2175
/* Find the return address for the call currently being executed: walk down
   from the stack top, letting each STK_RETURN entry cancel one
   STK_CALL_FRAME entry below it, and copy ret_addr of the first call frame
   reached at level 0 into `addr`.  The loop has no lower bound of its own. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
2193
2194
/* Compare `len` bytes at s1 and s2 and jump to the caller's `fail` label on
   the first difference.  s1, s2 and len are modified in place (both pointers
   advance, len counts down), so they must be modifiable lvalues. */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)

/* Case-insensitive comparison through string_cmp_ic(); jumps to the caller's
   `fail` label when it reports a mismatch.  Uses `encode` from the enclosing
   scope. */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
    goto fail; \
} while(0)
2205
/* Case-insensitive comparison of two byte ranges of length `mblen`, one
   starting at s1 and one at *ps2.  Both sides are case-folded one step at a
   time with ONIGENC_MBC_CASE_FOLD and the folded bytes are compared.
   Returns 1 and advances *ps2 past the compared text when everything
   matched; returns 0 (leaving *ps2 untouched) on the first difference. */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar* cur2   = *ps2;
  UChar* limit1 = s1 + mblen;
  UChar* limit2 = cur2 + mblen;
  int n1, n2, i;

  while (s1 < limit1) {
    n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, limit1, folded1);
    n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &cur2, limit2, folded2);
    if (n1 != n2) return 0;

    /* the folded sequences have equal length; compare them byte by byte */
    for (i = 0; i < n1; i++) {
      if (folded1[i] != folded2[i]) return 0;
    }
  }

  *ps2 = cur2;
  return 1;
}
2233
/* Like STRING_CMP, but reports the outcome in `is_fail` (1: differ,
   0: equal) instead of jumping.  s1, s2 and len are modified. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  for (;;) {\
    if ((len)-- <= 0) break;\
    if (*(s1)++ == *(s2)++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
2242
/* Case-insensitive counterpart of STRING_CMP_VALUE: sets `is_fail` to 1
   when string_cmp_ic() returns 0 and to 0 otherwise.  Uses the enclosing
   scope's `encode`. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) != 0) ? 0 : 1; \
} while(0)
2249
2250
/* Position and range helpers.  They are written against the locals of the
   function that uses them: str, end, s, right_range and in_right_range. */
#define IS_EMPTY_STR           (str == end)              /* str and end coincide */
#define ON_STR_BEGIN(s)        ((s) == str)              /* s is at str */
#define ON_STR_END(s)          ((s) == end)              /* s is at end */
#define DATA_ENSURE_CHECK1     (s < right_range)         /* at least 1 byte before right_range */
#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)  /* at least n bytes before right_range */
/* Jump to the local `fail` label unless n bytes are available at s.
   NOTE: this expands to a bare `if` statement, so it must not be followed
   by an `else` that is meant for an outer `if`. */
#define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail

/* Load right_range from the in_right_range argument (const is cast away). */
#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2259
2260 #ifdef USE_CAPTURE_HISTORY
2261 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)2262 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2263 StackType* stk_top, UChar* str, regex_t* reg)
2264 {
2265 int n, r;
2266 OnigCaptureTreeNode* child;
2267 StackType* k = *kp;
2268
2269 while (k < stk_top) {
2270 if (k->type == STK_MEM_START) {
2271 n = k->zid;
2272 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2273 MEM_STATUS_AT(reg->capture_history, n) != 0) {
2274 child = history_node_new();
2275 CHECK_NULL_RETURN_MEMERR(child);
2276 child->group = n;
2277 child->beg = (int )(k->u.mem.pstr - str);
2278 r = history_tree_add_child(node, child);
2279 if (r != 0) return r;
2280 *kp = (k + 1);
2281 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2282 if (r != 0) return r;
2283
2284 k = *kp;
2285 child->end = (int )(k->u.mem.pstr - str);
2286 }
2287 }
2288 else if (k->type == STK_MEM_END) {
2289 if (k->zid == node->group) {
2290 node->end = (int )(k->u.mem.pstr - str);
2291 *kp = k;
2292 return 0;
2293 }
2294 }
2295 k++;
2296 }
2297
2298 return 1; /* 1: root node ending. */
2299 }
2300 #endif
2301
2302 #ifdef USE_BACKREF_WITH_LEVEL
/* Return 1 if group number `mem` occurs among the first `num` entries of
   `memp`, otherwise 0. */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  int idx = 0;

  while (idx < num) {
    if ((int )memp[idx] == mem) {
      return 1;
    }
    idx++;
  }

  return 0;
}
2312
2313 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2314 backref_match_at_nested_level(regex_t* reg,
2315 StackType* top, StackType* stk_base,
2316 int ignore_case, int case_fold_flag,
2317 int nest, int mem_num, MemNumType* memp,
2318 UChar** s, const UChar* send)
2319 {
2320 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2321 int level;
2322 StackType* k;
2323
2324 level = 0;
2325 k = top;
2326 k--;
2327 while (k >= stk_base) {
2328 if (k->type == STK_CALL_FRAME) {
2329 level--;
2330 }
2331 else if (k->type == STK_RETURN) {
2332 level++;
2333 }
2334 else if (level == nest) {
2335 if (k->type == STK_MEM_START) {
2336 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2337 pstart = k->u.mem.pstr;
2338 if (IS_NOT_NULL(pend)) {
2339 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2340 p = pstart;
2341 ss = *s;
2342
2343 if (ignore_case != 0) {
2344 if (string_cmp_ic(reg->enc, case_fold_flag,
2345 pstart, &ss, (int )(pend - pstart)) == 0)
2346 return 0; /* or goto next_mem; */
2347 }
2348 else {
2349 while (p < pend) {
2350 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2351 }
2352 }
2353
2354 *s = ss;
2355 return 1;
2356 }
2357 }
2358 }
2359 else if (k->type == STK_MEM_END) {
2360 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2361 pend = k->u.mem.pstr;
2362 }
2363 }
2364 }
2365 k--;
2366 }
2367
2368 return 0;
2369 }
2370
2371 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2372 backref_check_at_nested_level(regex_t* reg,
2373 StackType* top, StackType* stk_base,
2374 int nest, int mem_num, MemNumType* memp)
2375 {
2376 int level;
2377 StackType* k;
2378
2379 level = 0;
2380 k = top;
2381 k--;
2382 while (k >= stk_base) {
2383 if (k->type == STK_CALL_FRAME) {
2384 level--;
2385 }
2386 else if (k->type == STK_RETURN) {
2387 level++;
2388 }
2389 else if (level == nest) {
2390 if (k->type == STK_MEM_END) {
2391 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2392 return 1;
2393 }
2394 }
2395 }
2396 k--;
2397 }
2398
2399 return 0;
2400 }
2401 #endif /* USE_BACKREF_WITH_LEVEL */
2402
2403
2404 #ifdef ONIG_DEBUG_STATISTICS
2405
/* Use gettimeofday() as the time source for per-opcode timing; the #else
   branch below falls back to times(). */
#define USE_TIMEOFDAY

#ifdef USE_TIMEOFDAY
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/* ts is stamped by SOP_IN, te by SOP_OUT. */
static struct timeval ts, te;
#define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
/* Difference te - ts expressed in microseconds. */
#define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                           (((te).tv_sec - (ts).tv_sec)*1000000))
#else
#ifdef HAVE_SYS_TIMES_H
#include <sys/times.h>
#endif
/* ts is stamped by SOP_IN, te by SOP_OUT. */
static struct tms ts, te;
#define GETTIME(t)         times(&(t))
/* Difference of the tms_utime fields of te and ts. */
#define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
#endif

/* Per-opcode statistics tables. */
static int OpCounter[256];          /* number of times each opcode was entered */
static int OpPrevCounter[256];      /* times each opcode directly preceded OpPrevTarget */
static unsigned long OpTime[256];   /* accumulated TIMEDIFF per opcode */
static int OpCurr = OP_FINISH;      /* opcode most recently entered via SOP_IN */
static int OpPrevTarget = OP_FAIL;  /* opcode whose predecessors are counted */
static int MaxStackDepth = 0;       /* largest stk - stk_base seen by STACK_INC */

/* Enter an opcode: update the counters and stamp the start time. */
#define SOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/* Leave an opcode: add the elapsed time to the current opcode's total. */
#define SOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2446
2447 extern void
onig_statistics_init(void)2448 onig_statistics_init(void)
2449 {
2450 int i;
2451 for (i = 0; i < 256; i++) {
2452 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2453 }
2454 MaxStackDepth = 0;
2455 }
2456
2457 extern int
onig_print_statistics(FILE * f)2458 onig_print_statistics(FILE* f)
2459 {
2460 int r;
2461 int i;
2462
2463 r = fprintf(f, " count prev time\n");
2464 if (r < 0) return -1;
2465
2466 for (i = 0; OpInfo[i].opcode >= 0; i++) {
2467 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2468 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2469 if (r < 0) return -1;
2470 }
2471 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2472 if (r < 0) return -1;
2473
2474 return 0;
2475 }
2476
/* Statistics build: advance the stack pointer and remember the deepest
   stack position (stk - stk_base) reached so far in MaxStackDepth. */
#define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
2482
2483 #else
/* Statistics disabled: STACK_INC only advances the stack pointer and the
   SOP_IN / SOP_OUT hooks expand to nothing. */
#define STACK_INC     stk++

#define SOP_IN(opcode)
#define SOP_OUT
2488 #endif
2489
2490
/* matching region of POSIX API */
/* Offset type and {start, end} pair that match_at() fills in when
   IS_POSIX_REGION(msa->options) is set; in that case msa->region is cast to
   an array of posix_regmatch_t. */
typedef int regoff_t;

typedef struct {
  regoff_t  rm_so;  /* start offset relative to str, or ONIG_REGION_NOTPOS */
  regoff_t  rm_eo;  /* end offset relative to str, or ONIG_REGION_NOTPOS */
} posix_regmatch_t;
2498
2499
2500
#ifdef USE_THREADED_CODE

/* Threaded dispatch: each opcode body is a label L_<name> and control moves
   between bodies with `goto *address`, so no surrounding loop or switch is
   emitted. */
#define BYTECODE_INTERPRETER_START      GOTO_OP;
#define BYTECODE_INTERPRETER_END
/* Opcode entry: label, statistics hook, remember where s was on entry
   (sbegin), then the debug trace hook. */
#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
#define DEFAULT_OP   /* L_DEFAULT: */
/* NEXT_OP sets sprev to the entry position of the opcode just executed
   before dispatching; JUMP_OP dispatches without touching sprev. */
#define NEXT_OP      sprev = sbegin; JUMP_OP
#define JUMP_OP      GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
/* The Operation itself stores the label address. */
#define GOTO_OP      goto *(p->opaddr)
#else
/* The label address is looked up by opcode. */
#define GOTO_OP      goto *opcode_to_label[p->opcode]
#endif
#define BREAK_OP     /* Nothing */

#else

/* Switch dispatch: an endless loop around a switch on p->opcode.  Leaving
   the switch with `break` (NEXT_OP) runs `sprev = sbegin` at the bottom of
   the loop; `continue` (JUMP_OP / GOTO_OP) skips that update. */
#define BYTECODE_INTERPRETER_START \
  while (1) {\
  MATCH_DEBUG_OUT(0)\
  sbegin = s;\
  switch (p->opcode) {
#define BYTECODE_INTERPRETER_END  } sprev = sbegin; }
#define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP   default:
#define NEXT_OP      break
#define JUMP_OP      GOTO_OP
/* The `break` after `continue` is never reached. */
#define GOTO_OP      continue; break
#define BREAK_OP     break

#endif /* USE_THREADED_CODE */
2532
/* Step p to the following Operation. */
#define INC_OP                      p++
/* The *_OUT forms run the statistics exit hook (SOP_OUT) and then perform
   the corresponding NEXT_OP / JUMP_OP / BREAK_OP. */
#define NEXT_OUT                    SOP_OUT; NEXT_OP
#define JUMP_OUT                    SOP_OUT; JUMP_OP
#define BREAK_OUT                   SOP_OUT; BREAK_OP
/* As JUMP_OUT, with CHECK_INTERRUPT_IN_MATCH expanded before the jump. */
#define CHECK_INTERRUPT_JUMP_OUT    SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2538
2539
#ifdef ONIG_DEBUG_MATCH
/* Trace hook: prints one line to stderr for the Operation at p - offset.
   The line holds the running `counter`, the stack index of stk, the offset
   of s from str (-1 when s is NULL), up to 7 characters of the text at s
   in quotes (with "..." appended when more text follows), padding, and then
   either "----: finish" for FinishCode or the operation index followed by
   its printed byte code.
   Relies on these names from the enclosing scope: i, p, s, str, end, stk,
   reg, encode, counter and FinishCode.
   NOTE: the definition already ends with `;` -- the dispatch macros invoke
   it without a trailing semicolon. */
#define MATCH_DEBUG_OUT(offset) do {\
  Operation *xp;\
  UChar *q, *bp, buf[50];\
  int len, spos;\
  spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
  xp = p - (offset);\
  fprintf(stderr, "%7u: %7ld: %4d> \"",\
          counter, GET_STACK_INDEX(stk), spos);\
  counter++;\
  bp = buf;\
  if (IS_NOT_NULL(s)) {\
    for (i = 0, q = s; i < 7 && q < end; i++) {\
      len = enclen(encode, q);\
      while (len-- > 0) *bp++ = *q++;\
    }\
    if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
    else { xmemcpy(bp, "\"", 1); bp += 1; }\
  }\
  else {\
    xmemcpy(bp, "\"", 1); bp += 1;\
  }\
  *bp = 0;\
  fputs((char* )buf, stderr);\
  for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
  if (xp == FinishCode)\
    fprintf(stderr, "----: finish");\
  else {\
    fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
    print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
  }\
  fprintf(stderr, "\n");\
} while(0);
#else
/* Tracing disabled: expands to nothing. */
#define MATCH_DEBUG_OUT(offset)
#endif
2576
/* Store err_code in best_len and jump to the match_at_end label.
   The body is wrapped in do { } while(0) so that both statements stay under
   the control of an unbraced `if`, as in
   `if (r < 0) MATCH_AT_ERROR_RETURN(r);` -- without the wrapper the goto
   would execute unconditionally. */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = (err_code); goto match_at_end;\
} while(0)
2578
2579
2580 /* match data(str - end) from position (sstart). */
2581 /* if sstart == str then set sprev to NULL. */
2582 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,UChar * sprev,MatchArg * msa)2583 match_at(regex_t* reg, const UChar* str, const UChar* end,
2584 const UChar* in_right_range, const UChar* sstart, UChar* sprev,
2585 MatchArg* msa)
2586 {
2587
2588 #if defined(USE_DIRECT_THREADED_CODE)
2589 static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2590 #else
2591 static Operation FinishCode[] = { { OP_FINISH } };
2592 #endif
2593
2594 #ifdef USE_THREADED_CODE
2595 static const void *opcode_to_label[] = {
2596 &&L_FINISH,
2597 &&L_END,
2598 &&L_STR_1,
2599 &&L_STR_2,
2600 &&L_STR_3,
2601 &&L_STR_4,
2602 &&L_STR_5,
2603 &&L_STR_N,
2604 &&L_STR_MB2N1,
2605 &&L_STR_MB2N2,
2606 &&L_STR_MB2N3,
2607 &&L_STR_MB2N,
2608 &&L_STR_MB3N,
2609 &&L_STR_MBN,
2610 &&L_STR_1_IC,
2611 &&L_STR_N_IC,
2612 &&L_CCLASS,
2613 &&L_CCLASS_MB,
2614 &&L_CCLASS_MIX,
2615 &&L_CCLASS_NOT,
2616 &&L_CCLASS_MB_NOT,
2617 &&L_CCLASS_MIX_NOT,
2618 &&L_ANYCHAR,
2619 &&L_ANYCHAR_ML,
2620 &&L_ANYCHAR_STAR,
2621 &&L_ANYCHAR_ML_STAR,
2622 &&L_ANYCHAR_STAR_PEEK_NEXT,
2623 &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2624 &&L_WORD,
2625 &&L_WORD_ASCII,
2626 &&L_NO_WORD,
2627 &&L_NO_WORD_ASCII,
2628 &&L_WORD_BOUNDARY,
2629 &&L_NO_WORD_BOUNDARY,
2630 &&L_WORD_BEGIN,
2631 &&L_WORD_END,
2632 &&L_TEXT_SEGMENT_BOUNDARY,
2633 &&L_BEGIN_BUF,
2634 &&L_END_BUF,
2635 &&L_BEGIN_LINE,
2636 &&L_END_LINE,
2637 &&L_SEMI_END_BUF,
2638 &&L_BEGIN_POSITION,
2639 &&L_BACKREF1,
2640 &&L_BACKREF2,
2641 &&L_BACKREF_N,
2642 &&L_BACKREF_N_IC,
2643 &&L_BACKREF_MULTI,
2644 &&L_BACKREF_MULTI_IC,
2645 &&L_BACKREF_WITH_LEVEL,
2646 &&L_BACKREF_WITH_LEVEL_IC,
2647 &&L_BACKREF_CHECK,
2648 &&L_BACKREF_CHECK_WITH_LEVEL,
2649 &&L_MEM_START,
2650 &&L_MEM_START_PUSH,
2651 &&L_MEM_END_PUSH,
2652 #ifdef USE_CALL
2653 &&L_MEM_END_PUSH_REC,
2654 #endif
2655 &&L_MEM_END,
2656 #ifdef USE_CALL
2657 &&L_MEM_END_REC,
2658 #endif
2659 &&L_FAIL,
2660 &&L_JUMP,
2661 &&L_PUSH,
2662 &&L_PUSH_SUPER,
2663 &&L_POP_OUT,
2664 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2665 &&L_PUSH_OR_JUMP_EXACT1,
2666 #endif
2667 &&L_PUSH_IF_PEEK_NEXT,
2668 &&L_REPEAT,
2669 &&L_REPEAT_NG,
2670 &&L_REPEAT_INC,
2671 &&L_REPEAT_INC_NG,
2672 &&L_EMPTY_CHECK_START,
2673 &&L_EMPTY_CHECK_END,
2674 &&L_EMPTY_CHECK_END_MEMST,
2675 #ifdef USE_CALL
2676 &&L_EMPTY_CHECK_END_MEMST_PUSH,
2677 #endif
2678 &&L_PREC_READ_START,
2679 &&L_PREC_READ_END,
2680 &&L_PREC_READ_NOT_START,
2681 &&L_PREC_READ_NOT_END,
2682 &&L_ATOMIC_START,
2683 &&L_ATOMIC_END,
2684 &&L_LOOK_BEHIND,
2685 &&L_LOOK_BEHIND_NOT_START,
2686 &&L_LOOK_BEHIND_NOT_END,
2687 &&L_PUSH_SAVE_VAL,
2688 &&L_UPDATE_VAR,
2689 #ifdef USE_CALL
2690 &&L_CALL,
2691 &&L_RETURN,
2692 #endif
2693 #ifdef USE_CALLOUT
2694 &&L_CALLOUT_CONTENTS,
2695 &&L_CALLOUT_NAME,
2696 #endif
2697 };
2698 #endif
2699
2700 int i, n, num_mem, best_len, pop_level;
2701 LengthType tlen, tlen2;
2702 MemNumType mem;
2703 RelAddrType addr;
2704 UChar *s, *q, *ps, *sbegin;
2705 UChar *right_range;
2706 int is_alloca;
2707 char *alloc_base;
2708 StackType *stk_base, *stk, *stk_end;
2709 StackType *stkp; /* used as any purpose. */
2710 StackIndex *mem_start_stk, *mem_end_stk;
2711 UChar* keep;
2712
2713 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2714 StackIndex *repeat_stk;
2715 StackIndex *empty_check_stk;
2716 #endif
2717 #ifdef USE_RETRY_LIMIT_IN_MATCH
2718 unsigned long retry_limit_in_match;
2719 unsigned long retry_in_match_counter;
2720 #endif
2721 #ifdef USE_CALLOUT
2722 int of;
2723 #endif
2724
2725 Operation* p = reg->ops;
2726 OnigOptionType option = reg->options;
2727 OnigEncoding encode = reg->enc;
2728 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2729
2730 #ifdef ONIG_DEBUG_MATCH
2731 static unsigned int counter = 1;
2732 #endif
2733
2734 #ifdef USE_DIRECT_THREADED_CODE
2735 if (IS_NULL(msa)) {
2736 for (i = 0; i < reg->ops_used; i++) {
2737 const void* addr;
2738 addr = opcode_to_label[reg->ocs[i]];
2739 p->opaddr = addr;
2740 p++;
2741 }
2742 return ONIG_NORMAL;
2743 }
2744 #endif
2745
2746 #ifdef USE_CALLOUT
2747 msa->mp->match_at_call_counter++;
2748 #endif
2749
2750 #ifdef USE_RETRY_LIMIT_IN_MATCH
2751 retry_limit_in_match = msa->retry_limit_in_match;
2752 #endif
2753
2754 pop_level = reg->stack_pop_level;
2755 num_mem = reg->num_mem;
2756 STACK_INIT(INIT_MATCH_STACK_SIZE);
2757 UPDATE_FOR_STACK_REALLOC;
2758 for (i = 1; i <= num_mem; i++) {
2759 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
2760 }
2761
2762 #ifdef ONIG_DEBUG_MATCH
2763 fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2764 str, end, sstart, sprev);
2765 fprintf(stderr, "size: %d, start offset: %d\n",
2766 (int )(end - str), (int )(sstart - str));
2767 #endif
2768
2769 best_len = ONIG_MISMATCH;
2770 keep = s = (UChar* )sstart;
2771 STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
2772 INIT_RIGHT_RANGE;
2773
2774 #ifdef USE_RETRY_LIMIT_IN_MATCH
2775 retry_in_match_counter = 0;
2776 #endif
2777
2778 BYTECODE_INTERPRETER_START {
2779 CASE_OP(END)
2780 n = (int )(s - sstart);
2781 if (n > best_len) {
2782 OnigRegion* region;
2783 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2784 if (IS_FIND_LONGEST(option)) {
2785 if (n > msa->best_len) {
2786 msa->best_len = n;
2787 msa->best_s = (UChar* )sstart;
2788 }
2789 else
2790 goto end_best_len;
2791 }
2792 #endif
2793 best_len = n;
2794 region = msa->region;
2795 if (region) {
2796 if (keep > s) keep = s;
2797
2798 #ifdef USE_POSIX_API_REGION_OPTION
2799 if (IS_POSIX_REGION(msa->options)) {
2800 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
2801
2802 rmt[0].rm_so = (regoff_t )(keep - str);
2803 rmt[0].rm_eo = (regoff_t )(s - str);
2804 for (i = 1; i <= num_mem; i++) {
2805 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2806 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
2807 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
2808 }
2809 else {
2810 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
2811 }
2812 }
2813 }
2814 else {
2815 #endif /* USE_POSIX_API_REGION_OPTION */
2816 region->beg[0] = (int )(keep - str);
2817 region->end[0] = (int )(s - str);
2818 for (i = 1; i <= num_mem; i++) {
2819 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2820 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
2821 region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
2822 }
2823 else {
2824 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2825 }
2826 }
2827
2828 #ifdef USE_CAPTURE_HISTORY
2829 if (reg->capture_history != 0) {
2830 int r;
2831 OnigCaptureTreeNode* node;
2832
2833 if (IS_NULL(region->history_root)) {
2834 region->history_root = node = history_node_new();
2835 CHECK_NULL_RETURN_MEMERR(node);
2836 }
2837 else {
2838 node = region->history_root;
2839 history_tree_clear(node);
2840 }
2841
2842 node->group = 0;
2843 node->beg = (int )(keep - str);
2844 node->end = (int )(s - str);
2845
2846 stkp = stk_base;
2847 r = make_capture_history_tree(region->history_root, &stkp,
2848 stk, (UChar* )str, reg);
2849 if (r < 0) MATCH_AT_ERROR_RETURN(r);
2850 }
2851 #endif /* USE_CAPTURE_HISTORY */
2852 #ifdef USE_POSIX_API_REGION_OPTION
2853 } /* else IS_POSIX_REGION() */
2854 #endif
2855 } /* if (region) */
2856 } /* n > best_len */
2857
2858 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2859 end_best_len:
2860 #endif
2861 SOP_OUT;
2862
2863 if (IS_FIND_CONDITION(option)) {
2864 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2865 best_len = ONIG_MISMATCH;
2866 goto fail; /* for retry */
2867 }
2868 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2869 goto fail; /* for retry */
2870 }
2871 }
2872
2873 /* default behavior: return first-matching result. */
2874 goto match_at_end;
2875
2876 CASE_OP(STR_1)
2877 DATA_ENSURE(1);
2878 ps = p->exact.s;
2879 if (*ps != *s) goto fail;
2880 s++;
2881 INC_OP;
2882 NEXT_OUT;
2883
2884 CASE_OP(STR_1_IC)
2885 {
2886 int len;
2887 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2888
2889 DATA_ENSURE(1);
2890 len = ONIGENC_MBC_CASE_FOLD(encode,
2891 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2892 case_fold_flag,
2893 &s, end, lowbuf);
2894 DATA_ENSURE(0);
2895 q = lowbuf;
2896 ps = p->exact.s;
2897 while (len-- > 0) {
2898 if (*ps != *q) goto fail;
2899 ps++; q++;
2900 }
2901 }
2902 INC_OP;
2903 NEXT_OUT;
2904
2905 CASE_OP(STR_2)
2906 DATA_ENSURE(2);
2907 ps = p->exact.s;
2908 if (*ps != *s) goto fail;
2909 ps++; s++;
2910 if (*ps != *s) goto fail;
2911 sprev = s;
2912 s++;
2913 INC_OP;
2914 JUMP_OUT;
2915
2916 CASE_OP(STR_3)
2917 DATA_ENSURE(3);
2918 ps = p->exact.s;
2919 if (*ps != *s) goto fail;
2920 ps++; s++;
2921 if (*ps != *s) goto fail;
2922 ps++; s++;
2923 if (*ps != *s) goto fail;
2924 sprev = s;
2925 s++;
2926 INC_OP;
2927 JUMP_OUT;
2928
2929 CASE_OP(STR_4)
2930 DATA_ENSURE(4);
2931 ps = p->exact.s;
2932 if (*ps != *s) goto fail;
2933 ps++; s++;
2934 if (*ps != *s) goto fail;
2935 ps++; s++;
2936 if (*ps != *s) goto fail;
2937 ps++; s++;
2938 if (*ps != *s) goto fail;
2939 sprev = s;
2940 s++;
2941 INC_OP;
2942 JUMP_OUT;
2943
2944 CASE_OP(STR_5)
2945 DATA_ENSURE(5);
2946 ps = p->exact.s;
2947 if (*ps != *s) goto fail;
2948 ps++; s++;
2949 if (*ps != *s) goto fail;
2950 ps++; s++;
2951 if (*ps != *s) goto fail;
2952 ps++; s++;
2953 if (*ps != *s) goto fail;
2954 ps++; s++;
2955 if (*ps != *s) goto fail;
2956 sprev = s;
2957 s++;
2958 INC_OP;
2959 JUMP_OUT;
2960
2961 CASE_OP(STR_N)
2962 tlen = p->exact_n.n;
2963 DATA_ENSURE(tlen);
2964 ps = p->exact_n.s;
2965 while (tlen-- > 0) {
2966 if (*ps++ != *s++) goto fail;
2967 }
2968 sprev = s - 1;
2969 INC_OP;
2970 JUMP_OUT;
2971
2972 CASE_OP(STR_N_IC)
2973 {
2974 int len;
2975 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2976
2977 tlen = p->exact_n.n;
2978 ps = p->exact_n.s;
2979 endp = ps + tlen;
2980 while (ps < endp) {
2981 sprev = s;
2982 DATA_ENSURE(1);
2983 len = ONIGENC_MBC_CASE_FOLD(encode,
2984 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2985 case_fold_flag,
2986 &s, end, lowbuf);
2987 DATA_ENSURE(0);
2988 q = lowbuf;
2989 while (len-- > 0) {
2990 if (ps >= endp) goto fail;
2991 if (*ps != *q) goto fail;
2992 ps++; q++;
2993 }
2994 }
2995 }
2996
2997 INC_OP;
2998 JUMP_OUT;
2999
3000 CASE_OP(STR_MB2N1)
3001 DATA_ENSURE(2);
3002 ps = p->exact.s;
3003 if (*ps != *s) goto fail;
3004 ps++; s++;
3005 if (*ps != *s) goto fail;
3006 s++;
3007 INC_OP;
3008 NEXT_OUT;
3009
3010 CASE_OP(STR_MB2N2)
3011 DATA_ENSURE(4);
3012 ps = p->exact.s;
3013 if (*ps != *s) goto fail;
3014 ps++; s++;
3015 if (*ps != *s) goto fail;
3016 ps++; s++;
3017 sprev = s;
3018 if (*ps != *s) goto fail;
3019 ps++; s++;
3020 if (*ps != *s) goto fail;
3021 s++;
3022 INC_OP;
3023 JUMP_OUT;
3024
3025 CASE_OP(STR_MB2N3)
3026 DATA_ENSURE(6);
3027 ps = p->exact.s;
3028 if (*ps != *s) goto fail;
3029 ps++; s++;
3030 if (*ps != *s) goto fail;
3031 ps++; s++;
3032 if (*ps != *s) goto fail;
3033 ps++; s++;
3034 if (*ps != *s) goto fail;
3035 ps++; s++;
3036 sprev = s;
3037 if (*ps != *s) goto fail;
3038 ps++; s++;
3039 if (*ps != *s) goto fail;
3040 ps++; s++;
3041 INC_OP;
3042 JUMP_OUT;
3043
3044 CASE_OP(STR_MB2N)
3045 tlen = p->exact_n.n;
3046 DATA_ENSURE(tlen * 2);
3047 ps = p->exact_n.s;
3048 while (tlen-- > 0) {
3049 if (*ps != *s) goto fail;
3050 ps++; s++;
3051 if (*ps != *s) goto fail;
3052 ps++; s++;
3053 }
3054 sprev = s - 2;
3055 INC_OP;
3056 JUMP_OUT;
3057
3058 CASE_OP(STR_MB3N)
3059 tlen = p->exact_n.n;
3060 DATA_ENSURE(tlen * 3);
3061 ps = p->exact_n.s;
3062 while (tlen-- > 0) {
3063 if (*ps != *s) goto fail;
3064 ps++; s++;
3065 if (*ps != *s) goto fail;
3066 ps++; s++;
3067 if (*ps != *s) goto fail;
3068 ps++; s++;
3069 }
3070 sprev = s - 3;
3071 INC_OP;
3072 JUMP_OUT;
3073
3074 CASE_OP(STR_MBN)
3075 tlen = p->exact_len_n.len; /* mb byte len */
3076 tlen2 = p->exact_len_n.n; /* number of chars */
3077 tlen2 *= tlen;
3078 DATA_ENSURE(tlen2);
3079 ps = p->exact_len_n.s;
3080 while (tlen2-- > 0) {
3081 if (*ps != *s) goto fail;
3082 ps++; s++;
3083 }
3084 sprev = s - tlen;
3085 INC_OP;
3086 JUMP_OUT;
3087
3088 CASE_OP(CCLASS)
3089 DATA_ENSURE(1);
3090 if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3091 s++;
3092 INC_OP;
3093 NEXT_OUT;
3094
3095 CASE_OP(CCLASS_MB)
3096 DATA_ENSURE(1);
3097 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3098
3099 cclass_mb:
3100 {
3101 OnigCodePoint code;
3102 UChar *ss;
3103 int mb_len;
3104
3105 mb_len = enclen(encode, s);
3106 DATA_ENSURE(mb_len);
3107 ss = s;
3108 s += mb_len;
3109 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3110 if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3111 }
3112 INC_OP;
3113 NEXT_OUT;
3114
3115 CASE_OP(CCLASS_MIX)
3116 DATA_ENSURE(1);
3117 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3118 goto cclass_mb;
3119 }
3120 else {
3121 if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3122 goto fail;
3123
3124 s++;
3125 }
3126 INC_OP;
3127 NEXT_OUT;
3128
3129 CASE_OP(CCLASS_NOT)
3130 DATA_ENSURE(1);
3131 if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3132 s += enclen(encode, s);
3133 INC_OP;
3134 NEXT_OUT;
3135
3136 CASE_OP(CCLASS_MB_NOT)
3137 DATA_ENSURE(1);
3138 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3139 s++;
3140 goto cc_mb_not_success;
3141 }
3142
3143 cclass_mb_not:
3144 {
3145 OnigCodePoint code;
3146 UChar *ss;
3147 int mb_len = enclen(encode, s);
3148
3149 if (! DATA_ENSURE_CHECK(mb_len)) {
3150 DATA_ENSURE(1);
3151 s = (UChar* )end;
3152 goto cc_mb_not_success;
3153 }
3154
3155 ss = s;
3156 s += mb_len;
3157 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3158 if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3159 }
3160
3161 cc_mb_not_success:
3162 INC_OP;
3163 NEXT_OUT;
3164
3165 CASE_OP(CCLASS_MIX_NOT)
3166 DATA_ENSURE(1);
3167 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3168 goto cclass_mb_not;
3169 }
3170 else {
3171 if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3172 goto fail;
3173
3174 s++;
3175 }
3176 INC_OP;
3177 NEXT_OUT;
3178
3179 CASE_OP(ANYCHAR)
3180 DATA_ENSURE(1);
3181 n = enclen(encode, s);
3182 DATA_ENSURE(n);
3183 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3184 s += n;
3185 INC_OP;
3186 NEXT_OUT;
3187
3188 CASE_OP(ANYCHAR_ML)
3189 DATA_ENSURE(1);
3190 n = enclen(encode, s);
3191 DATA_ENSURE(n);
3192 s += n;
3193 INC_OP;
3194 NEXT_OUT;
3195
3196 CASE_OP(ANYCHAR_STAR)
3197 INC_OP;
3198 while (DATA_ENSURE_CHECK1) {
3199 STACK_PUSH_ALT(p, s, sprev);
3200 n = enclen(encode, s);
3201 DATA_ENSURE(n);
3202 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3203 sprev = s;
3204 s += n;
3205 }
3206 JUMP_OUT;
3207
3208 CASE_OP(ANYCHAR_ML_STAR)
3209 INC_OP;
3210 while (DATA_ENSURE_CHECK1) {
3211 STACK_PUSH_ALT(p, s, sprev);
3212 n = enclen(encode, s);
3213 if (n > 1) {
3214 DATA_ENSURE(n);
3215 sprev = s;
3216 s += n;
3217 }
3218 else {
3219 sprev = s;
3220 s++;
3221 }
3222 }
3223 JUMP_OUT;
3224
3225 CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3226 {
3227 UChar c;
3228
3229 c = p->anychar_star_peek_next.c;
3230 INC_OP;
3231 while (DATA_ENSURE_CHECK1) {
3232 if (c == *s) {
3233 STACK_PUSH_ALT(p, s, sprev);
3234 }
3235 n = enclen(encode, s);
3236 DATA_ENSURE(n);
3237 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3238 sprev = s;
3239 s += n;
3240 }
3241 }
3242 NEXT_OUT;
3243
3244 CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3245 {
3246 UChar c;
3247
3248 c = p->anychar_star_peek_next.c;
3249 INC_OP;
3250 while (DATA_ENSURE_CHECK1) {
3251 if (c == *s) {
3252 STACK_PUSH_ALT(p, s, sprev);
3253 }
3254 n = enclen(encode, s);
3255 if (n > 1) {
3256 DATA_ENSURE(n);
3257 sprev = s;
3258 s += n;
3259 }
3260 else {
3261 sprev = s;
3262 s++;
3263 }
3264 }
3265 }
3266 NEXT_OUT;
3267
3268 CASE_OP(WORD)
3269 DATA_ENSURE(1);
3270 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3271 goto fail;
3272
3273 s += enclen(encode, s);
3274 INC_OP;
3275 NEXT_OUT;
3276
3277 CASE_OP(WORD_ASCII)
3278 DATA_ENSURE(1);
3279 if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3280 goto fail;
3281
3282 s += enclen(encode, s);
3283 INC_OP;
3284 NEXT_OUT;
3285
3286 CASE_OP(NO_WORD)
3287 DATA_ENSURE(1);
3288 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3289 goto fail;
3290
3291 s += enclen(encode, s);
3292 INC_OP;
3293 NEXT_OUT;
3294
3295 CASE_OP(NO_WORD_ASCII)
3296 DATA_ENSURE(1);
3297 if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3298 goto fail;
3299
3300 s += enclen(encode, s);
3301 INC_OP;
3302 NEXT_OUT;
3303
3304 CASE_OP(WORD_BOUNDARY)
3305 {
3306 ModeType mode;
3307
3308 mode = p->word_boundary.mode;
3309 if (ON_STR_BEGIN(s)) {
3310 DATA_ENSURE(1);
3311 if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3312 goto fail;
3313 }
3314 else if (ON_STR_END(s)) {
3315 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3316 goto fail;
3317 }
3318 else {
3319 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3320 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3321 goto fail;
3322 }
3323 }
3324 INC_OP;
3325 JUMP_OUT;
3326
3327 CASE_OP(NO_WORD_BOUNDARY)
3328 {
3329 ModeType mode;
3330
3331 mode = p->word_boundary.mode;
3332 if (ON_STR_BEGIN(s)) {
3333 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3334 goto fail;
3335 }
3336 else if (ON_STR_END(s)) {
3337 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3338 goto fail;
3339 }
3340 else {
3341 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3342 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3343 goto fail;
3344 }
3345 }
3346 INC_OP;
3347 JUMP_OUT;
3348
3349 #ifdef USE_WORD_BEGIN_END
3350 CASE_OP(WORD_BEGIN)
3351 {
3352 ModeType mode;
3353
3354 mode = p->word_boundary.mode;
3355 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3356 if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3357 INC_OP;
3358 JUMP_OUT;
3359 }
3360 }
3361 }
3362 goto fail;
3363
3364 CASE_OP(WORD_END)
3365 {
3366 ModeType mode;
3367
3368 mode = p->word_boundary.mode;
3369 if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3370 if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3371 INC_OP;
3372 JUMP_OUT;
3373 }
3374 }
3375 }
3376 goto fail;
3377 #endif
3378
3379 CASE_OP(TEXT_SEGMENT_BOUNDARY)
3380 {
3381 int is_break;
3382
3383 switch (p->text_segment_boundary.type) {
3384 case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3385 is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3386 break;
3387 #ifdef USE_UNICODE_WORD_BREAK
3388 case WORD_BOUNDARY:
3389 is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3390 break;
3391 #endif
3392 default:
3393 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3394 break;
3395 }
3396
3397 if (p->text_segment_boundary.not != 0)
3398 is_break = ! is_break;
3399
3400 if (is_break != 0) {
3401 INC_OP;
3402 JUMP_OUT;
3403 }
3404 else {
3405 goto fail;
3406 }
3407 }
3408
3409 CASE_OP(BEGIN_BUF)
3410 if (! ON_STR_BEGIN(s)) goto fail;
3411
3412 INC_OP;
3413 JUMP_OUT;
3414
3415 CASE_OP(END_BUF)
3416 if (! ON_STR_END(s)) goto fail;
3417
3418 INC_OP;
3419 JUMP_OUT;
3420
3421 CASE_OP(BEGIN_LINE)
3422 if (ON_STR_BEGIN(s)) {
3423 if (IS_NOTBOL(msa->options)) goto fail;
3424 INC_OP;
3425 JUMP_OUT;
3426 }
3427 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
3428 INC_OP;
3429 JUMP_OUT;
3430 }
3431 goto fail;
3432
3433 CASE_OP(END_LINE)
3434 if (ON_STR_END(s)) {
3435 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3436 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3437 #endif
3438 if (IS_NOTEOL(msa->options)) goto fail;
3439 INC_OP;
3440 JUMP_OUT;
3441 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3442 }
3443 #endif
3444 }
3445 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3446 INC_OP;
3447 JUMP_OUT;
3448 }
3449 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3450 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3451 INC_OP;
3452 JUMP_OUT;
3453 }
3454 #endif
3455 goto fail;
3456
3457 CASE_OP(SEMI_END_BUF)
3458 if (ON_STR_END(s)) {
3459 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3460 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3461 #endif
3462 if (IS_NOTEOL(msa->options)) goto fail;
3463 INC_OP;
3464 JUMP_OUT;
3465 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3466 }
3467 #endif
3468 }
3469 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3470 ON_STR_END(s + enclen(encode, s))) {
3471 INC_OP;
3472 JUMP_OUT;
3473 }
3474 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3475 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3476 UChar* ss = s + enclen(encode, s);
3477 ss += enclen(encode, ss);
3478 if (ON_STR_END(ss)) {
3479 INC_OP;
3480 JUMP_OUT;
3481 }
3482 }
3483 #endif
3484 goto fail;
3485
3486 CASE_OP(BEGIN_POSITION)
3487 if (s != msa->start)
3488 goto fail;
3489
3490 INC_OP;
3491 JUMP_OUT;
3492
3493 CASE_OP(MEM_START_PUSH)
3494 mem = p->memory_start.num;
3495 STACK_PUSH_MEM_START(mem, s);
3496 INC_OP;
3497 JUMP_OUT;
3498
3499 CASE_OP(MEM_START)
3500 mem = p->memory_start.num;
3501 mem_start_stk[mem] = (StackIndex )((void* )s);
3502 INC_OP;
3503 JUMP_OUT;
3504
3505 CASE_OP(MEM_END_PUSH)
3506 mem = p->memory_end.num;
3507 STACK_PUSH_MEM_END(mem, s);
3508 INC_OP;
3509 JUMP_OUT;
3510
3511 CASE_OP(MEM_END)
3512 mem = p->memory_end.num;
3513 mem_end_stk[mem] = (StackIndex )((void* )s);
3514 INC_OP;
3515 JUMP_OUT;
3516
3517 #ifdef USE_CALL
3518 CASE_OP(MEM_END_PUSH_REC)
3519 {
3520 StackIndex si;
3521
3522 mem = p->memory_end.num;
3523 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3524 si = GET_STACK_INDEX(stkp);
3525 STACK_PUSH_MEM_END(mem, s);
3526 mem_start_stk[mem] = si;
3527 INC_OP;
3528 JUMP_OUT;
3529 }
3530
3531 CASE_OP(MEM_END_REC)
3532 mem = p->memory_end.num;
3533 mem_end_stk[mem] = (StackIndex )((void* )s);
3534 STACK_GET_MEM_START(mem, stkp);
3535
3536 if (MEM_STATUS_AT(reg->push_mem_start, mem))
3537 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3538 else
3539 mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
3540
3541 STACK_PUSH_MEM_END_MARK(mem);
3542 INC_OP;
3543 JUMP_OUT;
3544 #endif
3545
3546 CASE_OP(BACKREF1)
3547 mem = 1;
3548 goto backref;
3549
3550 CASE_OP(BACKREF2)
3551 mem = 2;
3552 goto backref;
3553
3554 CASE_OP(BACKREF_N)
3555 mem = p->backref_n.n1;
3556 backref:
3557 {
3558 int len;
3559 UChar *pstart, *pend;
3560
3561 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3562 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3563
3564 pstart = STACK_MEM_START(reg, mem);
3565 pend = STACK_MEM_END(reg, mem);
3566 n = (int )(pend - pstart);
3567 if (n != 0) {
3568 DATA_ENSURE(n);
3569 sprev = s;
3570 STRING_CMP(s, pstart, n);
3571 while (sprev + (len = enclen(encode, sprev)) < s)
3572 sprev += len;
3573 }
3574 }
3575 INC_OP;
3576 JUMP_OUT;
3577
3578 CASE_OP(BACKREF_N_IC)
3579 mem = p->backref_n.n1;
3580 {
3581 int len;
3582 UChar *pstart, *pend;
3583
3584 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3585 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3586
3587 pstart = STACK_MEM_START(reg, mem);
3588 pend = STACK_MEM_END(reg, mem);
3589 n = (int )(pend - pstart);
3590 if (n != 0) {
3591 DATA_ENSURE(n);
3592 sprev = s;
3593 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3594 while (sprev + (len = enclen(encode, sprev)) < s)
3595 sprev += len;
3596 }
3597 }
3598 INC_OP;
3599 JUMP_OUT;
3600
3601 CASE_OP(BACKREF_MULTI)
3602 {
3603 int len, is_fail;
3604 UChar *pstart, *pend, *swork;
3605
3606 tlen = p->backref_general.num;
3607 for (i = 0; i < tlen; i++) {
3608 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3609
3610 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3611 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3612
3613 pstart = STACK_MEM_START(reg, mem);
3614 pend = STACK_MEM_END(reg, mem);
3615 n = (int )(pend - pstart);
3616 if (n != 0) {
3617 DATA_ENSURE(n);
3618 sprev = s;
3619 swork = s;
3620 STRING_CMP_VALUE(swork, pstart, n, is_fail);
3621 if (is_fail) continue;
3622 s = swork;
3623 while (sprev + (len = enclen(encode, sprev)) < s)
3624 sprev += len;
3625 }
3626 break; /* success */
3627 }
3628 if (i == tlen) goto fail;
3629 }
3630 INC_OP;
3631 JUMP_OUT;
3632
3633 CASE_OP(BACKREF_MULTI_IC)
3634 {
3635 int len, is_fail;
3636 UChar *pstart, *pend, *swork;
3637
3638 tlen = p->backref_general.num;
3639 for (i = 0; i < tlen; i++) {
3640 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3641
3642 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3643 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3644
3645 pstart = STACK_MEM_START(reg, mem);
3646 pend = STACK_MEM_END(reg, mem);
3647 n = (int )(pend - pstart);
3648 if (n != 0) {
3649 DATA_ENSURE(n);
3650 sprev = s;
3651 swork = s;
3652 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3653 if (is_fail) continue;
3654 s = swork;
3655 while (sprev + (len = enclen(encode, sprev)) < s)
3656 sprev += len;
3657 }
3658 break; /* success */
3659 }
3660 if (i == tlen) goto fail;
3661 }
3662 INC_OP;
3663 JUMP_OUT;
3664
3665 #ifdef USE_BACKREF_WITH_LEVEL
3666 CASE_OP(BACKREF_WITH_LEVEL_IC)
3667 n = 1; /* ignore case */
3668 goto backref_with_level;
3669 CASE_OP(BACKREF_WITH_LEVEL)
3670 {
3671 int len;
3672 int level;
3673 MemNumType* mems;
3674 UChar* ssave;
3675
3676 n = 0;
3677 backref_with_level:
3678 level = p->backref_general.nest_level;
3679 tlen = p->backref_general.num;
3680 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3681
3682 ssave = s;
3683 if (backref_match_at_nested_level(reg, stk, stk_base, n,
3684 case_fold_flag, level, (int )tlen, mems, &s, end)) {
3685 if (ssave != s) {
3686 sprev = ssave;
3687 while (sprev + (len = enclen(encode, sprev)) < s)
3688 sprev += len;
3689 }
3690 }
3691 else
3692 goto fail;
3693 }
3694 INC_OP;
3695 JUMP_OUT;
3696 #endif
3697
3698 CASE_OP(BACKREF_CHECK)
3699 {
3700 MemNumType* mems;
3701
3702 tlen = p->backref_general.num;
3703 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3704
3705 for (i = 0; i < tlen; i++) {
3706 mem = mems[i];
3707 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3708 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3709 break; /* success */
3710 }
3711 if (i == tlen) goto fail;
3712 }
3713 INC_OP;
3714 JUMP_OUT;
3715
3716 #ifdef USE_BACKREF_WITH_LEVEL
3717 CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3718 {
3719 LengthType level;
3720 MemNumType* mems;
3721
3722 level = p->backref_general.nest_level;
3723 tlen = p->backref_general.num;
3724 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3725
3726 if (backref_check_at_nested_level(reg, stk, stk_base,
3727 (int )level, (int )tlen, mems) == 0)
3728 goto fail;
3729 }
3730 INC_OP;
3731 JUMP_OUT;
3732 #endif
3733
3734 CASE_OP(EMPTY_CHECK_START)
3735 mem = p->empty_check_start.mem; /* mem: null check id */
3736 STACK_PUSH_EMPTY_CHECK_START(mem, s);
3737 INC_OP;
3738 JUMP_OUT;
3739
3740 CASE_OP(EMPTY_CHECK_END)
3741 {
3742 int is_empty;
3743
3744 mem = p->empty_check_end.mem; /* mem: null check id */
3745 STACK_EMPTY_CHECK(is_empty, mem, s);
3746 INC_OP;
3747 if (is_empty) {
3748 #ifdef ONIG_DEBUG_MATCH
3749 fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
3750 #endif
3751 empty_check_found:
3752 /* empty loop founded, skip next instruction */
3753 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3754 switch (p->opcode) {
3755 case OP_JUMP:
3756 case OP_PUSH:
3757 case OP_REPEAT_INC:
3758 case OP_REPEAT_INC_NG:
3759 INC_OP;
3760 break;
3761 default:
3762 MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3763 break;
3764 }
3765 #else
3766 INC_OP;
3767 #endif
3768 }
3769 }
3770 JUMP_OUT;
3771
3772 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3773 CASE_OP(EMPTY_CHECK_END_MEMST)
3774 {
3775 int is_empty;
3776
3777 mem = p->empty_check_end.mem; /* mem: null check id */
3778 STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3779 INC_OP;
3780 if (is_empty) {
3781 #ifdef ONIG_DEBUG_MATCH
3782 fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
3783 #endif
3784 if (is_empty == -1) goto fail;
3785 goto empty_check_found;
3786 }
3787 }
3788 JUMP_OUT;
3789 #endif
3790
3791 #ifdef USE_CALL
3792 CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3793 {
3794 int is_empty;
3795
3796 mem = p->empty_check_end.mem; /* mem: null check id */
3797 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3798 STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3799 #else
3800 STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3801 #endif
3802 INC_OP;
3803 if (is_empty) {
3804 #ifdef ONIG_DEBUG_MATCH
3805 fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3806 (int )mem, s);
3807 #endif
3808 if (is_empty == -1) goto fail;
3809 goto empty_check_found;
3810 }
3811 else {
3812 STACK_PUSH_EMPTY_CHECK_END(mem);
3813 }
3814 }
3815 JUMP_OUT;
3816 #endif
3817
3818 CASE_OP(JUMP)
3819 addr = p->jump.addr;
3820 p += addr;
3821 CHECK_INTERRUPT_JUMP_OUT;
3822
3823 CASE_OP(PUSH)
3824 addr = p->push.addr;
3825 STACK_PUSH_ALT(p + addr, s, sprev);
3826 INC_OP;
3827 JUMP_OUT;
3828
3829 CASE_OP(PUSH_SUPER)
3830 addr = p->push.addr;
3831 STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
3832 INC_OP;
3833 JUMP_OUT;
3834
3835 CASE_OP(POP_OUT)
3836 STACK_POP_ONE;
3837 /* for stop backtrack */
3838 /* CHECK_RETRY_LIMIT_IN_MATCH; */
3839 INC_OP;
3840 JUMP_OUT;
3841
3842 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3843 CASE_OP(PUSH_OR_JUMP_EXACT1)
3844 {
3845 UChar c;
3846
3847 addr = p->push_or_jump_exact1.addr;
3848 c = p->push_or_jump_exact1.c;
3849 if (DATA_ENSURE_CHECK1 && c == *s) {
3850 STACK_PUSH_ALT(p + addr, s, sprev);
3851 INC_OP;
3852 JUMP_OUT;
3853 }
3854 }
3855 p += addr;
3856 JUMP_OUT;
3857 #endif
3858
3859 CASE_OP(PUSH_IF_PEEK_NEXT)
3860 {
3861 UChar c;
3862
3863 addr = p->push_if_peek_next.addr;
3864 c = p->push_if_peek_next.c;
3865 if (DATA_ENSURE_CHECK1 && c == *s) {
3866 STACK_PUSH_ALT(p + addr, s, sprev);
3867 INC_OP;
3868 JUMP_OUT;
3869 }
3870 }
3871 INC_OP;
3872 JUMP_OUT;
3873
3874 CASE_OP(REPEAT)
3875 mem = p->repeat.id; /* mem: OP_REPEAT ID */
3876 addr = p->repeat.addr;
3877
3878 STACK_PUSH_REPEAT_INC(mem, 0);
3879 if (reg->repeat_range[mem].lower == 0) {
3880 STACK_PUSH_ALT(p + addr, s, sprev);
3881 }
3882 INC_OP;
3883 JUMP_OUT;
3884
3885 CASE_OP(REPEAT_NG)
3886 mem = p->repeat.id; /* mem: OP_REPEAT ID */
3887 addr = p->repeat.addr;
3888
3889 STACK_PUSH_REPEAT_INC(mem, 0);
3890 if (reg->repeat_range[mem].lower == 0) {
3891 STACK_PUSH_ALT(p + 1, s, sprev);
3892 p += addr;
3893 }
3894 else
3895 INC_OP;
3896 JUMP_OUT;
3897
3898 CASE_OP(REPEAT_INC)
3899 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3900 STACK_GET_REPEAT_COUNT(mem, n);
3901 n++;
3902 if (n >= reg->repeat_range[mem].upper) {
3903 /* end of repeat. Nothing to do. */
3904 INC_OP;
3905 }
3906 else if (n >= reg->repeat_range[mem].lower) {
3907 INC_OP;
3908 STACK_PUSH_ALT(p, s, sprev);
3909 p = reg->repeat_range[mem].u.pcode;
3910 }
3911 else {
3912 p = reg->repeat_range[mem].u.pcode;
3913 }
3914 STACK_PUSH_REPEAT_INC(mem, n);
3915 CHECK_INTERRUPT_JUMP_OUT;
3916
3917 CASE_OP(REPEAT_INC_NG)
3918 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3919 STACK_GET_REPEAT_COUNT(mem, n);
3920 n++;
3921 STACK_PUSH_REPEAT_INC(mem, n);
3922 if (n == reg->repeat_range[mem].upper) {
3923 INC_OP;
3924 }
3925 else {
3926 if (n >= reg->repeat_range[mem].lower) {
3927 STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
3928 INC_OP;
3929 }
3930 else {
3931 p = reg->repeat_range[mem].u.pcode;
3932 }
3933 }
3934 CHECK_INTERRUPT_JUMP_OUT;
3935
3936 CASE_OP(PREC_READ_START)
3937 STACK_PUSH_PREC_READ_START(s, sprev);
3938 INC_OP;
3939 JUMP_OUT;
3940
3941 CASE_OP(PREC_READ_END)
3942 STACK_GET_PREC_READ_START(stkp);
3943 s = stkp->u.state.pstr;
3944 sprev = stkp->u.state.pstr_prev;
3945 STACK_PUSH(STK_PREC_READ_END,0,0,0);
3946 INC_OP;
3947 JUMP_OUT;
3948
3949 CASE_OP(PREC_READ_NOT_START)
3950 addr = p->prec_read_not_start.addr;
3951 STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
3952 INC_OP;
3953 JUMP_OUT;
3954
3955 CASE_OP(PREC_READ_NOT_END)
3956 STACK_POP_TIL_ALT_PREC_READ_NOT;
3957 goto fail;
3958
3959 CASE_OP(ATOMIC_START)
3960 STACK_PUSH_TO_VOID_START;
3961 INC_OP;
3962 JUMP_OUT;
3963
3964 CASE_OP(ATOMIC_END)
3965 STACK_EXEC_TO_VOID(stkp);
3966 INC_OP;
3967 JUMP_OUT;
3968
3969 CASE_OP(LOOK_BEHIND)
3970 tlen = p->look_behind.len;
3971 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3972 if (IS_NULL(s)) goto fail;
3973 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3974 INC_OP;
3975 JUMP_OUT;
3976
3977 CASE_OP(LOOK_BEHIND_NOT_START)
3978 addr = p->look_behind_not_start.addr;
3979 tlen = p->look_behind_not_start.len;
3980 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3981 if (IS_NULL(q)) {
3982 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3983 If you want to change to fail, replace following line. */
3984 p += addr;
3985 /* goto fail; */
3986 }
3987 else {
3988 STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
3989 s = q;
3990 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3991 INC_OP;
3992 }
3993 JUMP_OUT;
3994
3995 CASE_OP(LOOK_BEHIND_NOT_END)
3996 STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
3997 INC_OP;
3998 goto fail;
3999
4000 #ifdef USE_CALL
4001 CASE_OP(CALL)
4002 addr = p->call.addr;
4003 INC_OP; STACK_PUSH_CALL_FRAME(p);
4004 p = reg->ops + addr;
4005 JUMP_OUT;
4006
4007 CASE_OP(RETURN)
4008 STACK_RETURN(p);
4009 STACK_PUSH_RETURN;
4010 JUMP_OUT;
4011 #endif
4012
4013 CASE_OP(PUSH_SAVE_VAL)
4014 {
4015 SaveType type;
4016
4017 type = p->push_save_val.type;
4018 mem = p->push_save_val.id; /* mem: save id */
4019 switch ((enum SaveType )type) {
4020 case SAVE_KEEP:
4021 STACK_PUSH_SAVE_VAL(mem, type, s);
4022 break;
4023
4024 case SAVE_S:
4025 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4026 break;
4027
4028 case SAVE_RIGHT_RANGE:
4029 STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4030 break;
4031 }
4032 }
4033 INC_OP;
4034 JUMP_OUT;
4035
4036 CASE_OP(UPDATE_VAR)
4037 {
4038 UpdateVarType type;
4039 enum SaveType save_type;
4040
4041 type = p->update_var.type;
4042 mem = p->update_var.id; /* mem: save id */
4043
4044 switch ((enum UpdateVarType )type) {
4045 case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4046 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4047 break;
4048 case UPDATE_VAR_S_FROM_STACK:
4049 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4050 break;
4051 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4052 save_type = SAVE_S;
4053 goto get_save_val_type_last_id;
4054 break;
4055 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4056 save_type = SAVE_RIGHT_RANGE;
4057 get_save_val_type_last_id:
4058 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range);
4059 break;
4060 case UPDATE_VAR_RIGHT_RANGE_INIT:
4061 INIT_RIGHT_RANGE;
4062 break;
4063 }
4064 }
4065 INC_OP;
4066 JUMP_OUT;
4067
4068 #ifdef USE_CALLOUT
4069 CASE_OP(CALLOUT_CONTENTS)
4070 of = ONIG_CALLOUT_OF_CONTENTS;
4071 mem = p->callout_contents.num;
4072 goto callout_common_entry;
4073 BREAK_OUT;
4074
4075 CASE_OP(CALLOUT_NAME)
4076 {
4077 int call_result;
4078 int name_id;
4079 int in;
4080 CalloutListEntry* e;
4081 OnigCalloutFunc func;
4082 OnigCalloutArgs args;
4083
4084 of = ONIG_CALLOUT_OF_NAME;
4085 mem = p->callout_name.num;
4086
4087 callout_common_entry:
4088 e = onig_reg_callout_list_at(reg, mem);
4089 in = e->in;
4090 if (of == ONIG_CALLOUT_OF_NAME) {
4091 name_id = p->callout_name.id;
4092 func = onig_get_callout_start_func(reg, mem);
4093 }
4094 else {
4095 name_id = ONIG_NON_NAME_ID;
4096 func = msa->mp->progress_callout_of_contents;
4097 }
4098
4099 if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4100 CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4101 (int )mem, msa->mp->callout_user_data, args, call_result);
4102 switch (call_result) {
4103 case ONIG_CALLOUT_FAIL:
4104 goto fail;
4105 break;
4106 case ONIG_CALLOUT_SUCCESS:
4107 goto retraction_callout2;
4108 break;
4109 default: /* error code */
4110 if (call_result > 0) {
4111 call_result = ONIGERR_INVALID_ARGUMENT;
4112 }
4113 best_len = call_result;
4114 goto match_at_end;
4115 break;
4116 }
4117 }
4118 else {
4119 retraction_callout2:
4120 if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4121 if (of == ONIG_CALLOUT_OF_NAME) {
4122 if (IS_NOT_NULL(func)) {
4123 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4124 }
4125 }
4126 else {
4127 func = msa->mp->retraction_callout_of_contents;
4128 if (IS_NOT_NULL(func)) {
4129 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4130 }
4131 }
4132 }
4133 }
4134 }
4135 INC_OP;
4136 JUMP_OUT;
4137 #endif
4138
4139 CASE_OP(FINISH)
4140 goto match_at_end;
4141
4142 #ifdef ONIG_DEBUG_STATISTICS
4143 fail:
4144 SOP_OUT;
4145 goto fail2;
4146 #endif
4147 CASE_OP(FAIL)
4148 #ifdef ONIG_DEBUG_STATISTICS
4149 fail2:
4150 #else
4151 fail:
4152 #endif
4153 STACK_POP;
4154 p = stk->u.state.pcode;
4155 s = stk->u.state.pstr;
4156 sprev = stk->u.state.pstr_prev;
4157 CHECK_RETRY_LIMIT_IN_MATCH;
4158 JUMP_OUT;
4159
4160 DEFAULT_OP
4161 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4162
4163 } BYTECODE_INTERPRETER_END;
4164
4165 match_at_end:
4166 STACK_SAVE;
4167 return best_len;
4168 }
4169
/* One entry of a regex set: a regex together with the region that holds
   its capture results.  Code in this file checks `region` with
   IS_NOT_NULL() before touching it, so it may be NULL. */
typedef struct {
  regex_t* reg;       /* the regex object for this entry */
  OnigRegion* region; /* capture region belonging to reg, or NULL */
} RR;
4174
/* A set of regexes that are searched together against one subject string.
   The summary fields (enc, anchor, anc_dmin/anc_dmax, all_low_high,
   anychar_inf) are consulted by the regset search functions instead of
   re-inspecting every member regex. */
struct OnigRegSetStruct {
  RR* rs;            /* array of (regex, region) entries, indexed 0 .. n-1 */
  int n;             /* number of entries in rs that are in use */
  int alloc;         /* presumably the allocated capacity of rs -- not used in
                        this part of the file; confirm against the allocator */
  OnigEncoding enc;  /* encoding used when stepping through the subject */
  int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  OnigLen anc_dmin; /* (SEMI_)END_BUF anchor distance */
  OnigLen anc_dmax; /* (SEMI_)END_BUF anchor distance */
  int all_low_high;  /* non-zero: the position-lead search may use its
                        low/high window loop for long ranges */
  int anychar_inf;   /* non-zero: the position-lead search tracks whether the
                        previous character was a newline */
};
4186
/* State of one regex's SearchRange entry during
   regset_search_body_position_lead(). */
enum SearchRangeStatus {
  SRS_DEAD = 0,      /* entry is skipped for the rest of the search */
  SRS_LOW_HIGH = 1,  /* low/high window was produced by forward_search() and
                        is refreshed once the position reaches `high` */
  SRS_ALL_RANGE = 2  /* window is fixed: low = search start, high = range */
};
4192
/* Per-regex candidate window used by regset_search_body_position_lead().
   The regex is only tried at positions s with low <= s; when s reaches
   `high` a SRS_LOW_HIGH entry is recomputed with forward_search(). */
typedef struct {
  int state; /* value of enum SearchRangeStatus */
  UChar* low;        /* first position at which the regex is tried */
  UChar* high;       /* position at which the window must be refreshed */
  UChar* low_prev;   /* value stored alongside `low`; used as the `prev`
                        position when the search jumps directly to `low` */
  UChar* sch_range;  /* range limit passed to forward_search() on refresh;
                        only assigned for SRS_LOW_HIGH entries */
} SearchRange;
4200
/* Try the regex `reg` (set entry `i`) at the current position `s`.
 *
 * This is a statement macro that relies on the expanding scope providing
 * the variables r, reg, str, end, s, prev, msas and i, plus the labels
 * `match` and `finish`:
 *   - r == ONIG_MISMATCH : execution falls through to the next statement;
 *   - r >= 0             : jumps to `match` (r holds the match_at() result,
 *                          NOT the regex index);
 *   - any other r        : jumps to `finish` (error).
 */
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
4209
4210 static inline int
regset_search_body_position_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * orig_range,OnigOptionType option,MatchArg * msas,int * rmatch_pos)4211 regset_search_body_position_lead(OnigRegSet* set,
4212 const UChar* str, const UChar* end,
4213 const UChar* start, const UChar* range, /* match start range */
4214 const UChar* orig_range, /* data range */
4215 OnigOptionType option, MatchArg* msas, int* rmatch_pos)
4216 {
4217 int r, n, i;
4218 UChar *s, *prev;
4219 UChar *low, *high, *low_prev;
4220 UChar* sch_range;
4221 regex_t* reg;
4222 OnigEncoding enc;
4223 SearchRange* sr;
4224
4225 n = set->n;
4226 enc = set->enc;
4227
4228 s = (UChar* )start;
4229 if (s > str)
4230 prev = onigenc_get_prev_char_head(enc, str, s);
4231 else
4232 prev = (UChar* )NULL;
4233
4234 sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
4235 CHECK_NULL_RETURN_MEMERR(sr);
4236
4237 for (i = 0; i < n; i++) {
4238 reg = set->rs[i].reg;
4239
4240 sr[i].state = SRS_DEAD;
4241 if (reg->optimize != OPTIMIZE_NONE) {
4242 if (reg->dist_max != INFINITE_LEN) {
4243 if (end - range > reg->dist_max)
4244 sch_range = (UChar* )range + reg->dist_max;
4245 else
4246 sch_range = (UChar* )end;
4247
4248 if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
4249 sr[i].state = SRS_LOW_HIGH;
4250 sr[i].low = low;
4251 sr[i].high = high;
4252 sr[i].low_prev = low_prev;
4253 sr[i].sch_range = sch_range;
4254 }
4255 }
4256 else {
4257 sch_range = (UChar* )end;
4258 if (forward_search(reg, str, end, s, sch_range,
4259 &low, &high, (UChar** )NULL)) {
4260 goto total_active;
4261 }
4262 }
4263 }
4264 else {
4265 total_active:
4266 sr[i].state = SRS_ALL_RANGE;
4267 sr[i].low = s;
4268 sr[i].high = (UChar* )range;
4269 sr[i].low_prev = prev;
4270 }
4271 }
4272
4273 #define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN 500
4274
4275 if (set->all_low_high != 0
4276 && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
4277 do {
4278 int try_count = 0;
4279 for (i = 0; i < n; i++) {
4280 if (sr[i].state == SRS_DEAD) continue;
4281
4282 if (s < sr[i].low) continue;
4283 if (s >= sr[i].high) {
4284 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4285 &low, &high, &low_prev) != 0) {
4286 sr[i].low = low;
4287 sr[i].high = high;
4288 sr[i].low_prev = low_prev;
4289 if (s < low) continue;
4290 }
4291 else {
4292 sr[i].state = SRS_DEAD;
4293 continue;
4294 }
4295 }
4296
4297 reg = set->rs[i].reg;
4298 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4299 try_count++;
4300 } /* for (i) */
4301
4302 if (s >= range) break;
4303
4304 if (try_count == 0) {
4305 low = (UChar* )range;
4306 for (i = 0; i < n; i++) {
4307 if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
4308 low = sr[i].low;
4309 low_prev = sr[i].low_prev;
4310 }
4311 }
4312 if (low == range) break;
4313
4314 s = low;
4315 prev = low_prev;
4316 }
4317 else {
4318 prev = s;
4319 s += enclen(enc, s);
4320 }
4321 } while (1);
4322 }
4323 else {
4324 int prev_is_newline = 1;
4325 do {
4326 for (i = 0; i < n; i++) {
4327 if (sr[i].state == SRS_DEAD) continue;
4328 if (sr[i].state == SRS_LOW_HIGH) {
4329 if (s < sr[i].low) continue;
4330 if (s >= sr[i].high) {
4331 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4332 &low, &high, &low_prev) != 0) {
4333 sr[i].low = low;
4334 sr[i].high = high;
4335 /* sr[i].low_prev = low_prev; */
4336 if (s < low) continue;
4337 }
4338 else {
4339 sr[i].state = SRS_DEAD;
4340 continue;
4341 }
4342 }
4343 }
4344
4345 reg = set->rs[i].reg;
4346 if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
4347 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4348 }
4349 }
4350
4351 if (s >= range) break;
4352
4353 if (set->anychar_inf != 0)
4354 prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
4355
4356 prev = s;
4357 s += enclen(enc, s);
4358 } while (1);
4359 }
4360
4361 xfree(sr);
4362 return ONIG_MISMATCH;
4363
4364 finish:
4365 xfree(sr);
4366 return r;
4367
4368 match:
4369 xfree(sr);
4370 *rmatch_pos = (int )(s - str);
4371 return i;
4372 }
4373
4374 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4375 regset_search_body_regex_lead(OnigRegSet* set,
4376 const UChar* str, const UChar* end,
4377 const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4378 OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4379 {
4380 int r;
4381 int i;
4382 int n;
4383 int match_index;
4384 const UChar* ep;
4385 regex_t* reg;
4386 OnigRegion* region;
4387
4388 n = set->n;
4389
4390 match_index = ONIG_MISMATCH;
4391 ep = orig_range;
4392 for (i = 0; i < n; i++) {
4393 reg = set->rs[i].reg;
4394 region = set->rs[i].region;
4395 r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4396 if (r > 0) {
4397 if (str + r < ep) {
4398 match_index = i;
4399 *rmatch_pos = r;
4400 if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4401 break;
4402
4403 ep = str + r;
4404 }
4405 }
4406 else if (r == 0) {
4407 match_index = i;
4408 *rmatch_pos = r;
4409 break;
4410 }
4411 }
4412
4413 return match_index;
4414 }
4415
4416 extern int
onig_regset_search_with_param(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4417 onig_regset_search_with_param(OnigRegSet* set,
4418 const UChar* str, const UChar* end,
4419 const UChar* start, const UChar* range,
4420 OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
4421 int* rmatch_pos)
4422 {
4423 int r;
4424 int i;
4425 UChar *s, *prev;
4426 regex_t* reg;
4427 OnigEncoding enc;
4428 OnigRegion* region;
4429 MatchArg* msas;
4430 const UChar *orig_start = start;
4431 const UChar *orig_range = range;
4432
4433 if (set->n == 0)
4434 return ONIG_MISMATCH;
4435
4436 if (IS_POSIX_REGION(option))
4437 return ONIGERR_INVALID_ARGUMENT;
4438
4439 r = 0;
4440 enc = set->enc;
4441 msas = (MatchArg* )NULL;
4442
4443 for (i = 0; i < set->n; i++) {
4444 reg = set->rs[i].reg;
4445 region = set->rs[i].region;
4446 ADJUST_MATCH_PARAM(reg, mps[i]);
4447 if (IS_NOT_NULL(region)) {
4448 r = onig_region_resize_clear(region, reg->num_mem + 1);
4449 if (r != 0) goto finish_no_msa;
4450 }
4451 }
4452
4453 if (start > end || start < str) goto mismatch_no_msa;
4454 if (str < end) {
4455 /* forward search only */
4456 if (range <= start)
4457 return ONIGERR_INVALID_ARGUMENT;
4458 }
4459
4460 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4461 if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
4462 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4463 goto finish_no_msa;
4464 }
4465 }
4466
4467 if (set->anchor != OPTIMIZE_NONE && str < end) {
4468 UChar *min_semi_end, *max_semi_end;
4469
4470 if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
4471 /* search start-position only */
4472 begin_position:
4473 range = start + 1;
4474 }
4475 else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
4476 /* search str-position only */
4477 if (start != str) goto mismatch_no_msa;
4478 range = str + 1;
4479 }
4480 else if ((set->anchor & ANCR_END_BUF) != 0) {
4481 min_semi_end = max_semi_end = (UChar* )end;
4482
4483 end_buf:
4484 if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
4485 goto mismatch_no_msa;
4486
4487 if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
4488 start = min_semi_end - set->anc_dmax;
4489 if (start < end)
4490 start = onigenc_get_right_adjust_char_head(enc, str, start);
4491 }
4492 if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
4493 range = max_semi_end - set->anc_dmin + 1;
4494 }
4495 if (start > range) goto mismatch_no_msa;
4496 }
4497 else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
4498 UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
4499
4500 max_semi_end = (UChar* )end;
4501 if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
4502 min_semi_end = pre_end;
4503
4504 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4505 pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
4506 if (IS_NOT_NULL(pre_end) &&
4507 ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
4508 min_semi_end = pre_end;
4509 }
4510 #endif
4511 if (min_semi_end > str && start <= min_semi_end) {
4512 goto end_buf;
4513 }
4514 }
4515 else {
4516 min_semi_end = (UChar* )end;
4517 goto end_buf;
4518 }
4519 }
4520 else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
4521 goto begin_position;
4522 }
4523 }
4524 else if (str == end) { /* empty string */
4525 start = end = str;
4526 s = (UChar* )start;
4527 prev = (UChar* )NULL;
4528
4529 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4530 CHECK_NULL_RETURN_MEMERR(msas);
4531 for (i = 0; i < set->n; i++) {
4532 reg = set->rs[i].reg;
4533 MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
4534 }
4535 for (i = 0; i < set->n; i++) {
4536 reg = set->rs[i].reg;
4537 if (reg->threshold_len == 0) {
4538 REGSET_MATCH_AND_RETURN_CHECK(end);
4539 }
4540 }
4541
4542 goto mismatch;
4543 }
4544
4545 if (lead == ONIG_REGSET_POSITION_LEAD) {
4546 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4547 CHECK_NULL_RETURN_MEMERR(msas);
4548
4549 for (i = 0; i < set->n; i++) {
4550 MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
4551 orig_start, mps[i]);
4552 }
4553
4554 r = regset_search_body_position_lead(set, str, end, start, range,
4555 orig_range, option, msas, rmatch_pos);
4556 }
4557 else {
4558 r = regset_search_body_regex_lead(set, str, end, start, orig_range,
4559 lead, option, mps, rmatch_pos);
4560 }
4561 if (r < 0) goto finish;
4562 else goto match2;
4563
4564 mismatch:
4565 r = ONIG_MISMATCH;
4566 finish:
4567 for (i = 0; i < set->n; i++) {
4568 if (IS_NOT_NULL(msas))
4569 MATCH_ARG_FREE(msas[i]);
4570 if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4571 IS_NOT_NULL(set->rs[i].region)) {
4572 onig_region_clear(set->rs[i].region);
4573 }
4574 }
4575 if (IS_NOT_NULL(msas)) xfree(msas);
4576 return r;
4577
4578 mismatch_no_msa:
4579 r = ONIG_MISMATCH;
4580 finish_no_msa:
4581 return r;
4582
4583 match:
4584 *rmatch_pos = (int )(s - str);
4585 match2:
4586 for (i = 0; i < set->n; i++) {
4587 if (IS_NOT_NULL(msas))
4588 MATCH_ARG_FREE(msas[i]);
4589 if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4590 IS_NOT_NULL(set->rs[i].region)) {
4591 onig_region_clear(set->rs[i].region);
4592 }
4593 }
4594 if (IS_NOT_NULL(msas)) xfree(msas);
4595 return r; /* regex index */
4596 }
4597
4598 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4599 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4600 const UChar* start, const UChar* range,
4601 OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4602 {
4603 int r;
4604 int i;
4605 OnigMatchParam* mp;
4606 OnigMatchParam** mps;
4607
4608 mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4609 CHECK_NULL_RETURN_MEMERR(mps);
4610
4611 mp = (OnigMatchParam* )(mps + set->n);
4612
4613 for (i = 0; i < set->n; i++) {
4614 onig_initialize_match_param(mp + i);
4615 mps[i] = mp + i;
4616 }
4617
4618 r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4619 rmatch_pos);
4620 for (i = 0; i < set->n; i++)
4621 onig_free_match_param_content(mp + i);
4622
4623 xfree(mps);
4624
4625 return r;
4626 }
4627
4628 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4629 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4630 const UChar* text, const UChar* text_end, UChar* text_range)
4631 {
4632 UChar *t, *p, *s, *end;
4633
4634 end = (UChar* )text_end;
4635 end -= target_end - target - 1;
4636 if (end > text_range)
4637 end = text_range;
4638
4639 s = (UChar* )text;
4640
4641 while (s < end) {
4642 if (*s == *target) {
4643 p = s + 1;
4644 t = target + 1;
4645 while (t < target_end) {
4646 if (*t != *p++)
4647 break;
4648 t++;
4649 }
4650 if (t == target_end)
4651 return s;
4652 }
4653 s += enclen(enc, s);
4654 }
4655
4656 return (UChar* )NULL;
4657 }
4658
4659 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)4660 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4661 const UChar* t, const UChar* tend,
4662 const UChar* p, const UChar* end)
4663 {
4664 int lowlen;
4665 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4666
4667 while (t < tend) {
4668 if (p >= end) return 0;
4669 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4670 q = lowbuf;
4671 while (lowlen > 0) {
4672 if (t >= tend) return 0;
4673 if (*t++ != *q++) return 0;
4674 lowlen--;
4675 }
4676 }
4677
4678 return 1;
4679 }
4680
4681 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4682 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4683 UChar* target, UChar* target_end,
4684 const UChar* text, const UChar* text_end, UChar* text_range)
4685 {
4686 UChar *s;
4687
4688 s = (UChar* )text;
4689
4690 while (s < text_range) {
4691 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4692 s, text_end))
4693 return s;
4694
4695 s += enclen(enc, s);
4696 }
4697
4698 return (UChar* )NULL;
4699 }
4700
4701 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4702 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4703 const UChar* text, const UChar* adjust_text,
4704 const UChar* text_end, const UChar* text_start)
4705 {
4706 UChar *t, *p, *s;
4707
4708 s = (UChar* )text_end;
4709 s -= (target_end - target);
4710 if (s > text_start)
4711 s = (UChar* )text_start;
4712 else
4713 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4714
4715 while (s >= text) {
4716 if (*s == *target) {
4717 p = s + 1;
4718 t = target + 1;
4719 while (t < target_end) {
4720 if (*t != *p++)
4721 break;
4722 t++;
4723 }
4724 if (t == target_end)
4725 return s;
4726 }
4727 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4728 }
4729
4730 return (UChar* )NULL;
4731 }
4732
4733 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4734 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4735 UChar* target, UChar* target_end,
4736 const UChar* text, const UChar* adjust_text,
4737 const UChar* text_end, const UChar* text_start)
4738 {
4739 UChar *s;
4740
4741 s = (UChar* )text_end;
4742 s -= (target_end - target);
4743 if (s > text_start)
4744 s = (UChar* )text_start;
4745 else
4746 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4747
4748 while (s >= text) {
4749 if (str_lower_case_match(enc, case_fold_flag,
4750 target, target_end, s, text_end))
4751 return s;
4752
4753 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4754 }
4755
4756 return (UChar* )NULL;
4757 }
4758
4759
4760 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4761 sunday_quick_search_step_forward(regex_t* reg,
4762 const UChar* target, const UChar* target_end,
4763 const UChar* text, const UChar* text_end,
4764 const UChar* text_range)
4765 {
4766 const UChar *s, *se, *t, *p, *end;
4767 const UChar *tail;
4768 int skip, tlen1;
4769 int map_offset;
4770 OnigEncoding enc;
4771
4772 #ifdef ONIG_DEBUG_SEARCH
4773 fprintf(stderr,
4774 "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
4775 #endif
4776
4777 enc = reg->enc;
4778
4779 tail = target_end - 1;
4780 tlen1 = (int )(tail - target);
4781 end = text_range;
4782 if (end + tlen1 > text_end)
4783 end = text_end - tlen1;
4784
4785 map_offset = reg->map_offset;
4786 s = text;
4787
4788 while (s < end) {
4789 p = se = s + tlen1;
4790 t = tail;
4791 while (*p == *t) {
4792 if (t == target) return (UChar* )s;
4793 p--; t--;
4794 }
4795 if (se + map_offset >= text_end) break;
4796 skip = reg->map[*(se + map_offset)];
4797 #if 0
4798 t = s;
4799 do {
4800 s += enclen(enc, s);
4801 } while ((s - t) < skip && s < end);
4802 #else
4803 s += skip;
4804 if (s < end)
4805 s = onigenc_get_right_adjust_char_head(enc, text, s);
4806 #endif
4807 }
4808
4809 return (UChar* )NULL;
4810 }
4811
4812 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4813 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4814 const UChar* text, const UChar* text_end,
4815 const UChar* text_range)
4816 {
4817 const UChar *s, *t, *p, *end;
4818 const UChar *tail;
4819 int map_offset;
4820
4821 end = text_range + (target_end - target);
4822 if (end > text_end)
4823 end = text_end;
4824
4825 map_offset = reg->map_offset;
4826 tail = target_end - 1;
4827 s = text + (tail - target);
4828
4829 while (s < end) {
4830 p = s;
4831 t = tail;
4832 while (*p == *t) {
4833 if (t == target) return (UChar* )p;
4834 p--; t--;
4835 }
4836 if (s + map_offset >= text_end) break;
4837 s += reg->map[*(s + map_offset)];
4838 }
4839
4840 return (UChar* )NULL;
4841 }
4842
4843 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)4844 map_search(OnigEncoding enc, UChar map[],
4845 const UChar* text, const UChar* text_range)
4846 {
4847 const UChar *s = text;
4848
4849 while (s < text_range) {
4850 if (map[*s]) return (UChar* )s;
4851
4852 s += enclen(enc, s);
4853 }
4854 return (UChar* )NULL;
4855 }
4856
4857 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)4858 map_search_backward(OnigEncoding enc, UChar map[],
4859 const UChar* text, const UChar* adjust_text,
4860 const UChar* text_start)
4861 {
4862 const UChar *s = text_start;
4863
4864 while (s >= text) {
4865 if (map[*s]) return (UChar* )s;
4866
4867 s = onigenc_get_prev_char_head(enc, adjust_text, s);
4868 }
4869 return (UChar* )NULL;
4870 }
4871 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)4872 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
4873 OnigRegion* region, OnigOptionType option)
4874 {
4875 int r;
4876 OnigMatchParam mp;
4877
4878 onig_initialize_match_param(&mp);
4879 r = onig_match_with_param(reg, str, end, at, region, option, &mp);
4880 onig_free_match_param_content(&mp);
4881 return r;
4882 }
4883
4884 extern int
onig_match_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)4885 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
4886 const UChar* at, OnigRegion* region, OnigOptionType option,
4887 OnigMatchParam* mp)
4888 {
4889 int r;
4890 UChar *prev;
4891 MatchArg msa;
4892
4893 ADJUST_MATCH_PARAM(reg, mp);
4894 MATCH_ARG_INIT(msa, reg, option, region, at, mp);
4895 if (region
4896 #ifdef USE_POSIX_API_REGION_OPTION
4897 && !IS_POSIX_REGION(option)
4898 #endif
4899 ) {
4900 r = onig_region_resize_clear(region, reg->num_mem + 1);
4901 }
4902 else
4903 r = 0;
4904
4905 if (r == 0) {
4906 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4907 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4908 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4909 goto end;
4910 }
4911 }
4912
4913 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
4914 r = match_at(reg, str, end, end, at, prev, &msa);
4915 }
4916
4917 end:
4918 MATCH_ARG_FREE(msa);
4919 return r;
4920 }
4921
/*
 * Forward pre-filter used before running the matcher: look for the next
 * position at or after `start` (and before `range`) that satisfies the
 * optimization data of reg (reg->optimize selects an exact-string search or
 * a byte-map search).
 *
 * On success returns 1 and stores a window of candidate match starts:
 *   *high      set on every successful return.
 *   *low       set when reg->dist_max is 0 or finite; left untouched when
 *              reg->dist_max == INFINITE_LEN.
 *   *low_prev  (only if low_prev is non-NULL) head of the character before
 *              *low; likewise untouched for INFINITE_LEN.
 * Returns 0 when nothing usable is found.
 *
 * NOTE(review): dist_min/dist_max are presumably the minimum/maximum byte
 * distance from a match start to the searched fragment -- confirm against
 * the code that fills in these fields.
 */
static int
forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
               UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
  UChar *p, *pprev = (UChar* )NULL;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
          str, end, start, range);
#endif

  p = start;
  if (reg->dist_min != 0) {
    /* not enough bytes left after start to skip dist_min */
    if (end - p <= reg->dist_min)
      return 0; /* fail */

    /* skip dist_min bytes; for multibyte encodings step by whole
       characters until at least dist_min bytes have been passed */
    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
      p += reg->dist_min;
    }
    else {
      UChar *q = p + reg->dist_min;
      while (p < q) p += enclen(reg->enc, p);
    }
  }

 retry:
  /* run the search routine selected at compile time, starting from p */
  switch (reg->optimize) {
  case OPTIMIZE_STR:
    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
    break;
  case OPTIMIZE_STR_CASE_FOLD:
    p = slow_search_ic(reg->enc, reg->case_fold_flag,
                       reg->exact, reg->exact_end, p, end, range);
    break;

  case OPTIMIZE_STR_FAST:
    p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
    break;

  case OPTIMIZE_STR_FAST_STEP_FORWARD:
    p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
                                         p, end, range);
    break;

  case OPTIMIZE_MAP:
    p = map_search(reg->enc, reg->map, p, range);
    break;
  }

  if (p && p < range) {
    if (p - start < reg->dist_min) {
      /* Shared rejection path (also entered by goto from the sub_anchor
         checks below): remember the rejected hit in pprev, advance one
         character and search again. */
    retry_gate:
      pprev = p;
      p += enclen(reg->enc, p);
      goto retry;
    }

    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCR_BEGIN_LINE:
        /* accept p only at string begin or right after a newline */
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
        }
        break;

      case ANCR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* at string end: reject when the preceding character is a newline */
          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
                                                     (pprev ? pprev : str), p);
          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
#endif
        }
        /* not at string end: p itself must be a line terminator */
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                 )
          goto retry_gate;

        break;
      }
    }

    if (reg->dist_max == 0) {
      /* zero distance: the hit itself is the only candidate */
      *low = p;
      if (low_prev) {
        if (*low > start)
          *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
        else
          *low_prev = onigenc_get_prev_char_head(reg->enc,
                                                 (pprev ? pprev : str), p);
      }
      *high = p;
    }
    else {
      if (reg->dist_max != INFINITE_LEN) {
        /* *low = p - dist_max, clamped to str */
        if (p - str < reg->dist_max) {
          *low = (UChar* )str;
          if (low_prev)
            *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
        }
        else {
          *low = p - reg->dist_max;
          if (*low > start) {
            /* re-align *low to a character head; this call also receives
               low_prev */
            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
                                                 *low, (const UChar** )low_prev);
          }
          else {
            if (low_prev)
              *low_prev = onigenc_get_prev_char_head(reg->enc,
                                                     (pprev ? pprev : str), *low);
          }
        }
      }
      /* no need to adjust *high: it is used as a range check only */
      if (p - str < reg->dist_min)
        *high = (UChar* )str;
      else
        *high = p - reg->dist_min;
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr,
            "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
            (int )(*low - str), (int )(*high - str),
            reg->dist_min, reg->dist_max);
#endif
    return 1; /* success */
  }

  return 0; /* fail */
}
5060
5061
/*
 * Backward pre-filter used before running the matcher: starting from `s`
 * and moving toward `range`, look for a position that satisfies the
 * optimization data of reg.  adjrange is handed to the search helpers and to
 * onigenc_get_prev_char_head() as the base for character-head adjustment.
 *
 * Returns 1 on success and 0 on failure.  *low and *high are written only
 * when reg->dist_max != INFINITE_LEN; in the INFINITE_LEN case they are left
 * untouched (note that the ONIG_DEBUG_SEARCH trace below still reads them).
 */
static int
backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
                const UChar* range, UChar* adjrange, UChar** low, UChar** high)
{
  UChar *p;

  p = s;

 retry:
  switch (reg->optimize) {
  case OPTIMIZE_STR:
  exact_method:
    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
                             range, adjrange, end, p);
    break;

  case OPTIMIZE_STR_CASE_FOLD:
    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
                                reg->exact, reg->exact_end,
                                range, adjrange, end, p);
    break;

  /* the fast forward-search variants share the plain backward search */
  case OPTIMIZE_STR_FAST:
  case OPTIMIZE_STR_FAST_STEP_FORWARD:
    goto exact_method;
    break;

  case OPTIMIZE_MAP:
    p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
    break;
  }

  if (p) {
    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCR_BEGIN_LINE:
        /* accept p only at string begin or right after a newline;
           otherwise continue the search from the previous character */
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, str, p);
          if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
        }
        break;

      case ANCR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* at string end: a preceding newline rejects this position */
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(prev)) goto fail;
          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
#endif
        }
        /* not at string end: p itself must be a line terminator */
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                 ) {
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(p)) goto fail;
          goto retry;
        }
        break;
      }
    }

    if (reg->dist_max != INFINITE_LEN) {
      /* *low = p - dist_max and *high = p - dist_min, both clamped to str */
      if (p - str < reg->dist_max)
        *low = (UChar* )str;
      else
        *low = p - reg->dist_max;

      if (reg->dist_min != 0) {
        if (p - str < reg->dist_min)
          *high = (UChar* )str;
        else
          *high = p - reg->dist_min;
      }
      else {
        *high = p;
      }

      /* re-align *high to a character head */
      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "backward_search: low: %d, high: %d\n",
            (int )(*low - str), (int )(*high - str));
#endif
    return 1; /* success */
  }

 fail:
#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "backward_search: fail.\n");
#endif
  return 0; /* fail */
}
5165
5166
5167 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5168 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5169 const UChar* start, const UChar* range, OnigRegion* region,
5170 OnigOptionType option)
5171 {
5172 int r;
5173 OnigMatchParam mp;
5174 const UChar* data_range;
5175
5176 onig_initialize_match_param(&mp);
5177
5178 /* The following is an expanded code of onig_search_with_param() */
5179 if (range > start)
5180 data_range = range;
5181 else
5182 data_range = end;
5183
5184 r = search_in_range(reg, str, end, start, range, data_range, region,
5185 option, &mp);
5186
5187 onig_free_match_param_content(&mp);
5188 return r;
5189
5190 }
5191
/*
 * Core search routine: try match start positions from `start` toward
 * `range` inside the subject [str, end).
 *
 *   range > start   forward search; match_at() is given data_range as its
 *                   upper range.
 *   otherwise       backward search; match_at() is given orig_start (the
 *                   original start, advanced by one character when it is
 *                   before end) as its upper range.
 *
 * Before the main loops the search interval may be narrowed using the
 * anchor information in reg->anchor, and, when reg->optimize is not
 * OPTIMIZE_NONE, forward_search()/backward_search() are used to locate
 * candidate positions.
 *
 * Returns the byte offset (s - str) of the match, ONIG_MISMATCH, or a
 * negative error code.
 *
 * Exit labels: `finish`, `mismatch` and `match` release msa with
 * MATCH_ARG_FREE; `mismatch_no_msa` and `finish_no_msa` are used on paths
 * taken before MATCH_ARG_INIT has run and therefore skip that release.
 */
static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end,
                const UChar* start, const UChar* range, /* match start range */
                const UChar* data_range, /* subject string range */
                OnigRegion* region,
                OnigOptionType option, OnigMatchParam* mp)
{
  int r;
  UChar *s, *prev;
  MatchArg msa;
  const UChar *orig_start = start;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr,
     "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
     str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  ADJUST_MATCH_PARAM(reg, mp);

  if (region
#ifdef USE_POSIX_API_REGION_OPTION
      && !IS_POSIX_REGION(option)
#endif
      ) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r != 0) goto finish_no_msa;
  }

  /* a start position outside [str, end] can never match */
  if (start > end || start < str) goto mismatch_no_msa;

  if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
    if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
      r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
      goto finish_no_msa;
    }
  }


/* MATCH_AND_RETURN_CHECK runs match_at() at the current s/prev and jumps to
   `match` on success or to `finish` on an error; on ONIG_MISMATCH it falls
   through.  In the USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant a success is
   not taken immediately when IS_FIND_LONGEST(reg->options) holds. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
#else
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */


  /* anchor optimize: resume search range */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCR_BEGIN_POSITION) {
      /* search start-position only */
    begin_position:
      if (range > start)
        range = start + 1;
      else
        range = start;
    }
    else if (reg->anchor & ANCR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

      /* Shared with ANCR_SEMI_END_BUF below: clip start/range using
         anc_dist_min/anc_dist_max measured back from min_semi_end and
         max_semi_end. */
    end_buf:
      if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
        goto mismatch_no_msa;

      if (range > start) {
        /* forward: pull start up and range down */
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - start > reg->anc_dist_max) {
          start = min_semi_end - reg->anc_dist_max;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
        }
        if (max_semi_end - (range - 1) < reg->anc_dist_min) {
          if (max_semi_end - str + 1 < reg->anc_dist_min)
            goto mismatch_no_msa;
          else
            range = max_semi_end - reg->anc_dist_min + 1;
        }

        if (start > range) goto mismatch_no_msa;
        /* If start == range, match with empty at end.
           Backward search is used. */
      }
      else {
        /* backward: pull range up and start down */
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - range > reg->anc_dist_max) {
          range = min_semi_end - reg->anc_dist_max;
        }
        if (max_semi_end - start < reg->anc_dist_min) {
          if (max_semi_end - str < reg->anc_dist_min)
            goto mismatch_no_msa;
          else {
            start = max_semi_end - reg->anc_dist_min;
            start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
          }
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        /* subject ends with a newline: the earliest "semi end" is the
           position of that newline (or of a CR before it, with CRNL) */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
        if (IS_NOT_NULL(pre_end) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar* address_for_empty_string = (UChar* )"";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "onig_search: empty string.\n");
#endif

    if (reg->threshold_len == 0) {
      /* run a single match attempt against a static empty string */
      start = end = str = address_for_empty_string;
      s = (UChar* )start;
      prev = (UChar* )NULL;

      MATCH_ARG_INIT(msa, reg, option, region, start, mp);
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  /* msa is initialized with the caller's original start, not the
     anchor-adjusted one */
  MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (s > str)
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
    else
      prev = (UChar* )NULL;

    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high, *low_prev;

      /* sch_range: limit handed to forward_search(); range extended by
         dist_max and capped at end */
      if (reg->dist_max != 0) {
        if (reg->dist_max == INFINITE_LEN)
          sch_range = (UChar* )end;
        else {
          if ((end - range) < reg->dist_max)
            sch_range = (UChar* )end;
          else {
            sch_range = (UChar* )range + reg->dist_max;
          }
        }
      }
      else
        sch_range = (UChar* )range;

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dist_max != INFINITE_LEN) {
        /* bounded distance: try every position in each [low, high] window
           reported by forward_search() */
        do {
          if (! forward_search(reg, str, end, s, sch_range, &low, &high,
                               &low_prev)) goto mismatch;
          if (s < low) {
            s    = low;
            prev = low_prev;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(data_range);
            prev = s;
            s += enclen(reg->enc, s);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        if (! forward_search(reg, str, end, s, sch_range, &low, &high,
                             (UChar** )NULL)) goto mismatch;

        if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
          do {
            MATCH_AND_RETURN_CHECK(data_range);
            prev = s;
            s += enclen(reg->enc, s);

            /* unless look-behind / negative prec-read is involved, skip
               ahead until the previous character is a newline */
            if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
              while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
                prev = s;
                s += enclen(reg->enc, s);
              }
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* plain forward scan, one character at a time */
    do {
      MATCH_AND_RETURN_CHECK(data_range);
      prev = s;
      s += enclen(reg->enc, s);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(data_range);
    }
  }
  else {  /* backward search */
    if (range < str) goto mismatch;

    if (orig_start < end)
      orig_start += enclen(reg->enc, orig_start); /* is upper range */

    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;
      const UChar *min_range;

      if ((end - range) < reg->threshold_len) goto mismatch;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
      else
        adjrange = (UChar* )end;

      /* min_range: range shifted forward by dist_min, capped at end */
      if (end - range > reg->dist_min)
        min_range = range + reg->dist_min;
      else
        min_range = end;

      if (reg->dist_max != INFINITE_LEN) {
        /* bounded distance: try every position in each [low, high] window
           reported by backward_search() */
        do {
          if (end - s > reg->dist_max)
            sch_start = s + reg->dist_max;
          else {
            sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
          }

          if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                              &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s);
            MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
        goto mismatch;
      }
      else { /* check only. */
        sch_start = onigenc_get_prev_char_head(reg->enc, str, end);

        if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                            &low, &high) <= 0) goto mismatch;
      }
    }

    /* plain backward scan, one character at a time */
    do {
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
      MATCH_AND_RETURN_CHECK(orig_start);
      s = prev;
    } while (s >= range);
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  if (IS_FIND_LONGEST(reg->options)) {
    /* report the best match recorded in msa, if there is one */
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);

  /* If result is mismatch and no FIND_NOT_EMPTY option,
     then the region is not set in match_at(). */
  if (IS_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API_REGION_OPTION
      && !IS_POSIX_REGION(option)
#endif
      ) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %d\n", r);
#endif
  return r;

  /* exits for paths taken before msa was initialized */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %d\n", r);
#endif
  return r;

 match:
  MATCH_ARG_FREE(msa);
  return (int )(s - str);
}
5553
5554 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5555 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5556 const UChar* start, const UChar* range, OnigRegion* region,
5557 OnigOptionType option, OnigMatchParam* mp)
5558 {
5559 const UChar* data_range;
5560
5561 if (range > start)
5562 data_range = range;
5563 else
5564 data_range = end;
5565
5566 return search_in_range(reg, str, end, start, range, data_range, region,
5567 option, mp);
5568 }
5569
5570 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5571 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5572 OnigRegion* region, OnigOptionType option,
5573 int (*scan_callback)(int, int, OnigRegion*, void*),
5574 void* callback_arg)
5575 {
5576 int r;
5577 int n;
5578 int rs;
5579 const UChar* start;
5580
5581 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
5582 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5583 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5584
5585 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5586 }
5587
5588 n = 0;
5589 start = str;
5590 while (1) {
5591 r = onig_search(reg, str, end, start, end, region, option);
5592 if (r >= 0) {
5593 rs = scan_callback(n, r, region, callback_arg);
5594 n++;
5595 if (rs != 0)
5596 return rs;
5597
5598 if (region->end[0] == start - str) {
5599 if (start >= end) break;
5600 start += enclen(reg->enc, start);
5601 }
5602 else
5603 start = str + region->end[0];
5604
5605 if (start > end)
5606 break;
5607 }
5608 else if (r == ONIG_MISMATCH) {
5609 break;
5610 }
5611 else { /* error */
5612 return r;
5613 }
5614 }
5615
5616 return n;
5617 }
5618
5619 extern OnigEncoding
onig_get_encoding(regex_t * reg)5620 onig_get_encoding(regex_t* reg)
5621 {
5622 return reg->enc;
5623 }
5624
5625 extern OnigOptionType
onig_get_options(regex_t * reg)5626 onig_get_options(regex_t* reg)
5627 {
5628 return reg->options;
5629 }
5630
5631 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5632 onig_get_case_fold_flag(regex_t* reg)
5633 {
5634 return reg->case_fold_flag;
5635 }
5636
5637 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5638 onig_get_syntax(regex_t* reg)
5639 {
5640 return reg->syntax;
5641 }
5642
5643 extern int
onig_number_of_captures(regex_t * reg)5644 onig_number_of_captures(regex_t* reg)
5645 {
5646 return reg->num_mem;
5647 }
5648
5649 extern int
onig_number_of_capture_histories(regex_t * reg)5650 onig_number_of_capture_histories(regex_t* reg)
5651 {
5652 #ifdef USE_CAPTURE_HISTORY
5653 int i, n;
5654
5655 n = 0;
5656 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5657 if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5658 n++;
5659 }
5660 return n;
5661 #else
5662 return 0;
5663 #endif
5664 }
5665
5666 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)5667 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5668 {
5669 *to = *from;
5670 }
5671
5672 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5673 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5674 {
5675 #define REGSET_INITIAL_ALLOC_SIZE 10
5676
5677 int i;
5678 int r;
5679 int alloc;
5680 OnigRegSet* set;
5681 RR* rs;
5682
5683 *rset = 0;
5684
5685 set = (OnigRegSet* )xmalloc(sizeof(*set));
5686 CHECK_NULL_RETURN_MEMERR(set);
5687
5688 alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5689 rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5690 if (IS_NULL(rs)) {
5691 xfree(set);
5692 return ONIGERR_MEMORY;
5693 }
5694
5695 set->rs = rs;
5696 set->n = 0;
5697 set->alloc = alloc;
5698
5699 for (i = 0; i < n; i++) {
5700 regex_t* reg = regs[i];
5701
5702 r = onig_regset_add(set, reg);
5703 if (r != 0) {
5704 for (i = 0; i < set->n; i++) {
5705 OnigRegion* region = set->rs[i].region;
5706 if (IS_NOT_NULL(region))
5707 onig_region_free(region, 1);
5708 }
5709 xfree(set->rs);
5710 xfree(set);
5711 return r;
5712 }
5713 }
5714
5715 *rset = set;
5716 return 0;
5717 }
5718
5719 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5720 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5721 {
5722 if (set->n == 1) {
5723 set->enc = reg->enc;
5724 set->anchor = reg->anchor;
5725 set->anc_dmin = reg->anc_dist_min;
5726 set->anc_dmax = reg->anc_dist_max;
5727 set->all_low_high =
5728 (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5729 set->anychar_inf = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5730 }
5731 else {
5732 int anchor;
5733
5734 anchor = set->anchor & reg->anchor;
5735 if (anchor != 0) {
5736 OnigLen anc_dmin;
5737 OnigLen anc_dmax;
5738
5739 anc_dmin = set->anc_dmin;
5740 anc_dmax = set->anc_dmax;
5741 if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5742 if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5743 set->anc_dmin = anc_dmin;
5744 set->anc_dmax = anc_dmax;
5745 }
5746
5747 set->anchor = anchor;
5748
5749 if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5750 set->all_low_high = 0;
5751
5752 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5753 set->anychar_inf = 1;
5754 }
5755 }
5756
5757 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5758 onig_regset_add(OnigRegSet* set, regex_t* reg)
5759 {
5760 OnigRegion* region;
5761
5762 if (IS_FIND_LONGEST(reg->options))
5763 return ONIGERR_INVALID_ARGUMENT;
5764
5765 if (set->n != 0 && reg->enc != set->enc)
5766 return ONIGERR_INVALID_ARGUMENT;
5767
5768 if (set->n >= set->alloc) {
5769 RR* nrs;
5770 int new_alloc;
5771
5772 new_alloc = set->alloc * 2;
5773 nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5774 CHECK_NULL_RETURN_MEMERR(nrs);
5775
5776 set->rs = nrs;
5777 set->alloc = new_alloc;
5778 }
5779
5780 region = onig_region_new();
5781 CHECK_NULL_RETURN_MEMERR(region);
5782
5783 set->rs[set->n].reg = reg;
5784 set->rs[set->n].region = region;
5785 set->n++;
5786
5787 update_regset_by_reg(set, reg);
5788 return 0;
5789 }
5790
5791 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)5792 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
5793 {
5794 int i;
5795
5796 if (at < 0 || at >= set->n)
5797 return ONIGERR_INVALID_ARGUMENT;
5798
5799 if (IS_NULL(reg)) {
5800 onig_region_free(set->rs[at].region, 1);
5801 for (i = at; i < set->n - 1; i++) {
5802 set->rs[i].reg = set->rs[i+1].reg;
5803 set->rs[i].region = set->rs[i+1].region;
5804 }
5805 set->n--;
5806 }
5807 else {
5808 if (IS_FIND_LONGEST(reg->options))
5809 return ONIGERR_INVALID_ARGUMENT;
5810
5811 if (set->n > 1 && reg->enc != set->enc)
5812 return ONIGERR_INVALID_ARGUMENT;
5813
5814 set->rs[at].reg = reg;
5815 }
5816
5817 for (i = 0; i < set->n; i++)
5818 update_regset_by_reg(set, set->rs[i].reg);
5819
5820 return 0;
5821 }
5822
5823 extern void
onig_regset_free(OnigRegSet * set)5824 onig_regset_free(OnigRegSet* set)
5825 {
5826 int i;
5827
5828 for (i = 0; i < set->n; i++) {
5829 regex_t* reg;
5830 OnigRegion* region;
5831
5832 reg = set->rs[i].reg;
5833 region = set->rs[i].region;
5834 onig_free(reg);
5835 if (IS_NOT_NULL(region))
5836 onig_region_free(region, 1);
5837 }
5838
5839 xfree(set->rs);
5840 xfree(set);
5841 }
5842
5843 extern int
onig_regset_number_of_regex(OnigRegSet * set)5844 onig_regset_number_of_regex(OnigRegSet* set)
5845 {
5846 return set->n;
5847 }
5848
5849 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)5850 onig_regset_get_regex(OnigRegSet* set, int at)
5851 {
5852 if (at < 0 || at >= set->n)
5853 return (regex_t* )0;
5854
5855 return set->rs[at].reg;
5856 }
5857
5858 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)5859 onig_regset_get_region(OnigRegSet* set, int at)
5860 {
5861 if (at < 0 || at >= set->n)
5862 return (OnigRegion* )0;
5863
5864 return set->rs[at].region;
5865 }
5866
5867
5868 #ifdef USE_DIRECT_THREADED_CODE
5869 extern int
onig_init_for_match_at(regex_t * reg)5870 onig_init_for_match_at(regex_t* reg)
5871 {
5872 return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
5873 (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
5874 (MatchArg* )NULL);
5875 }
5876 #endif
5877
5878
5879 /* for callout functions */
5880
5881 #ifdef USE_CALLOUT
5882
5883 extern OnigCalloutFunc
onig_get_progress_callout(void)5884 onig_get_progress_callout(void)
5885 {
5886 return DefaultProgressCallout;
5887 }
5888
5889 extern int
onig_set_progress_callout(OnigCalloutFunc f)5890 onig_set_progress_callout(OnigCalloutFunc f)
5891 {
5892 DefaultProgressCallout = f;
5893 return ONIG_NORMAL;
5894 }
5895
5896 extern OnigCalloutFunc
onig_get_retraction_callout(void)5897 onig_get_retraction_callout(void)
5898 {
5899 return DefaultRetractionCallout;
5900 }
5901
5902 extern int
onig_set_retraction_callout(OnigCalloutFunc f)5903 onig_set_retraction_callout(OnigCalloutFunc f)
5904 {
5905 DefaultRetractionCallout = f;
5906 return ONIG_NORMAL;
5907 }
5908
5909 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)5910 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
5911 {
5912 return args->num;
5913 }
5914
5915 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)5916 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
5917 {
5918 return args->in;
5919 }
5920
5921 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)5922 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
5923 {
5924 return args->name_id;
5925 }
5926
5927 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)5928 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
5929 {
5930 int num;
5931 CalloutListEntry* e;
5932
5933 num = args->num;
5934 e = onig_reg_callout_list_at(args->regex, num);
5935 if (IS_NULL(e)) return 0;
5936 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5937 return e->u.content.start;
5938 }
5939
5940 return 0;
5941 }
5942
5943 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)5944 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
5945 {
5946 int num;
5947 CalloutListEntry* e;
5948
5949 num = args->num;
5950 e = onig_reg_callout_list_at(args->regex, num);
5951 if (IS_NULL(e)) return 0;
5952 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5953 return e->u.content.end;
5954 }
5955
5956 return 0;
5957 }
5958
5959 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)5960 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
5961 {
5962 int num;
5963 CalloutListEntry* e;
5964
5965 num = args->num;
5966 e = onig_reg_callout_list_at(args->regex, num);
5967 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5968 if (e->of == ONIG_CALLOUT_OF_NAME) {
5969 return e->u.arg.num;
5970 }
5971
5972 return ONIGERR_INVALID_ARGUMENT;
5973 }
5974
5975 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)5976 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
5977 {
5978 int num;
5979 CalloutListEntry* e;
5980
5981 num = args->num;
5982 e = onig_reg_callout_list_at(args->regex, num);
5983 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5984 if (e->of == ONIG_CALLOUT_OF_NAME) {
5985 return e->u.arg.passed_num;
5986 }
5987
5988 return ONIGERR_INVALID_ARGUMENT;
5989 }
5990
5991 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)5992 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
5993 OnigType* type, OnigValue* val)
5994 {
5995 int num;
5996 CalloutListEntry* e;
5997
5998 num = args->num;
5999 e = onig_reg_callout_list_at(args->regex, num);
6000 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6001 if (e->of == ONIG_CALLOUT_OF_NAME) {
6002 if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6003 if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
6004 return ONIG_NORMAL;
6005 }
6006
6007 return ONIGERR_INVALID_ARGUMENT;
6008 }
6009
6010 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6011 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6012 {
6013 return args->string;
6014 }
6015
6016 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6017 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6018 {
6019 return args->string_end;
6020 }
6021
6022 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6023 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6024 {
6025 return args->start;
6026 }
6027
6028 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6029 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6030 {
6031 return args->right_range;
6032 }
6033
6034 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6035 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6036 {
6037 return args->current;
6038 }
6039
6040 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6041 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6042 {
6043 return args->regex;
6044 }
6045
6046 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6047 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6048 {
6049 return args->retry_in_match_counter;
6050 }
6051
6052
6053 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6054 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6055 {
6056 OnigRegex reg;
6057 const UChar* str;
6058 StackType* stk_base;
6059 int i;
6060 StackIndex* mem_start_stk;
6061 StackIndex* mem_end_stk;
6062
6063 i = mem_num;
6064 reg = a->regex;
6065 str = a->string;
6066 stk_base = a->stk_base;
6067 mem_start_stk = a->mem_start_stk;
6068 mem_end_stk = a->mem_end_stk;
6069
6070 if (i > 0) {
6071 if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
6072 *begin = (int )(STACK_MEM_START(reg, i) - str);
6073 *end = (int )(STACK_MEM_END(reg, i) - str);
6074 }
6075 else {
6076 *begin = *end = ONIG_REGION_NOTPOS;
6077 }
6078 }
6079 else
6080 return ONIGERR_INVALID_ARGUMENT;
6081
6082 return ONIG_NORMAL;
6083 }
6084
6085 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6086 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6087 {
6088 int n;
6089
6090 n = (int )(a->stk - a->stk_base);
6091
6092 if (used_num != 0)
6093 *used_num = n;
6094
6095 if (used_bytes != 0)
6096 *used_bytes = n * sizeof(StackType);
6097
6098 return ONIG_NORMAL;
6099 }
6100
6101
6102 /* builtin callout functions */
6103
6104 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6105 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6106 {
6107 return ONIG_CALLOUT_FAIL;
6108 }
6109
6110 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6111 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6112 {
6113 return ONIG_MISMATCH;
6114 }
6115
6116 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6117 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6118 {
6119 int r;
6120 int n;
6121 OnigValue val;
6122
6123 r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6124 if (r != ONIG_NORMAL) return r;
6125
6126 n = (int )val.l;
6127 if (n >= 0) {
6128 n = ONIGERR_INVALID_CALLOUT_BODY;
6129 }
6130 else if (onig_is_error_code_needs_param(n)) {
6131 n = ONIGERR_INVALID_CALLOUT_BODY;
6132 }
6133
6134 return n;
6135 }
6136
6137 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6138 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6139 {
6140 (void )onig_check_callout_data_and_clear_old_values(args);
6141
6142 return onig_builtin_total_count(args, user_data);
6143 }
6144
6145 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6146 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6147 {
6148 int r;
6149 int slot;
6150 OnigType type;
6151 OnigValue val;
6152 OnigValue aval;
6153 OnigCodePoint count_type;
6154
6155 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6156 if (r != ONIG_NORMAL) return r;
6157
6158 count_type = aval.c;
6159 if (count_type != '>' && count_type != 'X' && count_type != '<')
6160 return ONIGERR_INVALID_CALLOUT_ARG;
6161
6162 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6163 &type, &val);
6164 if (r < ONIG_NORMAL)
6165 return r;
6166 else if (r > ONIG_NORMAL) {
6167 /* type == void: initial state */
6168 val.l = 0;
6169 }
6170
6171 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6172 slot = 2;
6173 if (count_type == '<')
6174 val.l++;
6175 else if (count_type == 'X')
6176 val.l--;
6177 }
6178 else {
6179 slot = 1;
6180 if (count_type != '<')
6181 val.l++;
6182 }
6183
6184 r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6185 if (r != ONIG_NORMAL) return r;
6186
6187 /* slot 1: in progress counter, slot 2: in retraction counter */
6188 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6189 &type, &val);
6190 if (r < ONIG_NORMAL)
6191 return r;
6192 else if (r > ONIG_NORMAL) {
6193 val.l = 0;
6194 }
6195
6196 val.l++;
6197 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6198 if (r != ONIG_NORMAL) return r;
6199
6200 return ONIG_CALLOUT_SUCCESS;
6201 }
6202
6203 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6204 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6205 {
6206 int r;
6207 int slot;
6208 long max_val;
6209 OnigCodePoint count_type;
6210 OnigType type;
6211 OnigValue val;
6212 OnigValue aval;
6213
6214 (void )onig_check_callout_data_and_clear_old_values(args);
6215
6216 slot = 0;
6217 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6218 if (r < ONIG_NORMAL)
6219 return r;
6220 else if (r > ONIG_NORMAL) {
6221 /* type == void: initial state */
6222 type = ONIG_TYPE_LONG;
6223 val.l = 0;
6224 }
6225
6226 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6227 if (r != ONIG_NORMAL) return r;
6228 if (type == ONIG_TYPE_TAG) {
6229 r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6230 if (r < ONIG_NORMAL) return r;
6231 else if (r > ONIG_NORMAL)
6232 max_val = 0L;
6233 else
6234 max_val = aval.l;
6235 }
6236 else { /* LONG */
6237 max_val = aval.l;
6238 }
6239
6240 r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6241 if (r != ONIG_NORMAL) return r;
6242
6243 count_type = aval.c;
6244 if (count_type != '>' && count_type != 'X' && count_type != '<')
6245 return ONIGERR_INVALID_CALLOUT_ARG;
6246
6247 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6248 if (count_type == '<') {
6249 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6250 val.l++;
6251 }
6252 else if (count_type == 'X')
6253 val.l--;
6254 }
6255 else {
6256 if (count_type != '<') {
6257 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6258 val.l++;
6259 }
6260 }
6261
6262 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6263 if (r != ONIG_NORMAL) return r;
6264
6265 return ONIG_CALLOUT_SUCCESS;
6266 }
6267
/*
 * Comparison operators used by onig_builtin_cmp().  The numeric value is
 * cached as a long in callout data slot 0 and cast back on later calls.
 */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
6276
6277 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6278 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6279 {
6280 int r;
6281 int slot;
6282 long lv;
6283 long rv;
6284 OnigType type;
6285 OnigValue val;
6286 regex_t* reg;
6287 enum OP_CMP op;
6288
6289 reg = args->regex;
6290
6291 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6292 if (r != ONIG_NORMAL) return r;
6293
6294 if (type == ONIG_TYPE_TAG) {
6295 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6296 if (r < ONIG_NORMAL) return r;
6297 else if (r > ONIG_NORMAL)
6298 lv = 0L;
6299 else
6300 lv = val.l;
6301 }
6302 else { /* ONIG_TYPE_LONG */
6303 lv = val.l;
6304 }
6305
6306 r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6307 if (r != ONIG_NORMAL) return r;
6308
6309 if (type == ONIG_TYPE_TAG) {
6310 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6311 if (r < ONIG_NORMAL) return r;
6312 else if (r > ONIG_NORMAL)
6313 rv = 0L;
6314 else
6315 rv = val.l;
6316 }
6317 else { /* ONIG_TYPE_LONG */
6318 rv = val.l;
6319 }
6320
6321 slot = 0;
6322 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6323 if (r < ONIG_NORMAL)
6324 return r;
6325 else if (r > ONIG_NORMAL) {
6326 /* type == void: initial state */
6327 OnigCodePoint c1, c2;
6328 UChar* p;
6329
6330 r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6331 if (r != ONIG_NORMAL) return r;
6332
6333 p = val.s.start;
6334 c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6335 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6336 if (p < val.s.end) {
6337 c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6338 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6339 if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
6340 }
6341 else
6342 c2 = 0;
6343
6344 switch (c1) {
6345 case '=':
6346 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6347 op = OP_EQ;
6348 break;
6349 case '!':
6350 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6351 op = OP_NE;
6352 break;
6353 case '<':
6354 if (c2 == '=') op = OP_LE;
6355 else if (c2 == 0) op = OP_LT;
6356 else return ONIGERR_INVALID_CALLOUT_ARG;
6357 break;
6358 case '>':
6359 if (c2 == '=') op = OP_GE;
6360 else if (c2 == 0) op = OP_GT;
6361 else return ONIGERR_INVALID_CALLOUT_ARG;
6362 break;
6363 default:
6364 return ONIGERR_INVALID_CALLOUT_ARG;
6365 break;
6366 }
6367 val.l = (long )op;
6368 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6369 if (r != ONIG_NORMAL) return r;
6370 }
6371 else {
6372 op = (enum OP_CMP )val.l;
6373 }
6374
6375 switch (op) {
6376 case OP_EQ: r = (lv == rv); break;
6377 case OP_NE: r = (lv != rv); break;
6378 case OP_LT: r = (lv < rv); break;
6379 case OP_GT: r = (lv > rv); break;
6380 case OP_LE: r = (lv <= rv); break;
6381 case OP_GE: r = (lv >= rv); break;
6382 }
6383
6384 return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6385 }
6386
6387
6388 #include <stdio.h>
6389
/* Output stream used by onig_builtin_monitor(); assigned by
   onig_setup_builtin_monitors_by_ascii_encoded_name(). */
static FILE* OutFp = NULL;
6391
6392 /* name start with "onig_" for macros. */
6393 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6394 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6395 {
6396 int r;
6397 int num;
6398 size_t tag_len;
6399 const UChar* start;
6400 const UChar* right;
6401 const UChar* current;
6402 const UChar* string;
6403 const UChar* strend;
6404 const UChar* tag_start;
6405 const UChar* tag_end;
6406 regex_t* reg;
6407 OnigCalloutIn in;
6408 OnigType type;
6409 OnigValue val;
6410 char buf[20];
6411 FILE* fp;
6412
6413 fp = OutFp;
6414
6415 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6416 if (r != ONIG_NORMAL) return r;
6417
6418 in = onig_get_callout_in_by_callout_args(args);
6419 if (in == ONIG_CALLOUT_IN_PROGRESS) {
6420 if (val.c == '<')
6421 return ONIG_CALLOUT_SUCCESS;
6422 }
6423 else {
6424 if (val.c != 'X' && val.c != '<')
6425 return ONIG_CALLOUT_SUCCESS;
6426 }
6427
6428 num = onig_get_callout_num_by_callout_args(args);
6429 start = onig_get_start_by_callout_args(args);
6430 right = onig_get_right_range_by_callout_args(args);
6431 current = onig_get_current_by_callout_args(args);
6432 string = onig_get_string_by_callout_args(args);
6433 strend = onig_get_string_end_by_callout_args(args);
6434 reg = onig_get_regex_by_callout_args(args);
6435 tag_start = onig_get_callout_tag_start(reg, num);
6436 tag_end = onig_get_callout_tag_end(reg, num);
6437
6438 if (tag_start == 0)
6439 xsnprintf(buf, sizeof(buf), "#%d", num);
6440 else {
6441 /* CAUTION: tag string is not terminated with NULL. */
6442 int i;
6443
6444 tag_len = tag_end - tag_start;
6445 if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6446 for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
6447 buf[tag_len] = '\0';
6448 }
6449
6450 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6451 buf,
6452 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6453 (int )(current - string),
6454 (int )(start - string),
6455 (int )(right - string),
6456 (int )(strend - string));
6457 fflush(fp);
6458
6459 return ONIG_CALLOUT_SUCCESS;
6460 }
6461
6462 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6463 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6464 {
6465 int id;
6466 char* name;
6467 OnigEncoding enc;
6468 unsigned int ts[4];
6469 OnigValue opts[4];
6470
6471 if (IS_NOT_NULL(fp))
6472 OutFp = (FILE* )fp;
6473 else
6474 OutFp = stdout;
6475
6476 enc = ONIG_ENCODING_ASCII;
6477
6478 name = "MON";
6479 ts[0] = ONIG_TYPE_CHAR;
6480 opts[0].c = '>';
6481 BC_B_O(name, monitor, 1, ts, 1, opts);
6482
6483 return ONIG_NORMAL;
6484 }
6485
6486 #endif /* USE_CALLOUT */
6487