xref: /PHP-7.3/ext/mbstring/oniguruma/src/regexec.c (revision 1979c5d1)
1 /**********************************************************************
2   regexec.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2019  K.Kosako
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 #include "regint.h"
30 
/* Word-character test for the multibyte character at s.
   mode == 0 selects ONIGENC_IS_MBC_WORD; any other mode selects
   ONIGENC_IS_MBC_WORD_ASCII. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) != 0 ? ONIGENC_IS_MBC_WORD_ASCII(enc,s,end) : ONIGENC_IS_MBC_WORD(enc,s,end))
33 
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 (CR) and the character that
   immediately follows it is a newline for enc.
   The argument p is parenthesized at every use so that an expression
   argument (e.g. a conditional) cannot re-associate with the `+`. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,(p),end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,((p)+enclen(enc,(p))),end))
#endif
39 
/* Compile-time switch with no value; the code that tests it is not in this
   part of the file (presumably it enables an interrupt check inside the
   match loop -- TODO confirm at the point of use). */
#define CHECK_INTERRUPT_IN_MATCH
41 
/* Start position of capture group i.
   If the group's bit is set in reg->push_mem_start, mem_start_stk[i] is
   passed to STACK_AT and the position is read from that stack entry's
   u.mem.pstr; otherwise the value stored in mem_start_stk[i] is itself
   reinterpreted as the UChar* position.
   Relies on `mem_start_stk` (and whatever STACK_AT needs) being in scope
   where the macro is expanded. */
#define STACK_MEM_START(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
   STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))

/* End position of capture group i; same scheme as STACK_MEM_START but keyed
   on reg->push_mem_end and reading mem_end_stk[i]. */
#define STACK_MEM_END(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
   STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
49 
/* Forward declarations for static functions defined later in this file. */
static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);

static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
54 
55 
#ifdef USE_CALLOUT
/* Per-callout storage: a call-counter stamp plus ONIG_CALLOUT_DATA_SLOT_NUM
   typed value slots. (How the counter is compared against
   OnigMatchParamStruct.match_at_call_counter is decided by code outside
   this part of the file.) */
typedef struct {
  int last_match_at_call_counter;
  struct {
    OnigType  type;   /* type tag for val */
    OnigValue val;    /* stored value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
65 
/* Match parameters supplied by the caller. The fields are written by the
   onig_set_*_of_match_param() setters below; the callout-related fields
   exist only when USE_CALLOUT is defined. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;      /* set by onig_set_match_stack_limit_size_of_match_param() */
  unsigned long   retry_limit_in_match;   /* set by onig_set_retry_limit_in_match_of_match_param() */
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;    /* set by onig_set_progress_callout_of_match_param() */
  OnigCalloutFunc retraction_callout_of_contents;  /* set by onig_set_retraction_callout_of_match_param() */
  int             match_at_call_counter;
  void*           callout_user_data;      /* set by onig_set_callout_user_data_of_match_param() */
  CalloutData*    callout_data;
  int             callout_data_alloc_num; /* presumably the element count of callout_data -- TODO confirm */
#endif
};
78 
79 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)80 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
81                                                unsigned int limit)
82 {
83   param->match_stack_limit = limit;
84   return ONIG_NORMAL;
85 }
86 
87 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)88 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
89                                              unsigned long limit)
90 {
91   param->retry_limit_in_match = limit;
92   return ONIG_NORMAL;
93 }
94 
95 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)96 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
97 {
98 #ifdef USE_CALLOUT
99   param->progress_callout_of_contents = f;
100   return ONIG_NORMAL;
101 #else
102   return ONIG_NO_SUPPORT_CONFIG;
103 #endif
104 }
105 
106 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)107 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
108 {
109 #ifdef USE_CALLOUT
110   param->retraction_callout_of_contents = f;
111   return ONIG_NORMAL;
112 #else
113   return ONIG_NO_SUPPORT_CONFIG;
114 #endif
115 }
116 
117 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)118 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
119 {
120 #ifdef USE_CALLOUT
121   param->callout_user_data = user_data;
122   return ONIG_NORMAL;
123 #else
124   return ONIG_NO_SUPPORT_CONFIG;
125 #endif
126 }
127 
128 
/* Bundle of per-call matching state. The fields are filled in and read by
   code outside this part of the file; the notes below only restate what
   the declarations themselves show. */
typedef struct {
  void* stack_p;          /* untyped pointer; presumably the match stack buffer -- TODO confirm */
  int   stack_n;          /* presumably the entry count of stack_p -- TODO confirm */
  OnigOptionType options;
  OnigRegion*    region;
  int            ptr_num;
  const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */
  unsigned int   match_stack_limit;
  unsigned long  retry_limit_in_match;
  OnigMatchParam* mp;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
  UChar* best_s;
#endif
} MatchArg;
144 
145 
146 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
147 
/* arguments type */
/* Kinds of operand an opcode can carry. Not referenced by the debug
   printers visible in this part of the file. */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,
  ARG_RELADDR =  1,
  ARG_ABSADDR =  2,
  ARG_LENGTH  =  3,
  ARG_MEMNUM  =  4,
  ARG_OPTION  =  5,
  ARG_MODE    =  6
} OpArgType;
159 
/* One row of the opcode-name table: an opcode value and its printable name.
   A row with a negative opcode terminates the table (see op2name()). */
typedef struct {
  short int opcode;
  char*     name;
} OpInfoType;
164 
165 static OpInfoType OpInfo[] = {
166   { OP_FINISH,         "finish"},
167   { OP_END,            "end"},
168   { OP_STR_1,          "str_1"},
169   { OP_STR_2,          "str_2"},
170   { OP_STR_3,          "str_3"},
171   { OP_STR_4,          "str_4"},
172   { OP_STR_5,          "str_5"},
173   { OP_STR_N,          "str_n"},
174   { OP_STR_MB2N1,      "str_mb2-n1"},
175   { OP_STR_MB2N2,      "str_mb2-n2"},
176   { OP_STR_MB2N3,      "str_mb2-n3"},
177   { OP_STR_MB2N,       "str_mb2-n"},
178   { OP_STR_MB3N,       "str_mb3n"},
179   { OP_STR_MBN,        "str_mbn"},
180   { OP_STR_1_IC,       "str_1-ic"},
181   { OP_STR_N_IC,       "str_n-ic"},
182   { OP_CCLASS,         "cclass"},
183   { OP_CCLASS_MB,      "cclass-mb"},
184   { OP_CCLASS_MIX,     "cclass-mix"},
185   { OP_CCLASS_NOT,     "cclass-not"},
186   { OP_CCLASS_MB_NOT,  "cclass-mb-not"},
187   { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
188   { OP_ANYCHAR,               "anychar"},
189   { OP_ANYCHAR_ML,            "anychar-ml"},
190   { OP_ANYCHAR_STAR,          "anychar*"},
191   { OP_ANYCHAR_ML_STAR,       "anychar-ml*"},
192   { OP_ANYCHAR_STAR_PEEK_NEXT,    "anychar*-peek-next"},
193   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
194   { OP_WORD,                  "word"},
195   { OP_WORD_ASCII,            "word-ascii"},
196   { OP_NO_WORD,               "not-word"},
197   { OP_NO_WORD_ASCII,         "not-word-ascii"},
198   { OP_WORD_BOUNDARY,         "word-boundary"},
199   { OP_NO_WORD_BOUNDARY,      "not-word-boundary"},
200   { OP_WORD_BEGIN,            "word-begin"},
201   { OP_WORD_END,              "word-end"},
202   { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
203   { OP_BEGIN_BUF,             "begin-buf"},
204   { OP_END_BUF,               "end-buf"},
205   { OP_BEGIN_LINE,            "begin-line"},
206   { OP_END_LINE,              "end-line"},
207   { OP_SEMI_END_BUF,          "semi-end-buf"},
208   { OP_BEGIN_POSITION,        "begin-position"},
209   { OP_BACKREF1,              "backref1"},
210   { OP_BACKREF2,              "backref2"},
211   { OP_BACKREF_N,             "backref-n"},
212   { OP_BACKREF_N_IC,          "backref-n-ic"},
213   { OP_BACKREF_MULTI,         "backref_multi"},
214   { OP_BACKREF_MULTI_IC,      "backref_multi-ic"},
215   { OP_BACKREF_WITH_LEVEL,    "backref_with_level"},
216   { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
217   { OP_BACKREF_CHECK,         "backref_check"},
218   { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
219   { OP_MEM_START_PUSH,        "mem-start-push"},
220   { OP_MEM_START,             "mem-start"},
221   { OP_MEM_END_PUSH,          "mem-end-push"},
222 #ifdef USE_CALL
223   { OP_MEM_END_PUSH_REC,      "mem-end-push-rec"},
224 #endif
225   { OP_MEM_END,               "mem-end"},
226 #ifdef USE_CALL
227   { OP_MEM_END_REC,           "mem-end-rec"},
228 #endif
229   { OP_FAIL,                  "fail"},
230   { OP_JUMP,                  "jump"},
231   { OP_PUSH,                  "push"},
232   { OP_PUSH_SUPER,            "push-super"},
233   { OP_POP_OUT,               "pop-out"},
234 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
235   { OP_PUSH_OR_JUMP_EXACT1,   "push-or-jump-e1"},
236 #endif
237   { OP_PUSH_IF_PEEK_NEXT,     "push-if-peek-next"},
238   { OP_REPEAT,                "repeat"},
239   { OP_REPEAT_NG,             "repeat-ng"},
240   { OP_REPEAT_INC,            "repeat-inc"},
241   { OP_REPEAT_INC_NG,         "repeat-inc-ng"},
242   { OP_EMPTY_CHECK_START,     "empty-check-start"},
243   { OP_EMPTY_CHECK_END,       "empty-check-end"},
244   { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
245 #ifdef USE_CALL
246   { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
247 #endif
248   { OP_PREC_READ_START,       "push-pos"},
249   { OP_PREC_READ_END,         "pop-pos"},
250   { OP_PREC_READ_NOT_START,   "prec-read-not-start"},
251   { OP_PREC_READ_NOT_END,     "prec-read-not-end"},
252   { OP_ATOMIC_START,          "atomic-start"},
253   { OP_ATOMIC_END,            "atomic-end"},
254   { OP_LOOK_BEHIND,           "look-behind"},
255   { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
256   { OP_LOOK_BEHIND_NOT_END,   "look-behind-not-end"},
257   { OP_PUSH_SAVE_VAL,         "push-save-val"},
258   { OP_UPDATE_VAR,            "update-var"},
259 #ifdef USE_CALL
260   { OP_CALL,                  "call"},
261   { OP_RETURN,                "return"},
262 #endif
263 #ifdef USE_CALLOUT
264   { OP_CALLOUT_CONTENTS,      "callout-contents"},
265   { OP_CALLOUT_NAME,          "callout-name"},
266 #endif
267   { -1, ""}
268 };
269 
270 static char*
op2name(int opcode)271 op2name(int opcode)
272 {
273   int i;
274 
275   for (i = 0; OpInfo[i].opcode >= 0; i++) {
276     if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
277   }
278 
279   return "";
280 }
281 
282 static void
p_string(FILE * f,int len,UChar * s)283 p_string(FILE* f, int len, UChar* s)
284 {
285   fputs(":", f);
286   while (len-- > 0) { fputc(*s++, f); }
287 }
288 
289 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)290 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
291 {
292   int x = len * mb_len;
293 
294   fprintf(f, ":%d:", len);
295   while (x-- > 0) { fputc(*s++, f); }
296 }
297 
298 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)299 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
300 {
301   RelAddrType curr = (RelAddrType )(p - start);
302 
303   fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
304 }
305 
306 static int
bitset_on_num(BitSetRef bs)307 bitset_on_num(BitSetRef bs)
308 {
309   int i, n;
310 
311   n = 0;
312   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
313     if (BITSET_AT(bs, i)) n++;
314   }
315 
316   return n;
317 }
318 
319 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)320 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
321                          Operation* start, OnigEncoding enc)
322 {
323   int i, n;
324   RelAddrType addr;
325   LengthType  len;
326   MemNumType  mem;
327   OnigCodePoint code;
328   ModeType mode;
329   UChar *q;
330   Operation* p;
331   enum OpCode opcode;
332 
333   p = reg->ops + index;
334 
335 #ifdef USE_DIRECT_THREADED_CODE
336   opcode = reg->ocs[index];
337 #else
338   opcode = p->opcode;
339 #endif
340 
341   fprintf(f, "%s", op2name(opcode));
342   switch (opcode) {
343   case OP_STR_1:
344     p_string(f, 1, p->exact.s); break;
345   case OP_STR_2:
346     p_string(f, 2, p->exact.s); break;
347   case OP_STR_3:
348     p_string(f, 3, p->exact.s); break;
349   case OP_STR_4:
350     p_string(f, 4, p->exact.s); break;
351   case OP_STR_5:
352     p_string(f, 5, p->exact.s); break;
353   case OP_STR_N:
354     len = p->exact_n.n;
355     p_string(f, len, p->exact_n.s); break;
356   case OP_STR_MB2N1:
357     p_string(f, 2, p->exact.s); break;
358   case OP_STR_MB2N2:
359     p_string(f, 4, p->exact.s); break;
360   case OP_STR_MB2N3:
361     p_string(f, 3, p->exact.s); break;
362   case OP_STR_MB2N:
363     len = p->exact_n.n;
364     p_len_string(f, len, 2, p->exact_n.s); break;
365   case OP_STR_MB3N:
366     len = p->exact_n.n;
367     p_len_string(f, len, 3, p->exact_n.s); break;
368   case OP_STR_MBN:
369     {
370       int mb_len;
371 
372       mb_len = p->exact_len_n.len;
373       len    = p->exact_len_n.n;
374       q      = p->exact_len_n.s;
375       fprintf(f, ":%d:%d:", mb_len, len);
376       n = len * mb_len;
377       while (n-- > 0) { fputc(*q++, f); }
378     }
379     break;
380   case OP_STR_1_IC:
381     len = enclen(enc, p->exact.s);
382     p_string(f, len, p->exact.s);
383     break;
384   case OP_STR_N_IC:
385     len = p->exact_n.n;
386     p_len_string(f, len, 1, p->exact_n.s);
387     break;
388 
389   case OP_CCLASS:
390   case OP_CCLASS_NOT:
391     n = bitset_on_num(p->cclass.bsp);
392     fprintf(f, ":%d", n);
393     break;
394   case OP_CCLASS_MB:
395   case OP_CCLASS_MB_NOT:
396     {
397       OnigCodePoint ncode;
398       OnigCodePoint* codes;
399 
400       codes = (OnigCodePoint* )p->cclass_mb.mb;
401       GET_CODE_POINT(ncode, codes);
402       codes++;
403       GET_CODE_POINT(code, codes);
404       fprintf(f, ":%d:0x%x", ncode, code);
405     }
406     break;
407   case OP_CCLASS_MIX:
408   case OP_CCLASS_MIX_NOT:
409     {
410       OnigCodePoint ncode;
411       OnigCodePoint* codes;
412 
413       codes = (OnigCodePoint* )p->cclass_mix.mb;
414       n = bitset_on_num(p->cclass_mix.bsp);
415 
416       GET_CODE_POINT(ncode, codes);
417       codes++;
418       GET_CODE_POINT(code, codes);
419       fprintf(f, ":%d:%u:%u", n, code, ncode);
420     }
421     break;
422 
423   case OP_ANYCHAR_STAR_PEEK_NEXT:
424   case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
425     p_string(f, 1, &(p->anychar_star_peek_next.c));
426     break;
427 
428   case OP_WORD_BOUNDARY:
429   case OP_NO_WORD_BOUNDARY:
430   case OP_WORD_BEGIN:
431   case OP_WORD_END:
432     mode = p->word_boundary.mode;
433     fprintf(f, ":%d", mode);
434     break;
435 
436   case OP_BACKREF_N:
437   case OP_BACKREF_N_IC:
438     mem = p->backref_n.n1;
439     fprintf(f, ":%d", mem);
440     break;
441   case OP_BACKREF_MULTI_IC:
442   case OP_BACKREF_MULTI:
443   case OP_BACKREF_CHECK:
444     fputs(" ", f);
445     n = p->backref_general.num;
446     for (i = 0; i < n; i++) {
447       mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
448       if (i > 0) fputs(", ", f);
449       fprintf(f, "%d", mem);
450     }
451     break;
452   case OP_BACKREF_WITH_LEVEL:
453   case OP_BACKREF_WITH_LEVEL_IC:
454   case OP_BACKREF_CHECK_WITH_LEVEL:
455     {
456       LengthType level;
457 
458       level = p->backref_general.nest_level;
459       fprintf(f, ":%d", level);
460       fputs(" ", f);
461       n = p->backref_general.num;
462       for (i = 0; i < n; i++) {
463         mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
464         if (i > 0) fputs(", ", f);
465         fprintf(f, "%d", mem);
466       }
467     }
468     break;
469 
470   case OP_MEM_START:
471   case OP_MEM_START_PUSH:
472     mem = p->memory_start.num;
473     fprintf(f, ":%d", mem);
474     break;
475 
476   case OP_MEM_END:
477   case OP_MEM_END_PUSH:
478 #ifdef USE_CALL
479   case OP_MEM_END_REC:
480   case OP_MEM_END_PUSH_REC:
481 #endif
482     mem = p->memory_end.num;
483     fprintf(f, ":%d", mem);
484     break;
485 
486   case OP_JUMP:
487     addr = p->jump.addr;
488     fputc(':', f);
489     p_rel_addr(f, addr, p, start);
490     break;
491 
492   case OP_PUSH:
493   case OP_PUSH_SUPER:
494     addr = p->push.addr;
495     fputc(':', f);
496     p_rel_addr(f, addr, p, start);
497     break;
498 
499 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
500   case OP_PUSH_OR_JUMP_EXACT1:
501     addr = p->push_or_jump_exact1.addr;
502     fputc(':', f);
503     p_rel_addr(f, addr, p, start);
504     p_string(f, 1, &(p->push_or_jump_exact1.c));
505     break;
506 #endif
507 
508   case OP_PUSH_IF_PEEK_NEXT:
509     addr = p->push_if_peek_next.addr;
510     fputc(':', f);
511     p_rel_addr(f, addr, p, start);
512     p_string(f, 1, &(p->push_if_peek_next.c));
513     break;
514 
515   case OP_REPEAT:
516   case OP_REPEAT_NG:
517     mem = p->repeat.id;
518     addr = p->repeat.addr;
519     fprintf(f, ":%d:", mem);
520     p_rel_addr(f, addr, p, start);
521     break;
522 
523   case OP_REPEAT_INC:
524   case OP_REPEAT_INC_NG:
525     mem = p->repeat.id;
526     fprintf(f, ":%d", mem);
527     break;
528 
529   case OP_EMPTY_CHECK_START:
530     mem = p->empty_check_start.mem;
531     fprintf(f, ":%d", mem);
532     break;
533   case OP_EMPTY_CHECK_END:
534   case OP_EMPTY_CHECK_END_MEMST:
535 #ifdef USE_CALL
536   case OP_EMPTY_CHECK_END_MEMST_PUSH:
537 #endif
538     mem = p->empty_check_end.mem;
539     fprintf(f, ":%d", mem);
540     break;
541 
542   case OP_PREC_READ_NOT_START:
543     addr = p->prec_read_not_start.addr;
544     fputc(':', f);
545     p_rel_addr(f, addr, p, start);
546     break;
547 
548   case OP_LOOK_BEHIND:
549     len = p->look_behind.len;
550     fprintf(f, ":%d", len);
551     break;
552 
553   case OP_LOOK_BEHIND_NOT_START:
554     addr = p->look_behind_not_start.addr;
555     len  = p->look_behind_not_start.len;
556     fprintf(f, ":%d:", len);
557     p_rel_addr(f, addr, p, start);
558     break;
559 
560 #ifdef USE_CALL
561   case OP_CALL:
562     addr = p->call.addr;
563     fprintf(f, ":{/%d}", addr);
564     break;
565 #endif
566 
567   case OP_PUSH_SAVE_VAL:
568     {
569       SaveType type;
570 
571       type = p->push_save_val.type;
572       mem  = p->push_save_val.id;
573       fprintf(f, ":%d:%d", type, mem);
574     }
575     break;
576 
577   case OP_UPDATE_VAR:
578     {
579       UpdateVarType type;
580 
581       type = p->update_var.type;
582       mem  = p->update_var.id;
583       fprintf(f, ":%d:%d", type, mem);
584     }
585     break;
586 
587 #ifdef USE_CALLOUT
588   case OP_CALLOUT_CONTENTS:
589     mem = p->callout_contents.num;
590     fprintf(f, ":%d", mem);
591     break;
592 
593   case OP_CALLOUT_NAME:
594     {
595       int id;
596 
597       id  = p->callout_name.id;
598       mem = p->callout_name.num;
599       fprintf(f, ":%d:%d", id, mem);
600     }
601     break;
602 #endif
603 
604   case OP_TEXT_SEGMENT_BOUNDARY:
605     if (p->text_segment_boundary.not != 0)
606       fprintf(f, ":not");
607     break;
608 
609   case OP_FINISH:
610   case OP_END:
611   case OP_ANYCHAR:
612   case OP_ANYCHAR_ML:
613   case OP_ANYCHAR_STAR:
614   case OP_ANYCHAR_ML_STAR:
615   case OP_WORD:
616   case OP_WORD_ASCII:
617   case OP_NO_WORD:
618   case OP_NO_WORD_ASCII:
619   case OP_BEGIN_BUF:
620   case OP_END_BUF:
621   case OP_BEGIN_LINE:
622   case OP_END_LINE:
623   case OP_SEMI_END_BUF:
624   case OP_BEGIN_POSITION:
625   case OP_BACKREF1:
626   case OP_BACKREF2:
627   case OP_FAIL:
628   case OP_POP_OUT:
629   case OP_PREC_READ_START:
630   case OP_PREC_READ_END:
631   case OP_PREC_READ_NOT_END:
632   case OP_ATOMIC_START:
633   case OP_ATOMIC_END:
634   case OP_LOOK_BEHIND_NOT_END:
635 #ifdef USE_CALL
636   case OP_RETURN:
637 #endif
638     break;
639 
640   default:
641     fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
642     break;
643   }
644 }
645 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
646 
647 #ifdef ONIG_DEBUG_COMPILE
648 extern void
onig_print_compiled_byte_code_list(FILE * f,regex_t * reg)649 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
650 {
651   Operation* bp;
652   Operation* start = reg->ops;
653   Operation* end   = reg->ops + reg->ops_used;
654 
655   fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
656           reg->push_mem_start, reg->push_mem_end);
657   fprintf(f, "code-length: %d\n", reg->ops_used);
658 
659   bp = start;
660   while (bp < end) {
661     int pos = bp - start;
662 
663     fprintf(f, "%4d: ", pos);
664     print_compiled_byte_code(f, reg, pos, start, reg->enc);
665     fprintf(f, "\n");
666     bp++;
667   }
668   fprintf(f, "\n");
669 }
670 #endif
671 
672 
673 #ifdef USE_CAPTURE_HISTORY
674 static void history_tree_free(OnigCaptureTreeNode* node);
675 
676 static void
history_tree_clear(OnigCaptureTreeNode * node)677 history_tree_clear(OnigCaptureTreeNode* node)
678 {
679   int i;
680 
681   if (IS_NULL(node)) return ;
682 
683   for (i = 0; i < node->num_childs; i++) {
684     if (IS_NOT_NULL(node->childs[i])) {
685       history_tree_free(node->childs[i]);
686     }
687   }
688   for (i = 0; i < node->allocated; i++) {
689     node->childs[i] = (OnigCaptureTreeNode* )0;
690   }
691   node->num_childs = 0;
692   node->beg = ONIG_REGION_NOTPOS;
693   node->end = ONIG_REGION_NOTPOS;
694   node->group = -1;
695 }
696 
697 static void
history_tree_free(OnigCaptureTreeNode * node)698 history_tree_free(OnigCaptureTreeNode* node)
699 {
700   history_tree_clear(node);
701   if (IS_NOT_NULL(node->childs)) xfree(node->childs);
702 
703   xfree(node);
704 }
705 
706 static void
history_root_free(OnigRegion * r)707 history_root_free(OnigRegion* r)
708 {
709   if (IS_NULL(r->history_root)) return ;
710 
711   history_tree_free(r->history_root);
712   r->history_root = (OnigCaptureTreeNode* )0;
713 }
714 
715 static OnigCaptureTreeNode*
history_node_new(void)716 history_node_new(void)
717 {
718   OnigCaptureTreeNode* node;
719 
720   node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
721   CHECK_NULL_RETURN(node);
722 
723   node->childs     = (OnigCaptureTreeNode** )0;
724   node->allocated  =  0;
725   node->num_childs =  0;
726   node->group      = -1;
727   node->beg        = ONIG_REGION_NOTPOS;
728   node->end        = ONIG_REGION_NOTPOS;
729 
730   return node;
731 }
732 
733 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)734 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
735 {
736 #define HISTORY_TREE_INIT_ALLOC_SIZE  8
737 
738   if (parent->num_childs >= parent->allocated) {
739     int n, i;
740 
741     if (IS_NULL(parent->childs)) {
742       n = HISTORY_TREE_INIT_ALLOC_SIZE;
743       parent->childs =
744         (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
745     }
746     else {
747       n = parent->allocated * 2;
748       parent->childs =
749         (OnigCaptureTreeNode** )xrealloc(parent->childs,
750                                          sizeof(parent->childs[0]) * n);
751     }
752     CHECK_NULL_RETURN_MEMERR(parent->childs);
753     for (i = parent->allocated; i < n; i++) {
754       parent->childs[i] = (OnigCaptureTreeNode* )0;
755     }
756     parent->allocated = n;
757   }
758 
759   parent->childs[parent->num_childs] = child;
760   parent->num_childs++;
761   return 0;
762 }
763 
764 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)765 history_tree_clone(OnigCaptureTreeNode* node)
766 {
767   int i;
768   OnigCaptureTreeNode *clone, *child;
769 
770   clone = history_node_new();
771   CHECK_NULL_RETURN(clone);
772 
773   clone->beg = node->beg;
774   clone->end = node->end;
775   for (i = 0; i < node->num_childs; i++) {
776     child = history_tree_clone(node->childs[i]);
777     if (IS_NULL(child)) {
778       history_tree_free(clone);
779       return (OnigCaptureTreeNode* )0;
780     }
781     history_tree_add_child(clone, child);
782   }
783 
784   return clone;
785 }
786 
787 extern  OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)788 onig_get_capture_tree(OnigRegion* region)
789 {
790   return region->history_root;
791 }
792 #endif /* USE_CAPTURE_HISTORY */
793 
794 extern void
onig_region_clear(OnigRegion * region)795 onig_region_clear(OnigRegion* region)
796 {
797   int i;
798 
799   for (i = 0; i < region->num_regs; i++) {
800     region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
801   }
802 #ifdef USE_CAPTURE_HISTORY
803   history_root_free(region);
804 #endif
805 }
806 
807 extern int
onig_region_resize(OnigRegion * region,int n)808 onig_region_resize(OnigRegion* region, int n)
809 {
810   region->num_regs = n;
811 
812   if (n < ONIG_NREGION)
813     n = ONIG_NREGION;
814 
815   if (region->allocated == 0) {
816     region->beg = (int* )xmalloc(n * sizeof(int));
817     region->end = (int* )xmalloc(n * sizeof(int));
818 
819     if (region->beg == 0 || region->end == 0)
820       return ONIGERR_MEMORY;
821 
822     region->allocated = n;
823   }
824   else if (region->allocated < n) {
825     region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
826     region->end = (int* )xrealloc(region->end, n * sizeof(int));
827 
828     if (region->beg == 0 || region->end == 0)
829       return ONIGERR_MEMORY;
830 
831     region->allocated = n;
832   }
833 
834   return 0;
835 }
836 
837 static int
onig_region_resize_clear(OnigRegion * region,int n)838 onig_region_resize_clear(OnigRegion* region, int n)
839 {
840   int r;
841 
842   r = onig_region_resize(region, n);
843   if (r != 0) return r;
844   onig_region_clear(region);
845   return 0;
846 }
847 
848 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)849 onig_region_set(OnigRegion* region, int at, int beg, int end)
850 {
851   if (at < 0) return ONIGERR_INVALID_ARGUMENT;
852 
853   if (at >= region->allocated) {
854     int r = onig_region_resize(region, at + 1);
855     if (r < 0) return r;
856   }
857 
858   region->beg[at] = beg;
859   region->end[at] = end;
860   return 0;
861 }
862 
863 extern void
onig_region_init(OnigRegion * region)864 onig_region_init(OnigRegion* region)
865 {
866   region->num_regs     = 0;
867   region->allocated    = 0;
868   region->beg          = (int* )0;
869   region->end          = (int* )0;
870   region->history_root = (OnigCaptureTreeNode* )0;
871 }
872 
873 extern OnigRegion*
onig_region_new(void)874 onig_region_new(void)
875 {
876   OnigRegion* r;
877 
878   r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
879   CHECK_NULL_RETURN(r);
880   onig_region_init(r);
881   return r;
882 }
883 
884 extern void
onig_region_free(OnigRegion * r,int free_self)885 onig_region_free(OnigRegion* r, int free_self)
886 {
887   if (r != 0) {
888     if (r->allocated > 0) {
889       if (r->beg) xfree(r->beg);
890       if (r->end) xfree(r->end);
891       r->allocated = 0;
892     }
893 #ifdef USE_CAPTURE_HISTORY
894     history_root_free(r);
895 #endif
896     if (free_self) xfree(r);
897   }
898 }
899 
900 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)901 onig_region_copy(OnigRegion* to, OnigRegion* from)
902 {
903 #define RREGC_SIZE   (sizeof(int) * from->num_regs)
904   int i;
905 
906   if (to == from) return;
907 
908   if (to->allocated == 0) {
909     if (from->num_regs > 0) {
910       to->beg = (int* )xmalloc(RREGC_SIZE);
911       if (IS_NULL(to->beg)) return;
912       to->end = (int* )xmalloc(RREGC_SIZE);
913       if (IS_NULL(to->end)) return;
914       to->allocated = from->num_regs;
915     }
916   }
917   else if (to->allocated < from->num_regs) {
918     to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
919     if (IS_NULL(to->beg)) return;
920     to->end = (int* )xrealloc(to->end, RREGC_SIZE);
921     if (IS_NULL(to->end)) return;
922     to->allocated = from->num_regs;
923   }
924 
925   for (i = 0; i < from->num_regs; i++) {
926     to->beg[i] = from->beg[i];
927     to->end[i] = from->end[i];
928   }
929   to->num_regs = from->num_regs;
930 
931 #ifdef USE_CAPTURE_HISTORY
932   history_root_free(to);
933 
934   if (IS_NOT_NULL(from->history_root)) {
935     to->history_root = history_tree_clone(from->history_root);
936   }
937 #endif
938 }
939 
#ifdef USE_CALLOUT
/* Fill the OnigCalloutArgs object `args` and invoke the callout.
   `in`, `name_id` and `num` come from the macro arguments; every other
   field is copied from an identifier that must be in scope where the macro
   is expanded (reg, str, end, sstart, right_range, s,
   retry_in_match_counter, msa, stk_base, stk, mem_start_stk, mem_end_stk).
   The value returned by func(&args, user) is stored in `result`. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Run `func` as a retraction callout (in = ONIG_CALLOUT_IN_RETRACTION).
   ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS need no further action here.
   Any other result ends the match: a positive value is replaced by
   ONIGERR_INVALID_ARGUMENT, the value is stored in `best_len`, and control
   jumps to the `match_at_end` label -- both must exist at the expansion
   site. The `break` after the goto is unreachable. */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto match_at_end;\
    break;\
  }\
} while(0)
#endif
978 
979 
/** stack **/
/* Type codes stored in StackType.type, plus the bit masks used to classify
   them. The numeric values are chosen so that whole groups of types can be
   recognised with a single AND against one of the STK_MASK_* values below;
   the code performing those tests is outside this part of the file. */
#define INVALID_STACK_INDEX   -1

/* Bit 0: present in every STK_*ALT* type below. */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT             STK_ALT_FLAG
#define STK_ALT                   (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG)

/* handled by normal-POP */
/* These values contain the STK_MASK_POP_HANDLED bit (0x0010); for
   STK_REPEAT_INC that is only the case in the
   USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR configuration. */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
#else
#define STK_REPEAT_INC             0x0040
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* for fill a blank */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
#else
#define STK_EMPTY_CHECK_START      0x3000
#endif
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */
/* #define STK_REPEAT                 0x0300 */
#define STK_CALL_FRAME             0x0400
#define STK_RETURN                 0x0500
#define STK_SAVE_VAL               0x0600
#define STK_PREC_READ_START        0x0700
#define STK_PREC_READ_END          0x0800

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
/* 0x100e overlaps STK_TO_VOID_START (0x1000 bit) and the 0x0002/0x0004 bits
   of STK_ALT, STK_ALT_PREC_READ_NOT and STK_ALT_LOOK_BEHIND_NOT, but not
   STK_SUPER_ALT. */
#define STK_MASK_TO_VOID_TARGET    0x100e
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1027 
1028 typedef intptr_t StackIndex;
1029 
1030 typedef struct _StackType {
1031   unsigned int type;
1032   int zid;
1033   union {
1034     struct {
1035       Operation* pcode;     /* byte code position */
1036       UChar*     pstr;      /* string position */
1037       UChar*     pstr_prev; /* previous char position of pstr */
1038     } state;
1039     struct {
1040       int        count;
1041 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1042       StackIndex prev_index;  /* index of stack */
1043 #endif
1044     } repeat_inc;
1045     struct {
1046       UChar *pstr;       /* start/end position */
1047       /* Following information is set, if this stack type is MEM-START */
1048       StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */
1049       StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */
1050     } mem;
1051     struct {
1052       UChar *pstr;            /* start position */
1053 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1054       StackIndex prev_index;  /* index of stack */
1055 #endif
1056     } empty_check;
1057 #ifdef USE_CALL
1058     struct {
1059       Operation *ret_addr; /* byte code position */
1060       UChar *pstr;         /* string position */
1061     } call_frame;
1062 #endif
1063     struct {
1064       enum SaveType type;
1065       UChar* v;
1066       UChar* v2;
1067     } val;
1068 #ifdef USE_CALLOUT
1069     struct {
1070       int num;
1071       OnigCalloutFunc func;
1072     } callout;
1073 #endif
1074   } u;
1075 } StackType;
1076 
#ifdef USE_CALLOUT

/* Argument bundle handed to callout functions. */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;
  OnigRegex        regex;
  const OnigUChar *string;
  const OnigUChar *string_end;
  const OnigUChar *start;
  const OnigUChar *right_range;
  const OnigUChar *current;  /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg   *msa;
  StackType  *stk_base;
  StackType  *stk;
  StackIndex *mem_start_stk;
  StackIndex *mem_end_stk;
};

#endif
1100 
/*
 * The StackIndex tables live at the front of the allocation that also holds
 * the match stack.  PTR_NUM_SIZE gives the number of StackIndex slots and
 * UPDATE_FOR_STACK_REALLOC re-derives the table pointers from alloc_base.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define PTR_NUM_SIZE(reg) \
  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)

#define UPDATE_FOR_STACK_REALLOC do {\
  repeat_stk      = (StackIndex* )alloc_base;\
  empty_check_stk = (StackIndex* )&repeat_stk[reg->num_repeat];\
  mem_start_stk   = (StackIndex* )&empty_check_stk[reg->num_empty_check];\
  mem_end_stk     = &mem_start_stk[num_mem + 1];\
} while(0)

/* repeat_stk[sid] tracks the latest STK_REPEAT_INC entry of repeat `sid`;
   the previous value is kept in the entry so that popping can restore it. */
#define SAVE_REPEAT_STK_VAR(sid) \
  stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid) \
  repeat_stk[sid] = GET_STACK_INDEX(stk)
#define POP_REPEAT_INC \
  else if (stk->type == STK_REPEAT_INC) {\
    repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;\
  }

/* Same scheme for STK_EMPTY_CHECK_START entries and empty_check_stk[]. */
#define SAVE_EMPTY_CHECK_STK_VAR(sid) \
  stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) \
  empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START \
  else if (stk->type == STK_EMPTY_CHECK_START) {\
    empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;\
  }

#else

#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)

#define UPDATE_FOR_STACK_REALLOC do {\
  mem_start_stk = (StackIndex* )alloc_base;\
  mem_end_stk   = &mem_start_stk[num_mem + 1];\
} while(0)

/* Without the local-variable tables these hooks expand to nothing. */
#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC

#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START

#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1136 
/* best_len is only initialized when USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
   is enabled. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT_BEST_LEN(msa)  (msa).best_len = ONIG_MISMATCH;
#else
#define MATCH_ARG_INIT_BEST_LEN(msa)
#endif

/* Fill a MatchArg for one match/search call: no saved stack yet, the
   caller's options/region/start, the limits taken from `mpv`, and the
   number of StackIndex slots required by `reg`. */
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
  (msa).mp = mpv;\
  MATCH_ARG_INIT_BEST_LEN(msa)\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
1161 
/* Release the stack buffer saved in `msa`, if any.
   Wrapped in do/while(0) so the macro is a single statement and cannot
   capture an `else` that follows its use. */
#define MATCH_ARG_FREE(msa)  do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
1163 
1164 
/* Above this many StackIndex slots the initial stack is taken from the
   heap instead of xalloca(). */
#define ALLOCA_PTR_NUM_LIMIT   50

/*
 * Set up alloc_base / stk_base / stk / stk_end / is_alloca for a match.
 *  - If msa already owns a saved buffer (msa->stack_p), reuse it together
 *    with its recorded entry count msa->stack_n.
 *  - Otherwise allocate room for msa->ptr_num StackIndex slots followed by
 *    `stack_num` StackType entries, from the heap when ptr_num exceeds
 *    ALLOCA_PTR_NUM_LIMIT and via xalloca() otherwise.
 * Returns ONIGERR_MEMORY from the enclosing function (through
 * CHECK_NULL_RETURN_MEMERR) if the allocation yields NULL.
 *
 * The definition intentionally does not end with a semicolon: the caller
 * supplies it, so the macro stays usable as the body of an if/else.
 */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)
1197 
1198 
/*
 * Hand the current stack buffer over to msa so that a later STACK_INIT can
 * reuse it.  A heap buffer is passed on as is; an xalloca() buffer is first
 * copied to a fresh heap block (ONIGERR_MEMORY is returned from the
 * enclosing function if that allocation fails).
 */
#define STACK_SAVE do{\
  msa->stack_n = (int )(stk_end - stk_base);\
  if (is_alloca == 0) {\
    msa->stack_p = alloc_base;\
  }\
  else {\
    size_t size = sizeof(StackIndex) * msa->ptr_num \
                + sizeof(StackType) * msa->stack_n;\
    msa->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
    xmemcpy(msa->stack_p, alloc_base, size);\
  }\
} while(0)
1212 
1213 static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1214 
1215 extern unsigned int
onig_get_match_stack_limit_size(void)1216 onig_get_match_stack_limit_size(void)
1217 {
1218   return MatchStackLimit;
1219 }
1220 
1221 extern int
onig_set_match_stack_limit_size(unsigned int size)1222 onig_set_match_stack_limit_size(unsigned int size)
1223 {
1224   MatchStackLimit = size;
1225   return 0;
1226 }
1227 
#ifdef USE_RETRY_LIMIT_IN_MATCH

/* Default retry limit; onig_initialize_match_param() copies it into each
   OnigMatchParam. */
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;

/* Count one retry and leave the match with
   ONIGERR_RETRY_LIMIT_IN_MATCH_OVER once the counter value seen before the
   increment is already above the limit. */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (retry_limit_in_match < retry_in_match_counter++) {\
    MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
  }\
} while (0)

#else

/* Retry limiting is compiled out. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT_IN_MATCH */
1243 
/* Return the default retry limit, or 0 when the library was built without
   USE_RETRY_LIMIT_IN_MATCH. */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
#ifndef USE_RETRY_LIMIT_IN_MATCH
  /* return ONIG_NO_SUPPORT_CONFIG; */
  return 0;
#else
  return RetryLimitInMatch;
#endif
}
1254 
1255 extern int
onig_set_retry_limit_in_match(unsigned long size)1256 onig_set_retry_limit_in_match(unsigned long size)
1257 {
1258 #ifdef USE_RETRY_LIMIT_IN_MATCH
1259   RetryLimitInMatch = size;
1260   return 0;
1261 #else
1262   return ONIG_NO_SUPPORT_CONFIG;
1263 #endif
1264 }
1265 
#ifdef USE_CALLOUT
/* Callout functions that onig_initialize_match_param() installs into a
   fresh OnigMatchParam. */
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
#endif
1270 
1271 extern OnigMatchParam*
onig_new_match_param(void)1272 onig_new_match_param(void)
1273 {
1274   OnigMatchParam* p;
1275 
1276   p = (OnigMatchParam* )xmalloc(sizeof(*p));
1277   if (IS_NOT_NULL(p)) {
1278     onig_initialize_match_param(p);
1279   }
1280 
1281   return p;
1282 }
1283 
1284 extern void
onig_free_match_param_content(OnigMatchParam * p)1285 onig_free_match_param_content(OnigMatchParam* p)
1286 {
1287 #ifdef USE_CALLOUT
1288   if (IS_NOT_NULL(p->callout_data)) {
1289     xfree(p->callout_data);
1290     p->callout_data = 0;
1291   }
1292 #endif
1293 }
1294 
1295 extern void
onig_free_match_param(OnigMatchParam * p)1296 onig_free_match_param(OnigMatchParam* p)
1297 {
1298   if (IS_NOT_NULL(p)) {
1299     onig_free_match_param_content(p);
1300     xfree(p);
1301   }
1302 }
1303 
1304 extern int
onig_initialize_match_param(OnigMatchParam * mp)1305 onig_initialize_match_param(OnigMatchParam* mp)
1306 {
1307   mp->match_stack_limit  = MatchStackLimit;
1308 #ifdef USE_RETRY_LIMIT_IN_MATCH
1309   mp->retry_limit_in_match = RetryLimitInMatch;
1310 #endif
1311 
1312 #ifdef USE_CALLOUT
1313   mp->progress_callout_of_contents   = DefaultProgressCallout;
1314   mp->retraction_callout_of_contents = DefaultRetractionCallout;
1315   mp->match_at_call_counter  = 0;
1316   mp->callout_user_data      = 0;
1317   mp->callout_data           = 0;
1318   mp->callout_data_alloc_num = 0;
1319 #endif
1320 
1321   return ONIG_NORMAL;
1322 }
1323 
1324 #ifdef USE_CALLOUT
1325 
1326 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1327 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1328 {
1329   RegexExt* ext = reg->extp;
1330 
1331   mp->match_at_call_counter = 0;
1332 
1333   if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1334 
1335   if (ext->callout_num > mp->callout_data_alloc_num) {
1336     CalloutData* d;
1337     size_t n = ext->callout_num * sizeof(*d);
1338     if (IS_NOT_NULL(mp->callout_data))
1339       d = (CalloutData* )xrealloc(mp->callout_data, n);
1340     else
1341       d = (CalloutData* )xmalloc(n);
1342     CHECK_NULL_RETURN_MEMERR(d);
1343 
1344     mp->callout_data = d;
1345     mp->callout_data_alloc_num = ext->callout_num;
1346   }
1347 
1348   xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1349   return ONIG_NORMAL;
1350 }
1351 
/* Run adjust_match_param() and return its error code from the enclosing
   function on failure.  Uses the caller's variable `r`.  This expands to
   two statements, so it must not be used as an unbraced if/else body. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;

/* Address of the CalloutData entry for callout number `num` (1-based). */
#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1357 
1358 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1359 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1360 {
1361   OnigMatchParam* mp;
1362   int num;
1363   CalloutData* d;
1364 
1365   mp  = args->msa->mp;
1366   num = args->num;
1367 
1368   d = CALLOUT_DATA_AT_NUM(mp, num);
1369   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1370     xmemset(d, 0, sizeof(*d));
1371     d->last_match_at_call_counter = mp->match_at_call_counter;
1372     return d->last_match_at_call_counter;
1373   }
1374 
1375   return 0;
1376 }
1377 
1378 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1379 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1380                                      int callout_num, int slot,
1381                                      OnigType* type, OnigValue* val)
1382 {
1383   OnigType t;
1384   CalloutData* d;
1385 
1386   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1387 
1388   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1389   t = d->slot[slot].type;
1390   if (IS_NOT_NULL(type)) *type = t;
1391   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1392   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1393 }
1394 
1395 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1396 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1397                                                           int slot, OnigType* type,
1398                                                           OnigValue* val)
1399 {
1400   return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1401                                               args->num, slot, type, val);
1402 }
1403 
1404 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1405 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1406                       int callout_num, int slot,
1407                       OnigType* type, OnigValue* val)
1408 {
1409   OnigType t;
1410   CalloutData* d;
1411 
1412   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1413 
1414   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1415   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1416     xmemset(d, 0, sizeof(*d));
1417     d->last_match_at_call_counter = mp->match_at_call_counter;
1418   }
1419 
1420   t = d->slot[slot].type;
1421   if (IS_NOT_NULL(type)) *type = t;
1422   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1423   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1424 }
1425 
1426 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1427 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1428                              const UChar* tag, const UChar* tag_end, int slot,
1429                              OnigType* type, OnigValue* val)
1430 {
1431   int num;
1432 
1433   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1434   if (num < 0)  return num;
1435   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1436 
1437   return onig_get_callout_data(reg, mp, num, slot, type, val);
1438 }
1439 
1440 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1441 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1442                                       int callout_num, int slot,
1443                                       OnigType* type, OnigValue* val)
1444 {
1445   return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1446                                type, val);
1447 }
1448 
1449 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1450 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1451                                            int slot, OnigType* type, OnigValue* val)
1452 {
1453   return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1454                                type, val);
1455 }
1456 
1457 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1458 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1459                       int callout_num, int slot,
1460                       OnigType type, OnigValue* val)
1461 {
1462   CalloutData* d;
1463 
1464   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1465 
1466   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1467   d->slot[slot].type = type;
1468   d->slot[slot].val  = *val;
1469   d->last_match_at_call_counter = mp->match_at_call_counter;
1470 
1471   return ONIG_NORMAL;
1472 }
1473 
1474 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1475 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1476                              const UChar* tag, const UChar* tag_end, int slot,
1477                              OnigType type, OnigValue* val)
1478 {
1479   int num;
1480 
1481   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1482   if (num < 0)  return num;
1483   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1484 
1485   return onig_set_callout_data(reg, mp, num, slot, type, val);
1486 }
1487 
1488 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1489 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1490                                       int callout_num, int slot,
1491                                       OnigType type, OnigValue* val)
1492 {
1493   return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1494                                type, val);
1495 }
1496 
1497 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1498 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1499                                            int slot, OnigType type, OnigValue* val)
1500 {
1501   return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1502                                type, val);
1503 }
1504 
1505 #else
1506 #define ADJUST_MATCH_PARAM(reg, mp)
1507 #endif /* USE_CALLOUT */
1508 
1509 
1510 static int
stack_double(int is_alloca,char ** arg_alloc_base,StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,MatchArg * msa)1511 stack_double(int is_alloca, char** arg_alloc_base,
1512              StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
1513              MatchArg* msa)
1514 {
1515   unsigned int n;
1516   int used;
1517   size_t size;
1518   size_t new_size;
1519   char* alloc_base;
1520   char* new_alloc_base;
1521   StackType *stk_base, *stk_end, *stk;
1522 
1523   alloc_base = *arg_alloc_base;
1524   stk_base = *arg_stk_base;
1525   stk_end  = *arg_stk_end;
1526   stk      = *arg_stk;
1527 
1528   n = (unsigned int )(stk_end - stk_base);
1529   size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1530   n *= 2;
1531   new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1532   if (is_alloca != 0) {
1533     new_alloc_base = (char* )xmalloc(new_size);
1534     if (IS_NULL(new_alloc_base)) {
1535       STACK_SAVE;
1536       return ONIGERR_MEMORY;
1537     }
1538     xmemcpy(new_alloc_base, alloc_base, size);
1539   }
1540   else {
1541     if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1542       if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
1543         return ONIGERR_MATCH_STACK_LIMIT_OVER;
1544       else
1545         n = msa->match_stack_limit;
1546     }
1547     new_alloc_base = (char* )xrealloc(alloc_base, new_size);
1548     if (IS_NULL(new_alloc_base)) {
1549       STACK_SAVE;
1550       return ONIGERR_MEMORY;
1551     }
1552   }
1553 
1554   alloc_base = new_alloc_base;
1555   used = (int )(stk - stk_base);
1556   *arg_alloc_base = alloc_base;
1557   *arg_stk_base   = (StackType* )(alloc_base
1558                                   + (sizeof(StackIndex) * msa->ptr_num));
1559   *arg_stk      = *arg_stk_base + used;
1560   *arg_stk_end  = *arg_stk_base + n;
1561   return 0;
1562 }
1563 
/* Make sure at least `n` free entries remain on the match stack, growing
   it with stack_double() when needed.  If growing fails, the stack is
   saved into msa and the error code is returned from the enclosing
   function. */
#define STACK_ENSURE(n) do {\
  if ((int )(stk_end - stk) < (n)) {\
    int dbl_status = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
    if (dbl_status == 0) {\
      is_alloca = 0;\
      UPDATE_FOR_STACK_REALLOC;\
    }\
    else {\
      STACK_SAVE;\
      return dbl_status;\
    }\
  }\
} while(0)
1572 
/* Conversions between a StackIndex and a pointer into the match stack. */
#define STACK_AT(index)       (stk_base + (index))
#define GET_STACK_INDEX(stk)  ((stk) - stk_base)
1575 
/* Push an entry that carries nothing but its type. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True if the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) \
  (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)

/* Push a backtrack state: byte code position plus current and previous
   string positions. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  STACK_INC;\
} while(0)

/* Push type and byte code position only; the caller must already have
   made room, since no STACK_ENSURE is done here. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
1598 
/* Push an entry without calling STACK_ENSURE.  The ONIG_DEBUG_MATCH build
   additionally records the caller's s / sprev in the entry. */
#ifndef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr      = s;\
  stk->u.state.pstr_prev = sprev;\
  STACK_INC;\
} while (0)
#endif
1614 
/* Type-specific shorthands for STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev) \
  STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) \
  STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
#define STACK_PUSH_PREC_READ_START(s,sprev) \
  STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START \
  STACK_PUSH_TYPE(STK_TO_VOID_START)
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
1624 
/* Disabled: STK_REPEAT is commented out in the type list. */
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->zid  = (sid);\
  stk->u.repeat.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif

/* Push the iteration count `ct` of repeat `sid`; the SAVE/LOAD hooks keep
   the per-repeat stack index up to date when they are compiled in. */
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->zid  = (sid);\
  stk->u.repeat_inc.count = (ct);\
  SAVE_REPEAT_STK_VAR(sid);\
  LOAD_TO_REPEAT_STK_VAR(sid);\
  STACK_INC;\
} while(0)
1644 
/* Record the start of memory `mnum` at `s`.  The previous start/end
   indices are kept in the entry so that popping can restore them; the
   memory's end index is reset to INVALID_STACK_INDEX. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_start_stk[mnum]   = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]     = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Record the end of memory `mnum` at `s`, keeping the previous start/end
   indices in the entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_end_stk[mnum]     = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a position-less end marker for memory `mnum`. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->zid  = (mnum);\
  STACK_INC;\
} while(0)
1674 
/* Walk down from the stack top and leave `k` on the STK_MEM_START entry of
   memory `mnum` that is not paired with a later MEM_END / MEM_END_MARK of
   the same memory.  If there is none, `k` ends at stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0) {\
      if (k->zid == (mnum)) level++;\
    }\
    else if (k->type == STK_MEM_START) {\
      if (k->zid == (mnum)) {\
        if (level == 0) break;\
        level--;\
      }\
    }\
  }\
} while(0)
1690 
/*
 * Scan upward from `k` to the stack top for the outermost START/END pair
 * of memory `mnum`: `start` receives the position of the first
 * STK_MEM_START found at nesting level 0 and `end` the position of the
 * STK_MEM_END that closes it.  `k` is advanced by the scan; `start` and
 * `end` keep their previous values when nothing matches.
 *
 * The memory number of an entry is stored in StackType.zid (see
 * STACK_PUSH_MEM_START / STACK_PUSH_MEM_END); the u.mem member has no
 * `num` field.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1708 
/* Remember string position `s` for empty check `cnum`; the SAVE/LOAD hooks
   keep the per-check stack index up to date when they are compiled in. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_START;\
  stk->zid  = (cnum);\
  stk->u.empty_check.pstr = (s);\
  SAVE_EMPTY_CHECK_STK_VAR(cnum);\
  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
  STACK_INC;\
} while(0)

/* Push the end marker for empty check `cnum`. */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_END;\
  stk->zid  = (cnum);\
  STACK_INC;\
} while(0)
1725 
/* Push a call frame that stores the byte code position to return to. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN entry; the save-val lookups count these against
   STK_CALL_FRAME entries. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)
1738 
/* Save value `sval` of kind `stype` under id `sid`. */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH_SAVE_VAL, additionally storing the caller's `sprev`
   in v2. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  stk->u.val.v2   = sprev;\
  STACK_INC;\
} while(0)
1757 
/* Load into `sval` the value of the topmost STK_SAVE_VAL entry of kind
   `stype`; `sval` is left unchanged when there is none. */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
  StackType *k;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
      (sval) = k->u.val.v;\
      break;\
    }\
  }\
} while (0)
1769 
/* Walking down from the stack top, load into `sval` the value of the first
   STK_SAVE_VAL entry of kind `stype` and id `sid` that is seen while the
   call level is 0.  STK_RETURN entries raise the level, STK_CALL_FRAME
   entries lower it.  `sval` is left unchanged when nothing qualifies. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
  int level = 0;\
  StackType *k;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1789 
/*
 * Like STACK_GET_SAVE_VAL_TYPE_LAST_ID, but additionally restores the
 * caller's `sprev` from the v2 field of the entry that is found.
 * Walking down from the stack top, the first STK_SAVE_VAL entry of kind
 * `stype` and id `sid` seen while the call level is 0 is used;
 * STK_RETURN entries raise the level and STK_CALL_FRAME entries lower it.
 * `sval` and `sprev` are left unchanged when nothing qualifies.
 * The STACK_BASE_CHECK label names this macro so a debug report points at
 * the right lookup.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        sprev  = k->u.val.v2;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1810 
/*
 * Variant of STACK_GET_SAVE_VAL_TYPE_LAST_ID that starts at `stk_from`
 * instead of the stack top.  The entry at `stk_from` itself is examined
 * first; the walk stops before the entry at stk_base.  STK_RETURN entries
 * raise the call level and STK_CALL_FRAME entries lower it; only a match
 * seen at level 0 is loaded into `sval`.
 *
 * The id of a STK_SAVE_VAL entry is stored in StackType.zid (see
 * STACK_PUSH_SAVE_VAL); the u.val member has no `id` field.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
  int level = 0;\
  StackType *k = (stk_from);\
  while (k > stk_base) {\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
    k--;\
  }\
} while (0)
1830 
/* Push a callout entry for a callout of contents; it has no name id, so
   zid is set to ONIG_NON_NAME_ID. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = ONIG_NON_NAME_ID;\
  stk->u.callout.num  = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)

/* Push a callout entry for a callout of name with name id `aid`. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = (aid);\
  stk->u.callout.num  = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)
1848 
/* Debug-only guard: report `at` and fail the match with ONIGERR_STACK_BUG
   when `p` has dropped below stk_base.  Expands to nothing otherwise. */
#ifndef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at)
#else
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
  }
#endif

/* Drop the top entry without looking at it. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
1863 
1864 
/* Extra branch of STACK_POP's else-if chain: a popped STK_CALLOUT entry
   triggers the retraction callout.  Empty without USE_CALLOUT. */
#ifndef USE_CALLOUT
#define POP_CALLOUT_CASE
#else
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) {\
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
  }
#endif
1873 
/*
 * Unwind to the nearest entry that has STK_MASK_POP_USED set and leave
 * `stk` pointing at it.  How much state is restored on the way depends on
 * pop_level:
 *   STACK_POP_LEVEL_FREE       nothing is restored
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START entries restore the memory's
 *                              previous start/end indices
 *   otherwise                  MEM_START and MEM_END entries restore those
 *                              indices, and the REPEAT_INC / EMPTY_CHECK /
 *                              CALLOUT hooks run when they are compiled in
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
        mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
      }\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
        if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALLOUT_CASE\
      }\
    }\
    break;\
  }\
} while(0)
1916 
/* Unwind until an entry of exactly `til_type` is on top, restoring memory
   indices (and repeat / empty-check indices when compiled in) from the
   entries that are passed.  Only entries with a STK_MASK_POP_HANDLED_TIL
   bit are inspected. */
#define POP_TIL_BODY(aname, til_type) do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) == 0) continue;\
    if (stk->type == (til_type)) break;\
    if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
      mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
    }\
    POP_REPEAT_INC \
    POP_EMPTY_CHECK_START \
    /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
  }\
} while(0)

/* Unwind to the enclosing STK_ALT_PREC_READ_NOT entry. */
#define STACK_POP_TIL_ALT_PREC_READ_NOT  do {\
  POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
} while(0)

/* Unwind to the enclosing STK_ALT_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT  do {\
  POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
} while(0)
1947 
1948 
/* Walking down from the stack top, turn every to-void target entry into
   STK_VOID, stopping once the STK_TO_VOID_START entry has been voided.
   `k` is left on that entry. */
#define STACK_EXEC_TO_VOID(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
    if (IS_TO_VOID_TARGET(k)) {\
      int to_void_reached_start_ = (k->type == STK_TO_VOID_START);\
      k->type = STK_VOID;\
      if (to_void_reached_start_) break;\
    }\
  }\
} while(0)
1963 
/*
 * STACK_GET_PREC_READ_START(k)
 *
 * Walks down from stk to the STK_PREC_READ_START entry that belongs to the
 * current nesting depth and leaves k pointing at it.  Each
 * STK_PREC_READ_END passed on the way opens one nested level that the next
 * STK_PREC_READ_START closes again.  Every entry for which
 * IS_TO_VOID_TARGET() holds is turned into STK_VOID along the way.
 */
#define STACK_GET_PREC_READ_START(k) do {\
  int level = 0;\
  for (k = stk; ; ) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_PREC_READ_END) {\
      level++;\
    }\
    else if (k->type == STK_PREC_READ_START) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1984 
1985 
/*
 * EMPTY_CHECK_START_SEARCH(sid, k)
 *
 * Linear search down from stk for the nearest STK_EMPTY_CHECK_START entry
 * whose zid equals sid; k is left pointing at it.
 */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  for (k = stk; ; ) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START && k->zid == (sid)) break;\
  }\
} while(0)
1996 
/*
 * GET_EMPTY_CHECK_START(sid, k)
 *
 * Sets k to the STK_EMPTY_CHECK_START entry for id sid.  When the
 * empty_check_stk[] index table is compiled in and the pattern has no calls
 * (reg->num_call == 0) the entry is fetched directly through that table;
 * in every other case it is found by searching the stack.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call != 0) {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
  else {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
} while(0)

#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif
2012 
2013 
/*
 * STACK_EMPTY_CHECK(isnull, sid, s)
 *
 * Sets isnull to 1 when the string position saved in the
 * STK_EMPTY_CHECK_START entry for sid equals s, otherwise to 0.
 */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr == (s)) (isnull) = 1;\
  else                              (isnull) = 0;\
} while(0)
2019 
/*
 * STACK_MEM_START_GET_PREV_END_ADDR(k, reg, addr)
 *
 * k must point at a STK_MEM_START entry.  Stores into addr the end position
 * that was recorded for the group before this entry was pushed:
 *   - 0 when prev_end is INVALID_STACK_INDEX;
 *   - the pstr of the stack entry indexed by prev_end when the group's bit
 *     is set in reg->push_mem_end;
 *   - otherwise prev_end itself reinterpreted as a UChar pointer.
 */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else if (MEM_STATUS_AT((reg)->push_mem_end, k->zid)) {\
    (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
  }\
  else {\
    (addr) = (UChar* )k->u.mem.prev_end;\
  }\
} while (0)
2031 
2032 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg)
 *
 * Empty-loop check that also looks at captures.  Result in isnull:
 *    0  the position saved at the STK_EMPTY_CHECK_START for sid differs
 *       from s, or a watched capture has no previous end or a previous
 *       start that differs from its previous end;
 *    1  position unchanged and no watched capture contradicts emptiness;
 *   -1  as 1, but some watched capture's previous end is not s.
 *
 * "Watched" means a STK_MEM_START entry between the start marker and stk
 * whose group bit is set in reg->empty_status_mem.
 */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr == (s)) {\
    UChar* endp;\
    (isnull) = 1;\
    for ( ; k < stk; k++) {\
      if (k->type == STK_MEM_START &&\
        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
        if (endp == 0 ||\
            STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
          (isnull) = 0;\
          break;\
        }\
        if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
    }\
  }\
  else {\
    (isnull) = 0;\
  }\
} while(0)
2060 
/*
 * STACK_EMPTY_CHECK_MEM_REC(isnull, sid, s, reg)
 *
 * Capture-aware empty-loop check that tracks nesting of
 * STK_EMPTY_CHECK_START / STK_EMPTY_CHECK_END entries carrying the id sid.
 *
 * Downward walk from stk: every STK_EMPTY_CHECK_END with zid == sid raises
 * `level`; every STK_EMPTY_CHECK_START with zid == sid met while level != 0
 * lowers it.  The START met at level 0 is the one examined:
 *   - its saved pstr differs from s  -> isnull = 0;
 *   - otherwise isnull = 1 and the entries from that START up to stk are
 *     scanned.  For a STK_MEM_START seen while level == 0 whose group bit is
 *     set in reg->empty_status_mem:
 *         previous end is 0                      -> isnull = 0, scan stops
 *         previous start pstr != previous end    -> isnull = 0, scan stops
 *         previous end != s                      -> isnull = -1
 *     START / END entries with zid == sid adjust `level` during the scan.
 *
 * NOTE(review): the upward scan begins on the matched STK_EMPTY_CHECK_START
 * itself, so its first iteration executes `level++`.  The `level == 0` guard
 * on STK_MEM_START therefore cannot hold until a STK_EMPTY_CHECK_END for sid
 * has been passed.  Confirm whether the scan was meant to start one entry
 * above the marker.
 */
2061 #define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
2062   int level = 0;\
2063   StackType* k = stk;\
2064   while (1) {\
2065     k--;\
2066     STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
2067     if (k->type == STK_EMPTY_CHECK_START) {\
2068       if (k->zid == (sid)) {\
2069         if (level == 0) {\
2070           if (k->u.empty_check.pstr != (s)) {\
2071             (isnull) = 0;\
2072             break;\
2073           }\
2074           else {\
2075             UChar* endp;\
2076             (isnull) = 1;\
2077             while (k < stk) {\
2078               if (k->type == STK_MEM_START) {\
2079                 if (level == 0 && \
2080                   MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
2081                   STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
2082                   if (endp == 0) {\
2083                     (isnull) = 0; break;\
2084                   }\
2085                   else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
2086                     (isnull) = 0; break;\
2087                   }\
2088                   else if (endp != s) {\
2089                     (isnull) = -1; /* empty, but position changed */\
2090                   }\
2091                 }\
2092               }\
2093               else if (k->type == STK_EMPTY_CHECK_START) {\
2094                 if (k->zid == (sid)) level++;\
2095               }\
2096               else if (k->type == STK_EMPTY_CHECK_END) {\
2097                 if (k->zid == (sid)) level--;\
2098               }\
2099               k++;\
2100             }\
2101             break;\
2102           }\
2103         }\
2104         else {\
2105           level--;\
2106         }\
2107       }\
2108     }\
2109     else if (k->type == STK_EMPTY_CHECK_END) {\
2110       if (k->zid == (sid)) level++;\
2111     }\
2112   }\
2113 } while(0)
2114 #else
/*
 * STACK_EMPTY_CHECK_REC(isnull, id, s)
 *
 * Nesting-aware empty-loop check used when
 * USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT is off.  Walks down from stk:
 * every STK_EMPTY_CHECK_END raises `level`, every STK_EMPTY_CHECK_START
 * lowers it, except the START with a matching id met at level 0, which ends
 * the walk and sets isnull to (saved pstr == s).
 *
 * NOTE(review): the id is read from u.empty_check.num here, while the other
 * empty-check macros in this file compare zid -- confirm the field still
 * exists before enabling this branch.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
    else if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->u.empty_check.num == (id) && level == 0) {\
        (isnull) = (k->u.empty_check.pstr == (s));\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
2135 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2136 
/*
 * STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
 *
 * Searches down from stk for the nearest STK_REPEAT_INC entry with
 * zid == sid and stores its count in c.  When a STK_RETURN is met, the walk
 * first skips down to the STK_CALL_FRAME that balances it (nested
 * RETURN / CALL_FRAME pairs included) before the search resumes.
 *
 * NOTE(review): the skipping loop performs no STACK_BASE_CHECK, unlike the
 * outer walk.
 */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if (k->type == STK_REPEAT_INC && k->zid == (sid)) {\
      (c) = k->u.repeat_inc.count;\
      break;\
    }\
    if (k->type == STK_RETURN) {\
      int level = -1;\
      do {\
        k--;\
        if (k->type == STK_CALL_FRAME) level++;\
        else if (k->type == STK_RETURN) level--;\
      } while (level != 0);\
    }\
  }\
} while(0)
2161 
/*
 * STACK_GET_REPEAT_COUNT(sid, c)
 *
 * Stores the current repeat count for id sid in c.  With the repeat_stk[]
 * index table compiled in and no calls in the pattern
 * (reg->num_call == 0) the count is read through that table; otherwise the
 * stack is searched.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call != 0) {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
  else {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
} while(0)
#else
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2175 
/*
 * STACK_RETURN(addr)
 *
 * Finds the STK_CALL_FRAME that belongs to the current call depth and
 * stores its ret_addr in addr.  Each STK_RETURN passed on the way down
 * means one more CALL_FRAME has to be skipped first.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
2193 
2194 
/*
 * STRING_CMP(s1, s2, len)
 *
 * Byte-wise comparison of len bytes; jumps to the label `fail` at the first
 * difference.  s1, s2 and len are modified in place: both pointers end up
 * one past the last byte examined.
 */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ == *s2++) continue;\
    goto fail;\
  }\
} while(0)
2200 
/*
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len)
 *
 * Case-insensitive comparison through string_cmp_ic() using the local
 * `encode`; jumps to the label `fail` when that function returns 0.
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (! string_cmp_ic(encode, case_fold_flag, s1, ps2, len)) \
    goto fail; \
} while(0)
2205 
/*
 * Case-folding comparison of two byte ranges of length mblen.
 *
 * s1 and *ps2 are folded one unit at a time with ONIGENC_MBC_CASE_FOLD and
 * the folded byte sequences are compared.  The loop runs until s1 has
 * consumed its mblen bytes.
 *
 * Returns 1 when every folded unit is identical, and then stores the
 * advanced second pointer back into *ps2.  Returns 0 at the first folded
 * unit that differs in length or content; *ps2 is left untouched then.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar* s2   = *ps2;
  UChar* end1 = s1 + mblen;
  UChar* end2 = s2 + mblen;

  while (s1 < end1) {
    int i;
    int n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, folded1);
    int n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, folded2);

    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (folded1[i] != folded2[i]) return 0;
    }
  }

  *ps2 = s2;
  return 1;
}
2233 
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail)
 *
 * Byte-wise comparison of len bytes that reports through is_fail instead of
 * jumping: is_fail becomes 1 at the first difference, 0 when all bytes
 * agree.  s1, s2 and len are modified in place.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  while (len-- > 0) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
2242 
/*
 * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, is_fail)
 *
 * Case-insensitive counterpart of STRING_CMP_VALUE: is_fail is 1 when
 * string_cmp_ic() (called with the local `encode`) returns 0, else 0.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) ? 1 : 0; \
} while(0)
2249 
2250 
/*
 * Position predicates used inside the matcher.  They read the matcher's
 * local variables (str, end, s, right_range, in_right_range) directly.
 *
 * DATA_ENSURE_CHECK(n) / DATA_ENSURE(n) test whether n more bytes are
 * available before right_range.  The test is written as a comparison
 * against the remaining distance (right_range - s) rather than as
 * `s + n <= right_range`: forming s + n for a large n creates a pointer
 * beyond the subject buffer, which is undefined and may wrap, making the
 * bound check pass.  n is expected to be a signed integer, as it is at the
 * call sites in this file.
 *
 * DATA_ENSURE(n) expands to a bare `if`; use it only as a full statement
 * and never directly before an `else`.
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   ((n) <= right_range - s)
#define DATA_ENSURE(n)         if (right_range - s < (n)) goto fail

/* Loads the search limit passed to the matcher into right_range. */
#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2259 
2260 #ifdef USE_CAPTURE_HISTORY
2261 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)2262 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2263                           StackType* stk_top, UChar* str, regex_t* reg)
2264 {
2265   int n, r;
2266   OnigCaptureTreeNode* child;
2267   StackType* k = *kp;
2268 
2269   while (k < stk_top) {
2270     if (k->type == STK_MEM_START) {
2271       n = k->zid;
2272       if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2273           MEM_STATUS_AT(reg->capture_history, n) != 0) {
2274         child = history_node_new();
2275         CHECK_NULL_RETURN_MEMERR(child);
2276         child->group = n;
2277         child->beg = (int )(k->u.mem.pstr - str);
2278         r = history_tree_add_child(node, child);
2279         if (r != 0) return r;
2280         *kp = (k + 1);
2281         r = make_capture_history_tree(child, kp, stk_top, str, reg);
2282         if (r != 0) return r;
2283 
2284         k = *kp;
2285         child->end = (int )(k->u.mem.pstr - str);
2286       }
2287     }
2288     else if (k->type == STK_MEM_END) {
2289       if (k->zid == node->group) {
2290         node->end = (int )(k->u.mem.pstr - str);
2291         *kp = k;
2292         return 0;
2293       }
2294     }
2295     k++;
2296   }
2297 
2298   return 1; /* 1: root node ending. */
2299 }
2300 #endif
2301 
2302 #ifdef USE_BACKREF_WITH_LEVEL
/*
 * Returns 1 when group number `mem` occurs among the first `num` entries of
 * memp, 0 otherwise (including when num is not positive).
 */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  int idx = 0;

  while (idx < num) {
    if ((int )memp[idx] == mem) return 1;
    idx++;
  }
  return 0;
}
2312 
2313 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2314 backref_match_at_nested_level(regex_t* reg,
2315                               StackType* top, StackType* stk_base,
2316                               int ignore_case, int case_fold_flag,
2317                               int nest, int mem_num, MemNumType* memp,
2318                               UChar** s, const UChar* send)
2319 {
2320   UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2321   int level;
2322   StackType* k;
2323 
2324   level = 0;
2325   k = top;
2326   k--;
2327   while (k >= stk_base) {
2328     if (k->type == STK_CALL_FRAME) {
2329       level--;
2330     }
2331     else if (k->type == STK_RETURN) {
2332       level++;
2333     }
2334     else if (level == nest) {
2335       if (k->type == STK_MEM_START) {
2336         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2337           pstart = k->u.mem.pstr;
2338           if (IS_NOT_NULL(pend)) {
2339             if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2340             p  = pstart;
2341             ss = *s;
2342 
2343             if (ignore_case != 0) {
2344               if (string_cmp_ic(reg->enc, case_fold_flag,
2345                                 pstart, &ss, (int )(pend - pstart)) == 0)
2346                 return 0; /* or goto next_mem; */
2347             }
2348             else {
2349               while (p < pend) {
2350                 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2351               }
2352             }
2353 
2354             *s = ss;
2355             return 1;
2356           }
2357         }
2358       }
2359       else if (k->type == STK_MEM_END) {
2360         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2361           pend = k->u.mem.pstr;
2362         }
2363       }
2364     }
2365     k--;
2366   }
2367 
2368   return 0;
2369 }
2370 
2371 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2372 backref_check_at_nested_level(regex_t* reg,
2373                               StackType* top, StackType* stk_base,
2374                               int nest, int mem_num, MemNumType* memp)
2375 {
2376   int level;
2377   StackType* k;
2378 
2379   level = 0;
2380   k = top;
2381   k--;
2382   while (k >= stk_base) {
2383     if (k->type == STK_CALL_FRAME) {
2384       level--;
2385     }
2386     else if (k->type == STK_RETURN) {
2387       level++;
2388     }
2389     else if (level == nest) {
2390       if (k->type == STK_MEM_END) {
2391         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2392           return 1;
2393         }
2394       }
2395     }
2396     k--;
2397   }
2398 
2399   return 0;
2400 }
2401 #endif /* USE_BACKREF_WITH_LEVEL */
2402 
2403 
2404 #ifdef ONIG_DEBUG_STATISTICS
2405 
/*
 * Per-opcode profiling support (compiled only with ONIG_DEBUG_STATISTICS).
 *
 * USE_TIMEOFDAY is defined unconditionally right here, so the
 * gettimeofday() branch below is the one that is compiled; the times()
 * branch is kept as an alternative.
 */
2406 #define USE_TIMEOFDAY
2407 
2408 #ifdef USE_TIMEOFDAY
2409 #ifdef HAVE_SYS_TIME_H
2410 #include <sys/time.h>
2411 #endif
2412 #ifdef HAVE_UNISTD_H
2413 #include <unistd.h>
2414 #endif
/* Start / end timestamps of the opcode currently being timed. */
2415 static struct timeval ts, te;
2416 #define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
/* Elapsed time between two timevals, expressed in microseconds. */
2417 #define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
2418                            (((te).tv_sec - (ts).tv_sec)*1000000))
2419 #else
2420 #ifdef HAVE_SYS_TIMES_H
2421 #include <sys/times.h>
2422 #endif
2423 static struct tms ts, te;
2424 #define GETTIME(t)         times(&(t))
/* Difference of the tms_utime fields (unit is whatever times() reports). */
2425 #define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
2426 #endif
2427 
/*
 * File-scope accumulators, indexed by opcode value:
 *   OpCounter      number of times SOP_IN was entered for the opcode
 *   OpPrevCounter  number of times the opcode ran immediately before an
 *                  occurrence of OpPrevTarget
 *   OpTime         summed TIMEDIFF results for the opcode
 * OpCurr is the opcode most recently entered; MaxStackDepth is the deepest
 * stack offset observed by STACK_INC.
 */
2428 static int OpCounter[256];
2429 static int OpPrevCounter[256];
2430 static unsigned long OpTime[256];
2431 static int OpCurr = OP_FINISH;
2432 static int OpPrevTarget = OP_FAIL;
2433 static int MaxStackDepth = 0;
2434 
/*
 * SOP_IN(opcode): bookkeeping at the start of an opcode handler.
 * If the opcode being entered is OpPrevTarget, the previously entered
 * opcode (still in OpCurr) gets its OpPrevCounter bumped.  Then OpCurr is
 * updated, the opcode's OpCounter is incremented and the start timestamp
 * is taken.
 */
2435 #define SOP_IN(opcode) do {\
2436   if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2437   OpCurr = opcode;\
2438   OpCounter[opcode]++;\
2439   GETTIME(ts);\
2440 } while(0)
2441 
/*
 * SOP_OUT: bookkeeping at the end of an opcode handler.  Takes the end
 * timestamp and adds the elapsed time to OpTime[] of the opcode recorded
 * by the last SOP_IN.
 */
2442 #define SOP_OUT do {\
2443   GETTIME(te);\
2444   OpTime[OpCurr] += TIMEDIFF(te, ts);\
2445 } while(0)
2446 
2447 extern void
onig_statistics_init(void)2448 onig_statistics_init(void)
2449 {
2450   int i;
2451   for (i = 0; i < 256; i++) {
2452     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2453   }
2454   MaxStackDepth = 0;
2455 }
2456 
2457 extern int
onig_print_statistics(FILE * f)2458 onig_print_statistics(FILE* f)
2459 {
2460   int r;
2461   int i;
2462 
2463   r = fprintf(f, "   count      prev        time\n");
2464   if (r < 0) return -1;
2465 
2466   for (i = 0; OpInfo[i].opcode >= 0; i++) {
2467     r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2468                 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2469     if (r < 0) return -1;
2470   }
2471   r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2472   if (r < 0) return -1;
2473 
2474   return 0;
2475 }
2476 
/*
 * STACK_INC (statistics build): advances stk by one entry and records the
 * new offset from stk_base in MaxStackDepth when it is a new maximum.
 */
#define STACK_INC do {\
  stk++;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
2482 
2483 #else
/*
 * Build without ONIG_DEBUG_STATISTICS: STACK_INC only advances stk, and the
 * opcode enter/leave hooks expand to nothing.
 */
2484 #define STACK_INC     stk++
2485 
2486 #define SOP_IN(opcode)
2487 #define SOP_OUT
2488 #endif
2489 
2490 
2491 /* matching region of POSIX API */
/* Offset type used by the two fields below. */
2492 typedef int regoff_t;
2493 
/*
 * One match slot as written by the matcher when the POSIX region option is
 * active: rm_so receives a start offset and rm_eo an end offset, both
 * relative to the subject start; unset groups get ONIG_REGION_NOTPOS in
 * both fields.
 */
2494 typedef struct {
2495   regoff_t  rm_so;
2496   regoff_t  rm_eo;
2497 } posix_regmatch_t;
2498 
2499 
2500 
/*
 * Dispatch glue for the bytecode interpreter.  The same opcode handlers
 * are compiled either as labelled blocks reached by computed goto
 * (USE_THREADED_CODE) or as cases of a switch inside an endless loop.
 *
 *   CASE_OP(x)  opens the handler for opcode OP_x
 *   NEXT_OP     leaves a handler and records sbegin in sprev before the
 *               next opcode is dispatched
 *   JUMP_OP     leaves a handler and dispatches the next opcode without
 *               touching sprev
 *   BREAK_OP    plain `break` in the switch build, nothing otherwise
 */
2501 #ifdef USE_THREADED_CODE
2502 
/* Threaded build: every handler is a label L_<op>; dispatch is a goto. */
2503 #define BYTECODE_INTERPRETER_START      GOTO_OP;
2504 #define BYTECODE_INTERPRETER_END
/* Label + statistics hook + remember where this opcode started in s. */
2505 #define CASE_OP(x)   L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
2506 #define DEFAULT_OP   /* L_DEFAULT: */
2507 #define NEXT_OP      sprev = sbegin; JUMP_OP
2508 #define JUMP_OP      GOTO_OP
2509 #ifdef USE_DIRECT_THREADED_CODE
/* Handler address is stored in the operation itself. */
2510 #define GOTO_OP      goto *(p->opaddr)
2511 #else
/* Handler address is looked up by opcode number. */
2512 #define GOTO_OP      goto *opcode_to_label[p->opcode]
2513 #endif
2514 #define BREAK_OP     /* Nothing */
2515 
2516 #else
2517 
/*
 * Switch build: one loop iteration per opcode.  sbegin is captured before
 * the switch; BYTECODE_INTERPRETER_END closes the switch, copies sbegin to
 * sprev and closes the loop, so a handler that ends with `break` (NEXT_OP)
 * updates sprev while one that ends with `continue` (GOTO_OP) skips that
 * update.
 */
2518 #define BYTECODE_INTERPRETER_START \
2519   while (1) {\
2520   MATCH_DEBUG_OUT(0)\
2521   sbegin = s;\
2522   switch (p->opcode) {
2523 #define BYTECODE_INTERPRETER_END  } sprev = sbegin; }
2524 #define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);
2525 #define DEFAULT_OP   default:
2526 #define NEXT_OP      break
2527 #define JUMP_OP      GOTO_OP
/* NOTE(review): the `break` after `continue` is unreachable; presumably it
   is there to keep compilers quiet about case fall-through -- confirm. */
2528 #define GOTO_OP      continue; break
2529 #define BREAK_OP     break
2530 
2531 #endif /* USE_THREADED_CODE */
2532 
/*
 * Handler epilogues.  INC_OP steps p to the next operation; the *_OUT
 * forms run the statistics hook SOP_OUT and then leave the handler through
 * NEXT_OP, JUMP_OP or BREAK_OP.  CHECK_INTERRUPT_JUMP_OUT additionally
 * runs CHECK_INTERRUPT_IN_MATCH before jumping.
 *
 * These expand to several statements without a do/while wrapper, so they
 * must not be used as the sole body of an unbraced if/else.
 */
2533 #define INC_OP       p++
2534 #define NEXT_OUT     SOP_OUT; NEXT_OP
2535 #define JUMP_OUT     SOP_OUT; JUMP_OP
2536 #define BREAK_OUT    SOP_OUT; BREAK_OP
2537 #define CHECK_INTERRUPT_JUMP_OUT  SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2538 
2539 
/*
 * MATCH_DEBUG_OUT(offset): per-opcode trace line on stderr, compiled only
 * with ONIG_DEBUG_MATCH (otherwise it expands to nothing).
 *
 * Printed fields: running counter, stack index of stk, offset of s in the
 * subject (-1 when s is NULL), up to seven characters of the subject
 * starting at s in quotes (followed by "..." when more text remains),
 * padding to 20 columns, then either "----: finish" when the operation is
 * FinishCode or the operation's index in reg->ops and its disassembly.
 * The operation shown is p - offset.
 *
 * The macro uses the caller's locals i, s, str, end, p, stk, reg, encode
 * and counter.  Its definition ends with a semicolon because the dispatch
 * macros invoke it without one.
 *
 * NOTE(review): the preview is copied into buf[50] using enclen() for each
 * of up to seven characters without checking against `end` or the buffer
 * size -- presumably fine for the supported encodings, verify if an
 * encoding with long characters is traced.
 */
2540 #ifdef ONIG_DEBUG_MATCH
2541 #define MATCH_DEBUG_OUT(offset) do {\
2542       Operation *xp;\
2543       UChar *q, *bp, buf[50];\
2544       int len, spos;\
2545       spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
2546       xp = p - (offset);\
2547       fprintf(stderr, "%7u: %7ld: %4d> \"",\
2548               counter, GET_STACK_INDEX(stk), spos);\
2549       counter++;\
2550       bp = buf;\
2551       if (IS_NOT_NULL(s)) {\
2552         for (i = 0, q = s; i < 7 && q < end; i++) {\
2553           len = enclen(encode, q);\
2554           while (len-- > 0) *bp++ = *q++;\
2555         }\
2556         if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
2557         else         { xmemcpy(bp, "\"",    1); bp += 1; }\
2558       }\
2559       else {\
2560         xmemcpy(bp, "\"", 1); bp += 1;\
2561       }\
2562       *bp = 0;\
2563       fputs((char* )buf, stderr);\
2564       for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
2565       if (xp == FinishCode)\
2566         fprintf(stderr, "----: finish");\
2567       else {\
2568         fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
2569         print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
2570       }\
2571       fprintf(stderr, "\n");\
2572   } while(0);
2573 #else
2574 #define MATCH_DEBUG_OUT(offset)
2575 #endif
2576 
/*
 * MATCH_AT_ERROR_RETURN(err_code)
 *
 * Leaves the matcher with an error: stores err_code in best_len and jumps
 * to match_at_end.  Wrapped in do/while(0) so that both statements stay
 * under the control of a preceding unbraced `if`; as two bare statements
 * the jump would be taken unconditionally in
 * `if (r < 0) MATCH_AT_ERROR_RETURN(r);`.
 */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = (err_code);\
  goto match_at_end;\
} while(0)
2578 
2579 
2580 /* match data(str - end) from position (sstart). */
2581 /* if sstart == str then set sprev to NULL. */
2582 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,UChar * sprev,MatchArg * msa)2583 match_at(regex_t* reg, const UChar* str, const UChar* end,
2584          const UChar* in_right_range, const UChar* sstart, UChar* sprev,
2585          MatchArg* msa)
2586 {
2587 
2588 #if defined(USE_DIRECT_THREADED_CODE)
2589   static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2590 #else
2591   static Operation FinishCode[] = { { OP_FINISH } };
2592 #endif
2593 
2594 #ifdef USE_THREADED_CODE
2595   static const void *opcode_to_label[] = {
2596   &&L_FINISH,
2597   &&L_END,
2598   &&L_STR_1,
2599   &&L_STR_2,
2600   &&L_STR_3,
2601   &&L_STR_4,
2602   &&L_STR_5,
2603   &&L_STR_N,
2604   &&L_STR_MB2N1,
2605   &&L_STR_MB2N2,
2606   &&L_STR_MB2N3,
2607   &&L_STR_MB2N,
2608   &&L_STR_MB3N,
2609   &&L_STR_MBN,
2610   &&L_STR_1_IC,
2611   &&L_STR_N_IC,
2612   &&L_CCLASS,
2613   &&L_CCLASS_MB,
2614   &&L_CCLASS_MIX,
2615   &&L_CCLASS_NOT,
2616   &&L_CCLASS_MB_NOT,
2617   &&L_CCLASS_MIX_NOT,
2618   &&L_ANYCHAR,
2619   &&L_ANYCHAR_ML,
2620   &&L_ANYCHAR_STAR,
2621   &&L_ANYCHAR_ML_STAR,
2622   &&L_ANYCHAR_STAR_PEEK_NEXT,
2623   &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2624   &&L_WORD,
2625   &&L_WORD_ASCII,
2626   &&L_NO_WORD,
2627   &&L_NO_WORD_ASCII,
2628   &&L_WORD_BOUNDARY,
2629   &&L_NO_WORD_BOUNDARY,
2630   &&L_WORD_BEGIN,
2631   &&L_WORD_END,
2632   &&L_TEXT_SEGMENT_BOUNDARY,
2633   &&L_BEGIN_BUF,
2634   &&L_END_BUF,
2635   &&L_BEGIN_LINE,
2636   &&L_END_LINE,
2637   &&L_SEMI_END_BUF,
2638   &&L_BEGIN_POSITION,
2639   &&L_BACKREF1,
2640   &&L_BACKREF2,
2641   &&L_BACKREF_N,
2642   &&L_BACKREF_N_IC,
2643   &&L_BACKREF_MULTI,
2644   &&L_BACKREF_MULTI_IC,
2645   &&L_BACKREF_WITH_LEVEL,
2646   &&L_BACKREF_WITH_LEVEL_IC,
2647   &&L_BACKREF_CHECK,
2648   &&L_BACKREF_CHECK_WITH_LEVEL,
2649   &&L_MEM_START,
2650   &&L_MEM_START_PUSH,
2651   &&L_MEM_END_PUSH,
2652 #ifdef USE_CALL
2653   &&L_MEM_END_PUSH_REC,
2654 #endif
2655   &&L_MEM_END,
2656 #ifdef USE_CALL
2657   &&L_MEM_END_REC,
2658 #endif
2659   &&L_FAIL,
2660   &&L_JUMP,
2661   &&L_PUSH,
2662   &&L_PUSH_SUPER,
2663   &&L_POP_OUT,
2664 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2665   &&L_PUSH_OR_JUMP_EXACT1,
2666 #endif
2667   &&L_PUSH_IF_PEEK_NEXT,
2668   &&L_REPEAT,
2669   &&L_REPEAT_NG,
2670   &&L_REPEAT_INC,
2671   &&L_REPEAT_INC_NG,
2672   &&L_EMPTY_CHECK_START,
2673   &&L_EMPTY_CHECK_END,
2674   &&L_EMPTY_CHECK_END_MEMST,
2675 #ifdef USE_CALL
2676   &&L_EMPTY_CHECK_END_MEMST_PUSH,
2677 #endif
2678   &&L_PREC_READ_START,
2679   &&L_PREC_READ_END,
2680   &&L_PREC_READ_NOT_START,
2681   &&L_PREC_READ_NOT_END,
2682   &&L_ATOMIC_START,
2683   &&L_ATOMIC_END,
2684   &&L_LOOK_BEHIND,
2685   &&L_LOOK_BEHIND_NOT_START,
2686   &&L_LOOK_BEHIND_NOT_END,
2687   &&L_PUSH_SAVE_VAL,
2688   &&L_UPDATE_VAR,
2689 #ifdef USE_CALL
2690   &&L_CALL,
2691   &&L_RETURN,
2692 #endif
2693 #ifdef USE_CALLOUT
2694   &&L_CALLOUT_CONTENTS,
2695   &&L_CALLOUT_NAME,
2696 #endif
2697   };
2698 #endif
2699 
2700   int i, n, num_mem, best_len, pop_level;
2701   LengthType tlen, tlen2;
2702   MemNumType mem;
2703   RelAddrType addr;
2704   UChar *s, *q, *ps, *sbegin;
2705   UChar *right_range;
2706   int is_alloca;
2707   char *alloc_base;
2708   StackType *stk_base, *stk, *stk_end;
2709   StackType *stkp; /* used as any purpose. */
2710   StackIndex *mem_start_stk, *mem_end_stk;
2711   UChar* keep;
2712 
2713 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2714   StackIndex *repeat_stk;
2715   StackIndex *empty_check_stk;
2716 #endif
2717 #ifdef USE_RETRY_LIMIT_IN_MATCH
2718   unsigned long retry_limit_in_match;
2719   unsigned long retry_in_match_counter;
2720 #endif
2721 #ifdef USE_CALLOUT
2722   int of;
2723 #endif
2724 
2725   Operation* p = reg->ops;
2726   OnigOptionType option = reg->options;
2727   OnigEncoding encode = reg->enc;
2728   OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2729 
2730 #ifdef ONIG_DEBUG_MATCH
2731   static unsigned int counter = 1;
2732 #endif
2733 
2734 #ifdef USE_DIRECT_THREADED_CODE
2735   if (IS_NULL(msa)) {
2736     for (i = 0; i < reg->ops_used; i++) {
2737        const void* addr;
2738        addr = opcode_to_label[reg->ocs[i]];
2739        p->opaddr = addr;
2740        p++;
2741     }
2742     return ONIG_NORMAL;
2743   }
2744 #endif
2745 
2746 #ifdef USE_CALLOUT
2747   msa->mp->match_at_call_counter++;
2748 #endif
2749 
2750 #ifdef USE_RETRY_LIMIT_IN_MATCH
2751   retry_limit_in_match = msa->retry_limit_in_match;
2752 #endif
2753 
2754   pop_level = reg->stack_pop_level;
2755   num_mem = reg->num_mem;
2756   STACK_INIT(INIT_MATCH_STACK_SIZE);
2757   UPDATE_FOR_STACK_REALLOC;
2758   for (i = 1; i <= num_mem; i++) {
2759     mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
2760   }
2761 
2762 #ifdef ONIG_DEBUG_MATCH
2763   fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2764           str, end, sstart, sprev);
2765   fprintf(stderr, "size: %d, start offset: %d\n",
2766           (int )(end - str), (int )(sstart - str));
2767 #endif
2768 
2769   best_len = ONIG_MISMATCH;
2770   keep = s = (UChar* )sstart;
2771   STACK_PUSH_BOTTOM(STK_ALT, FinishCode);  /* bottom stack */
2772   INIT_RIGHT_RANGE;
2773 
2774 #ifdef USE_RETRY_LIMIT_IN_MATCH
2775   retry_in_match_counter = 0;
2776 #endif
2777 
2778   BYTECODE_INTERPRETER_START {
2779     CASE_OP(END)
2780       n = (int )(s - sstart);
2781       if (n > best_len) {
2782         OnigRegion* region;
2783 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2784         if (IS_FIND_LONGEST(option)) {
2785           if (n > msa->best_len) {
2786             msa->best_len = n;
2787             msa->best_s   = (UChar* )sstart;
2788           }
2789           else
2790             goto end_best_len;
2791         }
2792 #endif
2793         best_len = n;
2794         region = msa->region;
2795         if (region) {
2796           if (keep > s) keep = s;
2797 
2798 #ifdef USE_POSIX_API_REGION_OPTION
2799           if (IS_POSIX_REGION(msa->options)) {
2800             posix_regmatch_t* rmt = (posix_regmatch_t* )region;
2801 
2802             rmt[0].rm_so = (regoff_t )(keep - str);
2803             rmt[0].rm_eo = (regoff_t )(s    - str);
2804             for (i = 1; i <= num_mem; i++) {
2805               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2806                 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
2807                 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i)   - str);
2808               }
2809               else {
2810                 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
2811               }
2812             }
2813           }
2814           else {
2815 #endif /* USE_POSIX_API_REGION_OPTION */
2816             region->beg[0] = (int )(keep - str);
2817             region->end[0] = (int )(s    - str);
2818             for (i = 1; i <= num_mem; i++) {
2819               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2820                 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
2821                 region->end[i] = (int )(STACK_MEM_END(reg, i)   - str);
2822               }
2823               else {
2824                 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2825               }
2826             }
2827 
2828 #ifdef USE_CAPTURE_HISTORY
2829             if (reg->capture_history != 0) {
2830               int r;
2831               OnigCaptureTreeNode* node;
2832 
2833               if (IS_NULL(region->history_root)) {
2834                 region->history_root = node = history_node_new();
2835                 CHECK_NULL_RETURN_MEMERR(node);
2836               }
2837               else {
2838                 node = region->history_root;
2839                 history_tree_clear(node);
2840               }
2841 
2842               node->group = 0;
2843               node->beg   = (int )(keep - str);
2844               node->end   = (int )(s    - str);
2845 
2846               stkp = stk_base;
2847               r = make_capture_history_tree(region->history_root, &stkp,
2848                                             stk, (UChar* )str, reg);
2849               if (r < 0) MATCH_AT_ERROR_RETURN(r);
2850             }
2851 #endif /* USE_CAPTURE_HISTORY */
2852 #ifdef USE_POSIX_API_REGION_OPTION
2853           } /* else IS_POSIX_REGION() */
2854 #endif
2855         } /* if (region) */
2856       } /* n > best_len */
2857 
2858 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2859     end_best_len:
2860 #endif
2861       SOP_OUT;
2862 
2863       if (IS_FIND_CONDITION(option)) {
2864         if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2865           best_len = ONIG_MISMATCH;
2866           goto fail; /* for retry */
2867         }
2868         if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2869           goto fail; /* for retry */
2870         }
2871       }
2872 
2873       /* default behavior: return first-matching result. */
2874       goto match_at_end;
2875 
2876     CASE_OP(STR_1)
2877       DATA_ENSURE(1);
2878       ps = p->exact.s;
2879       if (*ps != *s) goto fail;
2880       s++;
2881       INC_OP;
2882       NEXT_OUT;
2883 
2884     CASE_OP(STR_1_IC)
2885       {
2886         int len;
2887         UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2888 
2889         DATA_ENSURE(1);
2890         len = ONIGENC_MBC_CASE_FOLD(encode,
2891                  /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2892                                     case_fold_flag,
2893                                     &s, end, lowbuf);
2894         DATA_ENSURE(0);
2895         q = lowbuf;
2896         ps = p->exact.s;
2897         while (len-- > 0) {
2898           if (*ps != *q) goto fail;
2899           ps++; q++;
2900         }
2901       }
2902       INC_OP;
2903       NEXT_OUT;
2904 
2905     CASE_OP(STR_2)
2906       DATA_ENSURE(2);
2907       ps = p->exact.s;
2908       if (*ps != *s) goto fail;
2909       ps++; s++;
2910       if (*ps != *s) goto fail;
2911       sprev = s;
2912       s++;
2913       INC_OP;
2914       JUMP_OUT;
2915 
2916     CASE_OP(STR_3)
2917       DATA_ENSURE(3);
2918       ps = p->exact.s;
2919       if (*ps != *s) goto fail;
2920       ps++; s++;
2921       if (*ps != *s) goto fail;
2922       ps++; s++;
2923       if (*ps != *s) goto fail;
2924       sprev = s;
2925       s++;
2926       INC_OP;
2927       JUMP_OUT;
2928 
2929     CASE_OP(STR_4)
2930       DATA_ENSURE(4);
2931       ps = p->exact.s;
2932       if (*ps != *s) goto fail;
2933       ps++; s++;
2934       if (*ps != *s) goto fail;
2935       ps++; s++;
2936       if (*ps != *s) goto fail;
2937       ps++; s++;
2938       if (*ps != *s) goto fail;
2939       sprev = s;
2940       s++;
2941       INC_OP;
2942       JUMP_OUT;
2943 
2944     CASE_OP(STR_5)
2945       DATA_ENSURE(5);
2946       ps = p->exact.s;
2947       if (*ps != *s) goto fail;
2948       ps++; s++;
2949       if (*ps != *s) goto fail;
2950       ps++; s++;
2951       if (*ps != *s) goto fail;
2952       ps++; s++;
2953       if (*ps != *s) goto fail;
2954       ps++; s++;
2955       if (*ps != *s) goto fail;
2956       sprev = s;
2957       s++;
2958       INC_OP;
2959       JUMP_OUT;
2960 
2961     CASE_OP(STR_N)
2962       tlen = p->exact_n.n;
2963       DATA_ENSURE(tlen);
2964       ps = p->exact_n.s;
2965       while (tlen-- > 0) {
2966         if (*ps++ != *s++) goto fail;
2967       }
2968       sprev = s - 1;
2969       INC_OP;
2970       JUMP_OUT;
2971 
2972     CASE_OP(STR_N_IC)
2973       {
2974         int len;
2975         UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2976 
2977         tlen = p->exact_n.n;
2978         ps   = p->exact_n.s;
2979         endp = ps + tlen;
2980         while (ps < endp) {
2981           sprev = s;
2982           DATA_ENSURE(1);
2983           len = ONIGENC_MBC_CASE_FOLD(encode,
2984                         /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2985                                       case_fold_flag,
2986                                       &s, end, lowbuf);
2987           DATA_ENSURE(0);
2988           q = lowbuf;
2989           while (len-- > 0) {
2990             if (ps >= endp) goto fail;
2991             if (*ps != *q) goto fail;
2992             ps++; q++;
2993           }
2994         }
2995       }
2996 
2997       INC_OP;
2998       JUMP_OUT;
2999 
3000     CASE_OP(STR_MB2N1)
3001       DATA_ENSURE(2);
3002       ps = p->exact.s;
3003       if (*ps != *s) goto fail;
3004       ps++; s++;
3005       if (*ps != *s) goto fail;
3006       s++;
3007       INC_OP;
3008       NEXT_OUT;
3009 
3010     CASE_OP(STR_MB2N2)
3011       DATA_ENSURE(4);
3012       ps = p->exact.s;
3013       if (*ps != *s) goto fail;
3014       ps++; s++;
3015       if (*ps != *s) goto fail;
3016       ps++; s++;
3017       sprev = s;
3018       if (*ps != *s) goto fail;
3019       ps++; s++;
3020       if (*ps != *s) goto fail;
3021       s++;
3022       INC_OP;
3023       JUMP_OUT;
3024 
3025     CASE_OP(STR_MB2N3)
3026       DATA_ENSURE(6);
3027       ps = p->exact.s;
3028       if (*ps != *s) goto fail;
3029       ps++; s++;
3030       if (*ps != *s) goto fail;
3031       ps++; s++;
3032       if (*ps != *s) goto fail;
3033       ps++; s++;
3034       if (*ps != *s) goto fail;
3035       ps++; s++;
3036       sprev = s;
3037       if (*ps != *s) goto fail;
3038       ps++; s++;
3039       if (*ps != *s) goto fail;
3040       ps++; s++;
3041       INC_OP;
3042       JUMP_OUT;
3043 
3044     CASE_OP(STR_MB2N)
3045       tlen = p->exact_n.n;
3046       DATA_ENSURE(tlen * 2);
3047       ps = p->exact_n.s;
3048       while (tlen-- > 0) {
3049         if (*ps != *s) goto fail;
3050         ps++; s++;
3051         if (*ps != *s) goto fail;
3052         ps++; s++;
3053       }
3054       sprev = s - 2;
3055       INC_OP;
3056       JUMP_OUT;
3057 
3058     CASE_OP(STR_MB3N)
3059       tlen = p->exact_n.n;
3060       DATA_ENSURE(tlen * 3);
3061       ps = p->exact_n.s;
3062       while (tlen-- > 0) {
3063         if (*ps != *s) goto fail;
3064         ps++; s++;
3065         if (*ps != *s) goto fail;
3066         ps++; s++;
3067         if (*ps != *s) goto fail;
3068         ps++; s++;
3069       }
3070       sprev = s - 3;
3071       INC_OP;
3072       JUMP_OUT;
3073 
3074     CASE_OP(STR_MBN)
3075       tlen  = p->exact_len_n.len; /* mb byte len */
3076       tlen2 = p->exact_len_n.n;   /* number of chars */
3077       tlen2 *= tlen;
3078       DATA_ENSURE(tlen2);
3079       ps = p->exact_len_n.s;
3080       while (tlen2-- > 0) {
3081         if (*ps != *s) goto fail;
3082         ps++; s++;
3083       }
3084       sprev = s - tlen;
3085       INC_OP;
3086       JUMP_OUT;
3087 
3088     CASE_OP(CCLASS)
3089       DATA_ENSURE(1);
3090       if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3091       s++;
3092       INC_OP;
3093       NEXT_OUT;
3094 
3095     CASE_OP(CCLASS_MB)
3096       DATA_ENSURE(1);
3097       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3098 
3099     cclass_mb:
3100       {
3101         OnigCodePoint code;
3102         UChar *ss;
3103         int mb_len;
3104 
3105         mb_len = enclen(encode, s);
3106         DATA_ENSURE(mb_len);
3107         ss = s;
3108         s += mb_len;
3109         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3110         if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3111       }
3112       INC_OP;
3113       NEXT_OUT;
3114 
3115     CASE_OP(CCLASS_MIX)
3116       DATA_ENSURE(1);
3117       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3118         goto cclass_mb;
3119       }
3120       else {
3121         if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3122           goto fail;
3123 
3124         s++;
3125       }
3126       INC_OP;
3127       NEXT_OUT;
3128 
3129     CASE_OP(CCLASS_NOT)
3130       DATA_ENSURE(1);
3131       if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3132       s += enclen(encode, s);
3133       INC_OP;
3134       NEXT_OUT;
3135 
3136     CASE_OP(CCLASS_MB_NOT)
3137       DATA_ENSURE(1);
3138       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3139         s++;
3140         goto cc_mb_not_success;
3141       }
3142 
3143     cclass_mb_not:
3144       {
3145         OnigCodePoint code;
3146         UChar *ss;
3147         int mb_len = enclen(encode, s);
3148 
3149         if (! DATA_ENSURE_CHECK(mb_len)) {
3150           DATA_ENSURE(1);
3151           s = (UChar* )end;
3152           goto cc_mb_not_success;
3153         }
3154 
3155         ss = s;
3156         s += mb_len;
3157         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3158         if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3159       }
3160 
3161     cc_mb_not_success:
3162       INC_OP;
3163       NEXT_OUT;
3164 
3165     CASE_OP(CCLASS_MIX_NOT)
3166       DATA_ENSURE(1);
3167       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3168         goto cclass_mb_not;
3169       }
3170       else {
3171         if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3172           goto fail;
3173 
3174         s++;
3175       }
3176       INC_OP;
3177       NEXT_OUT;
3178 
3179     CASE_OP(ANYCHAR)
3180       DATA_ENSURE(1);
3181       n = enclen(encode, s);
3182       DATA_ENSURE(n);
3183       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3184       s += n;
3185       INC_OP;
3186       NEXT_OUT;
3187 
3188     CASE_OP(ANYCHAR_ML)
3189       DATA_ENSURE(1);
3190       n = enclen(encode, s);
3191       DATA_ENSURE(n);
3192       s += n;
3193       INC_OP;
3194       NEXT_OUT;
3195 
3196     CASE_OP(ANYCHAR_STAR)
3197       INC_OP;
3198       while (DATA_ENSURE_CHECK1) {
3199         STACK_PUSH_ALT(p, s, sprev);
3200         n = enclen(encode, s);
3201         DATA_ENSURE(n);
3202         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3203         sprev = s;
3204         s += n;
3205       }
3206       JUMP_OUT;
3207 
3208     CASE_OP(ANYCHAR_ML_STAR)
3209       INC_OP;
3210       while (DATA_ENSURE_CHECK1) {
3211         STACK_PUSH_ALT(p, s, sprev);
3212         n = enclen(encode, s);
3213         if (n > 1) {
3214           DATA_ENSURE(n);
3215           sprev = s;
3216           s += n;
3217         }
3218         else {
3219           sprev = s;
3220           s++;
3221         }
3222       }
3223       JUMP_OUT;
3224 
3225     CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3226       {
3227         UChar c;
3228 
3229         c = p->anychar_star_peek_next.c;
3230         INC_OP;
3231         while (DATA_ENSURE_CHECK1) {
3232           if (c == *s) {
3233             STACK_PUSH_ALT(p, s, sprev);
3234           }
3235           n = enclen(encode, s);
3236           DATA_ENSURE(n);
3237           if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3238           sprev = s;
3239           s += n;
3240         }
3241       }
3242       NEXT_OUT;
3243 
3244     CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3245       {
3246         UChar c;
3247 
3248         c = p->anychar_star_peek_next.c;
3249         INC_OP;
3250         while (DATA_ENSURE_CHECK1) {
3251           if (c == *s) {
3252             STACK_PUSH_ALT(p, s, sprev);
3253           }
3254           n = enclen(encode, s);
3255           if (n > 1) {
3256             DATA_ENSURE(n);
3257             sprev = s;
3258             s += n;
3259           }
3260           else {
3261             sprev = s;
3262             s++;
3263           }
3264         }
3265       }
3266       NEXT_OUT;
3267 
3268     CASE_OP(WORD)
3269       DATA_ENSURE(1);
3270       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3271         goto fail;
3272 
3273       s += enclen(encode, s);
3274       INC_OP;
3275       NEXT_OUT;
3276 
3277     CASE_OP(WORD_ASCII)
3278       DATA_ENSURE(1);
3279       if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3280         goto fail;
3281 
3282       s += enclen(encode, s);
3283       INC_OP;
3284       NEXT_OUT;
3285 
3286     CASE_OP(NO_WORD)
3287       DATA_ENSURE(1);
3288       if (ONIGENC_IS_MBC_WORD(encode, s, end))
3289         goto fail;
3290 
3291       s += enclen(encode, s);
3292       INC_OP;
3293       NEXT_OUT;
3294 
3295     CASE_OP(NO_WORD_ASCII)
3296       DATA_ENSURE(1);
3297       if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3298         goto fail;
3299 
3300       s += enclen(encode, s);
3301       INC_OP;
3302       NEXT_OUT;
3303 
3304     CASE_OP(WORD_BOUNDARY)
3305       {
3306         ModeType mode;
3307 
3308         mode = p->word_boundary.mode;
3309         if (ON_STR_BEGIN(s)) {
3310           DATA_ENSURE(1);
3311           if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3312             goto fail;
3313         }
3314         else if (ON_STR_END(s)) {
3315           if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3316             goto fail;
3317         }
3318         else {
3319           if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3320               == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3321             goto fail;
3322         }
3323       }
3324       INC_OP;
3325       JUMP_OUT;
3326 
3327     CASE_OP(NO_WORD_BOUNDARY)
3328       {
3329         ModeType mode;
3330 
3331         mode = p->word_boundary.mode;
3332         if (ON_STR_BEGIN(s)) {
3333           if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3334             goto fail;
3335         }
3336         else if (ON_STR_END(s)) {
3337           if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3338             goto fail;
3339         }
3340         else {
3341           if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3342               != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3343             goto fail;
3344         }
3345       }
3346       INC_OP;
3347       JUMP_OUT;
3348 
3349 #ifdef USE_WORD_BEGIN_END
3350     CASE_OP(WORD_BEGIN)
3351       {
3352         ModeType mode;
3353 
3354         mode = p->word_boundary.mode;
3355         if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3356           if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3357             INC_OP;
3358             JUMP_OUT;
3359           }
3360         }
3361       }
3362       goto fail;
3363 
3364     CASE_OP(WORD_END)
3365       {
3366         ModeType mode;
3367 
3368         mode = p->word_boundary.mode;
3369         if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3370           if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3371             INC_OP;
3372             JUMP_OUT;
3373           }
3374         }
3375       }
3376       goto fail;
3377 #endif
3378 
3379     CASE_OP(TEXT_SEGMENT_BOUNDARY)
3380       {
3381         int is_break;
3382 
3383         switch (p->text_segment_boundary.type) {
3384         case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3385           is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3386           break;
3387 #ifdef USE_UNICODE_WORD_BREAK
3388         case WORD_BOUNDARY:
3389           is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3390           break;
3391 #endif
3392         default:
3393           MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3394           break;
3395         }
3396 
3397         if (p->text_segment_boundary.not != 0)
3398           is_break = ! is_break;
3399 
3400         if (is_break != 0) {
3401           INC_OP;
3402           JUMP_OUT;
3403         }
3404         else {
3405           goto fail;
3406         }
3407       }
3408 
3409     CASE_OP(BEGIN_BUF)
3410       if (! ON_STR_BEGIN(s)) goto fail;
3411 
3412       INC_OP;
3413       JUMP_OUT;
3414 
3415     CASE_OP(END_BUF)
3416       if (! ON_STR_END(s)) goto fail;
3417 
3418       INC_OP;
3419       JUMP_OUT;
3420 
3421     CASE_OP(BEGIN_LINE)
3422       if (ON_STR_BEGIN(s)) {
3423         if (IS_NOTBOL(msa->options)) goto fail;
3424         INC_OP;
3425         JUMP_OUT;
3426       }
3427       else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
3428         INC_OP;
3429         JUMP_OUT;
3430       }
3431       goto fail;
3432 
3433     CASE_OP(END_LINE)
3434       if (ON_STR_END(s)) {
3435 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3436         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3437 #endif
3438           if (IS_NOTEOL(msa->options)) goto fail;
3439           INC_OP;
3440           JUMP_OUT;
3441 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3442         }
3443 #endif
3444       }
3445       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3446         INC_OP;
3447         JUMP_OUT;
3448       }
3449 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3450       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3451         INC_OP;
3452         JUMP_OUT;
3453       }
3454 #endif
3455       goto fail;
3456 
3457     CASE_OP(SEMI_END_BUF)
3458       if (ON_STR_END(s)) {
3459 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3460         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3461 #endif
3462           if (IS_NOTEOL(msa->options)) goto fail;
3463           INC_OP;
3464           JUMP_OUT;
3465 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3466         }
3467 #endif
3468       }
3469       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3470                ON_STR_END(s + enclen(encode, s))) {
3471         INC_OP;
3472         JUMP_OUT;
3473       }
3474 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3475       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3476         UChar* ss = s + enclen(encode, s);
3477         ss += enclen(encode, ss);
3478         if (ON_STR_END(ss)) {
3479           INC_OP;
3480           JUMP_OUT;
3481         }
3482       }
3483 #endif
3484       goto fail;
3485 
3486     CASE_OP(BEGIN_POSITION)
3487       if (s != msa->start)
3488         goto fail;
3489 
3490       INC_OP;
3491       JUMP_OUT;
3492 
3493     CASE_OP(MEM_START_PUSH)
3494       mem = p->memory_start.num;
3495       STACK_PUSH_MEM_START(mem, s);
3496       INC_OP;
3497       JUMP_OUT;
3498 
3499     CASE_OP(MEM_START)
3500       mem = p->memory_start.num;
3501       mem_start_stk[mem] = (StackIndex )((void* )s);
3502       INC_OP;
3503       JUMP_OUT;
3504 
3505     CASE_OP(MEM_END_PUSH)
3506       mem = p->memory_end.num;
3507       STACK_PUSH_MEM_END(mem, s);
3508       INC_OP;
3509       JUMP_OUT;
3510 
3511     CASE_OP(MEM_END)
3512       mem = p->memory_end.num;
3513       mem_end_stk[mem] = (StackIndex )((void* )s);
3514       INC_OP;
3515       JUMP_OUT;
3516 
3517 #ifdef USE_CALL
3518     CASE_OP(MEM_END_PUSH_REC)
3519       {
3520         StackIndex si;
3521 
3522         mem = p->memory_end.num;
3523         STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3524         si = GET_STACK_INDEX(stkp);
3525         STACK_PUSH_MEM_END(mem, s);
3526         mem_start_stk[mem] = si;
3527         INC_OP;
3528         JUMP_OUT;
3529       }
3530 
3531     CASE_OP(MEM_END_REC)
3532       mem = p->memory_end.num;
3533       mem_end_stk[mem] = (StackIndex )((void* )s);
3534       STACK_GET_MEM_START(mem, stkp);
3535 
3536       if (MEM_STATUS_AT(reg->push_mem_start, mem))
3537         mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3538       else
3539         mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
3540 
3541       STACK_PUSH_MEM_END_MARK(mem);
3542       INC_OP;
3543       JUMP_OUT;
3544 #endif
3545 
3546     CASE_OP(BACKREF1)
3547       mem = 1;
3548       goto backref;
3549 
3550     CASE_OP(BACKREF2)
3551       mem = 2;
3552       goto backref;
3553 
3554     CASE_OP(BACKREF_N)
3555       mem = p->backref_n.n1;
3556     backref:
3557       {
3558         int len;
3559         UChar *pstart, *pend;
3560 
3561         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3562         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3563 
3564         pstart = STACK_MEM_START(reg, mem);
3565         pend   = STACK_MEM_END(reg, mem);
3566         n = (int )(pend - pstart);
3567         if (n != 0) {
3568           DATA_ENSURE(n);
3569           sprev = s;
3570           STRING_CMP(s, pstart, n);
3571           while (sprev + (len = enclen(encode, sprev)) < s)
3572             sprev += len;
3573         }
3574       }
3575       INC_OP;
3576       JUMP_OUT;
3577 
3578     CASE_OP(BACKREF_N_IC)
3579       mem = p->backref_n.n1;
3580       {
3581         int len;
3582         UChar *pstart, *pend;
3583 
3584         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3585         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3586 
3587         pstart = STACK_MEM_START(reg, mem);
3588         pend   = STACK_MEM_END(reg, mem);
3589         n = (int )(pend - pstart);
3590         if (n != 0) {
3591           DATA_ENSURE(n);
3592           sprev = s;
3593           STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3594           while (sprev + (len = enclen(encode, sprev)) < s)
3595             sprev += len;
3596         }
3597       }
3598       INC_OP;
3599       JUMP_OUT;
3600 
3601     CASE_OP(BACKREF_MULTI)
3602       {
3603         int len, is_fail;
3604         UChar *pstart, *pend, *swork;
3605 
3606         tlen = p->backref_general.num;
3607         for (i = 0; i < tlen; i++) {
3608           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3609 
3610           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3611           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3612 
3613           pstart = STACK_MEM_START(reg, mem);
3614           pend   = STACK_MEM_END(reg, mem);
3615           n = (int )(pend - pstart);
3616           if (n != 0) {
3617             DATA_ENSURE(n);
3618             sprev = s;
3619             swork = s;
3620             STRING_CMP_VALUE(swork, pstart, n, is_fail);
3621             if (is_fail) continue;
3622             s = swork;
3623             while (sprev + (len = enclen(encode, sprev)) < s)
3624               sprev += len;
3625           }
3626           break; /* success */
3627         }
3628         if (i == tlen) goto fail;
3629       }
3630       INC_OP;
3631       JUMP_OUT;
3632 
3633     CASE_OP(BACKREF_MULTI_IC)
3634       {
3635         int len, is_fail;
3636         UChar *pstart, *pend, *swork;
3637 
3638         tlen = p->backref_general.num;
3639         for (i = 0; i < tlen; i++) {
3640           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3641 
3642           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3643           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3644 
3645           pstart = STACK_MEM_START(reg, mem);
3646           pend   = STACK_MEM_END(reg, mem);
3647           n = (int )(pend - pstart);
3648           if (n != 0) {
3649             DATA_ENSURE(n);
3650             sprev = s;
3651             swork = s;
3652             STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3653             if (is_fail) continue;
3654             s = swork;
3655             while (sprev + (len = enclen(encode, sprev)) < s)
3656               sprev += len;
3657           }
3658           break; /* success */
3659         }
3660         if (i == tlen) goto fail;
3661       }
3662       INC_OP;
3663       JUMP_OUT;
3664 
3665 #ifdef USE_BACKREF_WITH_LEVEL
3666     CASE_OP(BACKREF_WITH_LEVEL_IC)
3667       n = 1; /* ignore case */
3668       goto backref_with_level;
3669     CASE_OP(BACKREF_WITH_LEVEL)
3670       {
3671         int len;
3672         int level;
3673         MemNumType* mems;
3674         UChar* ssave;
3675 
3676         n = 0;
3677       backref_with_level:
3678         level = p->backref_general.nest_level;
3679         tlen  = p->backref_general.num;
3680         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3681 
3682         ssave = s;
3683         if (backref_match_at_nested_level(reg, stk, stk_base, n,
3684                     case_fold_flag, level, (int )tlen, mems, &s, end)) {
3685           if (ssave != s) {
3686             sprev = ssave;
3687             while (sprev + (len = enclen(encode, sprev)) < s)
3688               sprev += len;
3689           }
3690         }
3691         else
3692           goto fail;
3693       }
3694       INC_OP;
3695       JUMP_OUT;
3696 #endif
3697 
3698     CASE_OP(BACKREF_CHECK)
3699       {
3700         MemNumType* mems;
3701 
3702         tlen  = p->backref_general.num;
3703         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3704 
3705         for (i = 0; i < tlen; i++) {
3706           mem = mems[i];
3707           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3708           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3709           break; /* success */
3710         }
3711         if (i == tlen) goto fail;
3712       }
3713       INC_OP;
3714       JUMP_OUT;
3715 
3716 #ifdef USE_BACKREF_WITH_LEVEL
3717     CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3718       {
3719         LengthType level;
3720         MemNumType* mems;
3721 
3722         level = p->backref_general.nest_level;
3723         tlen  = p->backref_general.num;
3724         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3725 
3726         if (backref_check_at_nested_level(reg, stk, stk_base,
3727                                           (int )level, (int )tlen, mems) == 0)
3728           goto fail;
3729       }
3730       INC_OP;
3731       JUMP_OUT;
3732 #endif
3733 
3734     CASE_OP(EMPTY_CHECK_START)
3735       mem = p->empty_check_start.mem;   /* mem: null check id */
3736       STACK_PUSH_EMPTY_CHECK_START(mem, s);
3737       INC_OP;
3738       JUMP_OUT;
3739 
3740     CASE_OP(EMPTY_CHECK_END)
3741       {
3742         int is_empty;
3743 
3744         mem = p->empty_check_end.mem;  /* mem: null check id */
3745         STACK_EMPTY_CHECK(is_empty, mem, s);
3746         INC_OP;
3747         if (is_empty) {
3748 #ifdef ONIG_DEBUG_MATCH
3749           fprintf(stderr, "EMPTY_CHECK_END: skip  id:%d, s:%p\n", (int )mem, s);
3750 #endif
3751         empty_check_found:
3752           /* empty loop found, skip next instruction */
3753 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3754           switch (p->opcode) {
3755           case OP_JUMP:
3756           case OP_PUSH:
3757           case OP_REPEAT_INC:
3758           case OP_REPEAT_INC_NG:
3759             INC_OP;
3760             break;
3761           default:
3762             MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3763             break;
3764           }
3765 #else
3766           INC_OP;
3767 #endif
3768         }
3769       }
3770       JUMP_OUT;
3771 
3772 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3773     CASE_OP(EMPTY_CHECK_END_MEMST)
3774       {
3775         int is_empty;
3776 
3777         mem = p->empty_check_end.mem;  /* mem: null check id */
3778         STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3779         INC_OP;
3780         if (is_empty) {
3781 #ifdef ONIG_DEBUG_MATCH
3782           fprintf(stderr, "EMPTY_CHECK_END_MEM: skip  id:%d, s:%p\n", (int)mem, s);
3783 #endif
3784           if (is_empty == -1) goto fail;
3785           goto empty_check_found;
3786         }
3787       }
3788       JUMP_OUT;
3789 #endif
3790 
3791 #ifdef USE_CALL
3792     CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3793       {
3794         int is_empty;
3795 
3796         mem = p->empty_check_end.mem;  /* mem: null check id */
3797 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3798         STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3799 #else
3800         STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3801 #endif
3802         INC_OP;
3803         if (is_empty) {
3804 #ifdef ONIG_DEBUG_MATCH
3805           fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip  id:%d, s:%p\n",
3806                   (int )mem, s);
3807 #endif
3808           if (is_empty == -1) goto fail;
3809           goto empty_check_found;
3810         }
3811         else {
3812           STACK_PUSH_EMPTY_CHECK_END(mem);
3813         }
3814       }
3815       JUMP_OUT;
3816 #endif
3817 
3818     CASE_OP(JUMP)
3819       addr = p->jump.addr;
3820       p += addr;
3821       CHECK_INTERRUPT_JUMP_OUT;
3822 
3823     CASE_OP(PUSH)
3824       addr = p->push.addr;
3825       STACK_PUSH_ALT(p + addr, s, sprev);
3826       INC_OP;
3827       JUMP_OUT;
3828 
3829     CASE_OP(PUSH_SUPER)
3830       addr = p->push.addr;
3831       STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
3832       INC_OP;
3833       JUMP_OUT;
3834 
3835     CASE_OP(POP_OUT)
3836       STACK_POP_ONE;
3837       /* for stop backtrack */
3838       /* CHECK_RETRY_LIMIT_IN_MATCH; */
3839       INC_OP;
3840       JUMP_OUT;
3841 
3842  #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3843     CASE_OP(PUSH_OR_JUMP_EXACT1)
3844       {
3845         UChar c;
3846 
3847         addr = p->push_or_jump_exact1.addr;
3848         c    = p->push_or_jump_exact1.c;
3849         if (DATA_ENSURE_CHECK1 && c == *s) {
3850           STACK_PUSH_ALT(p + addr, s, sprev);
3851           INC_OP;
3852           JUMP_OUT;
3853         }
3854       }
3855       p += addr;
3856       JUMP_OUT;
3857 #endif
3858 
3859     CASE_OP(PUSH_IF_PEEK_NEXT)
3860       {
3861         UChar c;
3862 
3863         addr = p->push_if_peek_next.addr;
3864         c    = p->push_if_peek_next.c;
3865         if (DATA_ENSURE_CHECK1 && c == *s) {
3866           STACK_PUSH_ALT(p + addr, s, sprev);
3867           INC_OP;
3868           JUMP_OUT;
3869         }
3870       }
3871       INC_OP;
3872       JUMP_OUT;
3873 
3874     CASE_OP(REPEAT)
3875       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
3876       addr = p->repeat.addr;
3877 
3878       STACK_PUSH_REPEAT_INC(mem, 0);
3879       if (reg->repeat_range[mem].lower == 0) {
3880         STACK_PUSH_ALT(p + addr, s, sprev);
3881       }
3882       INC_OP;
3883       JUMP_OUT;
3884 
3885     CASE_OP(REPEAT_NG)
3886       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
3887       addr = p->repeat.addr;
3888 
3889       STACK_PUSH_REPEAT_INC(mem, 0);
3890       if (reg->repeat_range[mem].lower == 0) {
3891         STACK_PUSH_ALT(p + 1, s, sprev);
3892         p += addr;
3893       }
3894       else
3895         INC_OP;
3896       JUMP_OUT;
3897 
3898     CASE_OP(REPEAT_INC)
3899       mem  = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
3900       STACK_GET_REPEAT_COUNT(mem, n);
3901       n++;
3902       if (n >= reg->repeat_range[mem].upper) {
3903         /* end of repeat. Nothing to do. */
3904         INC_OP;
3905       }
3906       else if (n >= reg->repeat_range[mem].lower) {
3907         INC_OP;
3908         STACK_PUSH_ALT(p, s, sprev);
3909         p = reg->repeat_range[mem].u.pcode;
3910       }
3911       else {
3912         p = reg->repeat_range[mem].u.pcode;
3913       }
3914       STACK_PUSH_REPEAT_INC(mem, n);
3915       CHECK_INTERRUPT_JUMP_OUT;
3916 
3917     CASE_OP(REPEAT_INC_NG)
3918       mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
3919       STACK_GET_REPEAT_COUNT(mem, n);
3920       n++;
3921       STACK_PUSH_REPEAT_INC(mem, n);
3922       if (n == reg->repeat_range[mem].upper) {
3923         INC_OP;
3924       }
3925       else {
3926         if (n >= reg->repeat_range[mem].lower) {
3927           STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
3928           INC_OP;
3929         }
3930         else {
3931           p = reg->repeat_range[mem].u.pcode;
3932         }
3933       }
3934       CHECK_INTERRUPT_JUMP_OUT;
3935 
3936     CASE_OP(PREC_READ_START)
3937       STACK_PUSH_PREC_READ_START(s, sprev);
3938       INC_OP;
3939       JUMP_OUT;
3940 
3941     CASE_OP(PREC_READ_END)
3942       STACK_GET_PREC_READ_START(stkp);
3943       s     = stkp->u.state.pstr;
3944       sprev = stkp->u.state.pstr_prev;
3945       STACK_PUSH(STK_PREC_READ_END,0,0,0);
3946       INC_OP;
3947       JUMP_OUT;
3948 
3949     CASE_OP(PREC_READ_NOT_START)
3950       addr = p->prec_read_not_start.addr;
3951       STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
3952       INC_OP;
3953       JUMP_OUT;
3954 
3955     CASE_OP(PREC_READ_NOT_END)
3956       STACK_POP_TIL_ALT_PREC_READ_NOT;
3957       goto fail;
3958 
3959     CASE_OP(ATOMIC_START)
3960       STACK_PUSH_TO_VOID_START;
3961       INC_OP;
3962       JUMP_OUT;
3963 
3964     CASE_OP(ATOMIC_END)
3965       STACK_EXEC_TO_VOID(stkp);
3966       INC_OP;
3967       JUMP_OUT;
3968 
3969     CASE_OP(LOOK_BEHIND)
3970       tlen = p->look_behind.len;
3971       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3972       if (IS_NULL(s)) goto fail;
3973       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3974       INC_OP;
3975       JUMP_OUT;
3976 
3977     CASE_OP(LOOK_BEHIND_NOT_START)
3978       addr = p->look_behind_not_start.addr;
3979       tlen = p->look_behind_not_start.len;
3980       q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3981       if (IS_NULL(q)) {
3982         /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3983            If you want to change to fail, replace following line. */
3984         p += addr;
3985         /* goto fail; */
3986       }
3987       else {
3988         STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
3989         s = q;
3990         sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3991         INC_OP;
3992       }
3993       JUMP_OUT;
3994 
3995     CASE_OP(LOOK_BEHIND_NOT_END)
3996       STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
3997       INC_OP;
3998       goto fail;
3999 
4000 #ifdef USE_CALL
4001     CASE_OP(CALL)
4002       addr = p->call.addr;
4003       INC_OP; STACK_PUSH_CALL_FRAME(p);
4004       p = reg->ops + addr;
4005       JUMP_OUT;
4006 
4007     CASE_OP(RETURN)
4008       STACK_RETURN(p);
4009       STACK_PUSH_RETURN;
4010       JUMP_OUT;
4011 #endif
4012 
4013     CASE_OP(PUSH_SAVE_VAL)
4014       {
4015         SaveType type;
4016 
4017         type = p->push_save_val.type;
4018         mem  = p->push_save_val.id; /* mem: save id */
4019         switch ((enum SaveType )type) {
4020         case SAVE_KEEP:
4021           STACK_PUSH_SAVE_VAL(mem, type, s);
4022           break;
4023 
4024         case SAVE_S:
4025           STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4026           break;
4027 
4028         case SAVE_RIGHT_RANGE:
4029           STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4030           break;
4031         }
4032       }
4033       INC_OP;
4034       JUMP_OUT;
4035 
4036     CASE_OP(UPDATE_VAR)
4037       {
4038         UpdateVarType type;
4039         enum SaveType save_type;
4040 
4041         type = p->update_var.type;
4042         mem  = p->update_var.id; /* mem: save id */
4043 
4044         switch ((enum UpdateVarType )type) {
4045         case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4046           STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4047           break;
4048         case UPDATE_VAR_S_FROM_STACK:
4049           STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4050           break;
4051         case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4052           save_type = SAVE_S;
4053           goto get_save_val_type_last_id;
4054           break;
4055         case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4056           save_type = SAVE_RIGHT_RANGE;
4057         get_save_val_type_last_id:
4058           STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range);
4059           break;
4060         case UPDATE_VAR_RIGHT_RANGE_INIT:
4061           INIT_RIGHT_RANGE;
4062           break;
4063         }
4064       }
4065       INC_OP;
4066       JUMP_OUT;
4067 
4068 #ifdef USE_CALLOUT
4069     CASE_OP(CALLOUT_CONTENTS)
4070       of = ONIG_CALLOUT_OF_CONTENTS;
4071       mem = p->callout_contents.num;
4072       goto callout_common_entry;
4073       BREAK_OUT;
4074 
4075     CASE_OP(CALLOUT_NAME)
4076       {
4077         int call_result;
4078         int name_id;
4079         int in;
4080         CalloutListEntry* e;
4081         OnigCalloutFunc func;
4082         OnigCalloutArgs args;
4083 
4084         of  = ONIG_CALLOUT_OF_NAME;
4085         mem = p->callout_name.num;
4086 
4087       callout_common_entry:
4088         e = onig_reg_callout_list_at(reg, mem);
4089         in = e->in;
4090         if (of == ONIG_CALLOUT_OF_NAME) {
4091           name_id = p->callout_name.id;
4092           func = onig_get_callout_start_func(reg, mem);
4093         }
4094         else {
4095           name_id = ONIG_NON_NAME_ID;
4096           func = msa->mp->progress_callout_of_contents;
4097         }
4098 
4099         if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4100           CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4101                        (int )mem, msa->mp->callout_user_data, args, call_result);
4102           switch (call_result) {
4103           case ONIG_CALLOUT_FAIL:
4104             goto fail;
4105             break;
4106           case ONIG_CALLOUT_SUCCESS:
4107             goto retraction_callout2;
4108             break;
4109           default: /* error code */
4110             if (call_result > 0) {
4111               call_result = ONIGERR_INVALID_ARGUMENT;
4112             }
4113             best_len = call_result;
4114             goto match_at_end;
4115             break;
4116           }
4117         }
4118         else {
4119         retraction_callout2:
4120           if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4121             if (of == ONIG_CALLOUT_OF_NAME) {
4122               if (IS_NOT_NULL(func)) {
4123                 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4124               }
4125             }
4126             else {
4127               func = msa->mp->retraction_callout_of_contents;
4128               if (IS_NOT_NULL(func)) {
4129                 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4130               }
4131             }
4132           }
4133         }
4134       }
4135       INC_OP;
4136       JUMP_OUT;
4137 #endif
4138 
4139     CASE_OP(FINISH)
4140       goto match_at_end;
4141 
4142 #ifdef ONIG_DEBUG_STATISTICS
4143     fail:
4144       SOP_OUT;
4145       goto fail2;
4146 #endif
4147     CASE_OP(FAIL)
4148 #ifdef ONIG_DEBUG_STATISTICS
4149     fail2:
4150 #else
4151     fail:
4152 #endif
4153       STACK_POP;
4154       p     = stk->u.state.pcode;
4155       s     = stk->u.state.pstr;
4156       sprev = stk->u.state.pstr_prev;
4157       CHECK_RETRY_LIMIT_IN_MATCH;
4158       JUMP_OUT;
4159 
4160     DEFAULT_OP
4161       MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4162 
4163   } BYTECODE_INTERPRETER_END;
4164 
4165  match_at_end:
4166   STACK_SAVE;
4167   return best_len;
4168 }
4169 
4170 typedef struct {
4171   regex_t*    reg;
4172   OnigRegion* region;
4173 } RR;
4174 
4175 struct OnigRegSetStruct {
4176   RR*          rs;
4177   int          n;
4178   int          alloc;
4179   OnigEncoding enc;
4180   int          anchor;      /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
4181   OnigLen      anc_dmin;    /* (SEMI_)END_BUF anchor distance */
4182   OnigLen      anc_dmax;    /* (SEMI_)END_BUF anchor distance */
4183   int          all_low_high;
4184   int          anychar_inf;
4185 };
4186 
/* Per-regex state used by the position-lead regset search. */
enum SearchRangeStatus {
  SRS_DEAD = 0,   /* regex is skipped for the rest of the search */
  SRS_LOW_HIGH,   /* attempts are limited to the window reported by forward_search() */
  SRS_ALL_RANGE   /* regex is attempted at every position */
};
4192 
4193 typedef struct {
4194   int    state;  /* value of enum SearchRangeStatus */
4195   UChar* low;
4196   UChar* high;
4197   UChar* low_prev;
4198   UChar* sch_range;
4199 } SearchRange;
4200 
/*
 * Try regex `reg` (member i of the set) at position s via match_at().
 * Relies on the enclosing function providing the variables r, reg, str, end,
 * s, prev, msas and i, plus the labels `match` and `finish`:
 *   r >= 0                         -> goto match
 *   r <  0 and r != ONIG_MISMATCH  -> goto finish (error)
 *   r == ONIG_MISMATCH             -> falls through
 */
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
  do {\
    r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
    if (r != ONIG_MISMATCH) {\
      if (r >= 0) goto match;\
      goto finish; /* error */ \
    }\
  } while (0)
4209 
4210 static inline int
regset_search_body_position_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * orig_range,OnigOptionType option,MatchArg * msas,int * rmatch_pos)4211 regset_search_body_position_lead(OnigRegSet* set,
4212            const UChar* str, const UChar* end,
4213            const UChar* start, const UChar* range, /* match start range */
4214            const UChar* orig_range, /* data range */
4215            OnigOptionType option, MatchArg* msas, int* rmatch_pos)
4216 {
4217   int r, n, i;
4218   UChar *s, *prev;
4219   UChar *low, *high, *low_prev;
4220   UChar* sch_range;
4221   regex_t* reg;
4222   OnigEncoding enc;
4223   SearchRange* sr;
4224 
4225   n   = set->n;
4226   enc = set->enc;
4227 
4228   s = (UChar* )start;
4229   if (s > str)
4230     prev = onigenc_get_prev_char_head(enc, str, s);
4231   else
4232     prev = (UChar* )NULL;
4233 
4234   sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
4235   CHECK_NULL_RETURN_MEMERR(sr);
4236 
4237   for (i = 0; i < n; i++) {
4238     reg = set->rs[i].reg;
4239 
4240     sr[i].state = SRS_DEAD;
4241     if (reg->optimize != OPTIMIZE_NONE) {
4242       if (reg->dist_max != INFINITE_LEN) {
4243         if (end - range > reg->dist_max)
4244           sch_range = (UChar* )range + reg->dist_max;
4245         else
4246           sch_range = (UChar* )end;
4247 
4248         if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
4249           sr[i].state = SRS_LOW_HIGH;
4250           sr[i].low  = low;
4251           sr[i].high = high;
4252           sr[i].low_prev = low_prev;
4253           sr[i].sch_range = sch_range;
4254         }
4255       }
4256       else {
4257         sch_range = (UChar* )end;
4258         if (forward_search(reg, str, end, s, sch_range,
4259                            &low, &high, (UChar** )NULL)) {
4260           goto total_active;
4261         }
4262       }
4263     }
4264     else {
4265     total_active:
4266       sr[i].state    = SRS_ALL_RANGE;
4267       sr[i].low      = s;
4268       sr[i].high     = (UChar* )range;
4269       sr[i].low_prev = prev;
4270     }
4271   }
4272 
4273 #define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN   500
4274 
4275   if (set->all_low_high != 0
4276       && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
4277     do {
4278       int try_count = 0;
4279       for (i = 0; i < n; i++) {
4280         if (sr[i].state == SRS_DEAD) continue;
4281 
4282         if (s <  sr[i].low) continue;
4283         if (s >= sr[i].high) {
4284           if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4285                              &low, &high, &low_prev) != 0) {
4286             sr[i].low      = low;
4287             sr[i].high     = high;
4288             sr[i].low_prev = low_prev;
4289             if (s < low) continue;
4290           }
4291           else {
4292             sr[i].state = SRS_DEAD;
4293             continue;
4294           }
4295         }
4296 
4297         reg = set->rs[i].reg;
4298         REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4299         try_count++;
4300       } /* for (i) */
4301 
4302       if (s >= range) break;
4303 
4304       if (try_count == 0) {
4305         low = (UChar* )range;
4306         for (i = 0; i < n; i++) {
4307           if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
4308             low = sr[i].low;
4309             low_prev = sr[i].low_prev;
4310           }
4311         }
4312         if (low == range) break;
4313 
4314         s = low;
4315         prev = low_prev;
4316       }
4317       else {
4318         prev = s;
4319         s += enclen(enc, s);
4320       }
4321     } while (1);
4322   }
4323   else {
4324     int prev_is_newline = 1;
4325     do {
4326       for (i = 0; i < n; i++) {
4327         if (sr[i].state == SRS_DEAD) continue;
4328         if (sr[i].state == SRS_LOW_HIGH) {
4329           if (s <  sr[i].low) continue;
4330           if (s >= sr[i].high) {
4331             if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4332                                &low, &high, &low_prev) != 0) {
4333               sr[i].low      = low;
4334               sr[i].high     = high;
4335               /* sr[i].low_prev = low_prev; */
4336               if (s < low) continue;
4337             }
4338             else {
4339               sr[i].state = SRS_DEAD;
4340               continue;
4341             }
4342           }
4343         }
4344 
4345         reg = set->rs[i].reg;
4346         if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
4347           REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4348         }
4349       }
4350 
4351       if (s >= range) break;
4352 
4353       if (set->anychar_inf != 0)
4354         prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
4355 
4356       prev = s;
4357       s += enclen(enc, s);
4358     } while (1);
4359   }
4360 
4361   xfree(sr);
4362   return ONIG_MISMATCH;
4363 
4364  finish:
4365   xfree(sr);
4366   return r;
4367 
4368  match:
4369   xfree(sr);
4370   *rmatch_pos = (int )(s - str);
4371   return i;
4372 }
4373 
4374 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4375 regset_search_body_regex_lead(OnigRegSet* set,
4376               const UChar* str, const UChar* end,
4377               const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4378               OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4379 {
4380   int r;
4381   int i;
4382   int n;
4383   int match_index;
4384   const UChar* ep;
4385   regex_t* reg;
4386   OnigRegion* region;
4387 
4388   n = set->n;
4389 
4390   match_index = ONIG_MISMATCH;
4391   ep = orig_range;
4392   for (i = 0; i < n; i++) {
4393     reg    = set->rs[i].reg;
4394     region = set->rs[i].region;
4395     r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4396     if (r > 0) {
4397       if (str + r < ep) {
4398         match_index = i;
4399         *rmatch_pos = r;
4400         if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4401           break;
4402 
4403         ep = str + r;
4404       }
4405     }
4406     else if (r == 0) {
4407       match_index = i;
4408       *rmatch_pos = r;
4409       break;
4410     }
4411   }
4412 
4413   return match_index;
4414 }
4415 
4416 extern int
onig_regset_search_with_param(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4417 onig_regset_search_with_param(OnigRegSet* set,
4418            const UChar* str, const UChar* end,
4419            const UChar* start, const UChar* range,
4420            OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
4421            int* rmatch_pos)
4422 {
4423   int r;
4424   int i;
4425   UChar *s, *prev;
4426   regex_t* reg;
4427   OnigEncoding enc;
4428   OnigRegion* region;
4429   MatchArg* msas;
4430   const UChar *orig_start = start;
4431   const UChar *orig_range = range;
4432 
4433   if (set->n == 0)
4434     return ONIG_MISMATCH;
4435 
4436   if (IS_POSIX_REGION(option))
4437     return ONIGERR_INVALID_ARGUMENT;
4438 
4439   r = 0;
4440   enc = set->enc;
4441   msas = (MatchArg* )NULL;
4442 
4443   for (i = 0; i < set->n; i++) {
4444     reg    = set->rs[i].reg;
4445     region = set->rs[i].region;
4446     ADJUST_MATCH_PARAM(reg, mps[i]);
4447     if (IS_NOT_NULL(region)) {
4448       r = onig_region_resize_clear(region, reg->num_mem + 1);
4449       if (r != 0) goto finish_no_msa;
4450     }
4451   }
4452 
4453   if (start > end || start < str) goto mismatch_no_msa;
4454   if (str < end) {
4455     /* forward search only */
4456     if (range <= start)
4457       return ONIGERR_INVALID_ARGUMENT;
4458   }
4459 
4460   if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4461     if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
4462       r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4463       goto finish_no_msa;
4464     }
4465   }
4466 
4467   if (set->anchor != OPTIMIZE_NONE && str < end) {
4468     UChar *min_semi_end, *max_semi_end;
4469 
4470     if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
4471       /* search start-position only */
4472     begin_position:
4473       range = start + 1;
4474     }
4475     else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
4476       /* search str-position only */
4477       if (start != str) goto mismatch_no_msa;
4478       range = str + 1;
4479     }
4480     else if ((set->anchor & ANCR_END_BUF) != 0) {
4481       min_semi_end = max_semi_end = (UChar* )end;
4482 
4483     end_buf:
4484       if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
4485         goto mismatch_no_msa;
4486 
4487       if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
4488         start = min_semi_end - set->anc_dmax;
4489         if (start < end)
4490           start = onigenc_get_right_adjust_char_head(enc, str, start);
4491       }
4492       if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
4493         range = max_semi_end - set->anc_dmin + 1;
4494       }
4495       if (start > range) goto mismatch_no_msa;
4496     }
4497     else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
4498       UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
4499 
4500       max_semi_end = (UChar* )end;
4501       if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
4502         min_semi_end = pre_end;
4503 
4504 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4505         pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
4506         if (IS_NOT_NULL(pre_end) &&
4507             ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
4508           min_semi_end = pre_end;
4509         }
4510 #endif
4511         if (min_semi_end > str && start <= min_semi_end) {
4512           goto end_buf;
4513         }
4514       }
4515       else {
4516         min_semi_end = (UChar* )end;
4517         goto end_buf;
4518       }
4519     }
4520     else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
4521       goto begin_position;
4522     }
4523   }
4524   else if (str == end) { /* empty string */
4525     start = end = str;
4526     s = (UChar* )start;
4527     prev = (UChar* )NULL;
4528 
4529     msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4530     CHECK_NULL_RETURN_MEMERR(msas);
4531     for (i = 0; i < set->n; i++) {
4532       reg = set->rs[i].reg;
4533       MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
4534     }
4535     for (i = 0; i < set->n; i++) {
4536       reg = set->rs[i].reg;
4537       if (reg->threshold_len == 0) {
4538         REGSET_MATCH_AND_RETURN_CHECK(end);
4539       }
4540     }
4541 
4542     goto mismatch;
4543   }
4544 
4545   if (lead == ONIG_REGSET_POSITION_LEAD) {
4546     msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4547     CHECK_NULL_RETURN_MEMERR(msas);
4548 
4549     for (i = 0; i < set->n; i++) {
4550       MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
4551                      orig_start, mps[i]);
4552     }
4553 
4554     r = regset_search_body_position_lead(set, str, end, start, range,
4555                                          orig_range, option, msas, rmatch_pos);
4556   }
4557   else {
4558     r = regset_search_body_regex_lead(set, str, end, start, orig_range,
4559                                       lead, option, mps, rmatch_pos);
4560   }
4561   if (r < 0) goto finish;
4562   else       goto match2;
4563 
4564  mismatch:
4565   r = ONIG_MISMATCH;
4566  finish:
4567   for (i = 0; i < set->n; i++) {
4568     if (IS_NOT_NULL(msas))
4569       MATCH_ARG_FREE(msas[i]);
4570     if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4571         IS_NOT_NULL(set->rs[i].region)) {
4572       onig_region_clear(set->rs[i].region);
4573     }
4574   }
4575   if (IS_NOT_NULL(msas)) xfree(msas);
4576   return r;
4577 
4578  mismatch_no_msa:
4579   r = ONIG_MISMATCH;
4580  finish_no_msa:
4581   return r;
4582 
4583  match:
4584   *rmatch_pos = (int )(s - str);
4585  match2:
4586   for (i = 0; i < set->n; i++) {
4587     if (IS_NOT_NULL(msas))
4588       MATCH_ARG_FREE(msas[i]);
4589     if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4590         IS_NOT_NULL(set->rs[i].region)) {
4591       onig_region_clear(set->rs[i].region);
4592     }
4593   }
4594   if (IS_NOT_NULL(msas)) xfree(msas);
4595   return r; /* regex index */
4596 }
4597 
4598 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4599 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4600                    const UChar* start, const UChar* range,
4601                    OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4602 {
4603   int r;
4604   int i;
4605   OnigMatchParam* mp;
4606   OnigMatchParam** mps;
4607 
4608   mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4609   CHECK_NULL_RETURN_MEMERR(mps);
4610 
4611   mp = (OnigMatchParam* )(mps + set->n);
4612 
4613   for (i = 0; i < set->n; i++) {
4614     onig_initialize_match_param(mp + i);
4615     mps[i] = mp + i;
4616   }
4617 
4618   r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4619                                     rmatch_pos);
4620   for (i = 0; i < set->n; i++)
4621     onig_free_match_param_content(mp + i);
4622 
4623   xfree(mps);
4624 
4625   return r;
4626 }
4627 
4628 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4629 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4630             const UChar* text, const UChar* text_end, UChar* text_range)
4631 {
4632   UChar *t, *p, *s, *end;
4633 
4634   end = (UChar* )text_end;
4635   end -= target_end - target - 1;
4636   if (end > text_range)
4637     end = text_range;
4638 
4639   s = (UChar* )text;
4640 
4641   while (s < end) {
4642     if (*s == *target) {
4643       p = s + 1;
4644       t = target + 1;
4645       while (t < target_end) {
4646         if (*t != *p++)
4647           break;
4648         t++;
4649       }
4650       if (t == target_end)
4651         return s;
4652     }
4653     s += enclen(enc, s);
4654   }
4655 
4656   return (UChar* )NULL;
4657 }
4658 
4659 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)4660 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4661                      const UChar* t, const UChar* tend,
4662                      const UChar* p, const UChar* end)
4663 {
4664   int lowlen;
4665   UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4666 
4667   while (t < tend) {
4668     if (p >= end) return 0;
4669     lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4670     q = lowbuf;
4671     while (lowlen > 0) {
4672       if (t >= tend)    return 0;
4673       if (*t++ != *q++) return 0;
4674       lowlen--;
4675     }
4676   }
4677 
4678   return 1;
4679 }
4680 
4681 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4682 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4683                UChar* target, UChar* target_end,
4684                const UChar* text, const UChar* text_end, UChar* text_range)
4685 {
4686   UChar *s;
4687 
4688   s = (UChar* )text;
4689 
4690   while (s < text_range) {
4691     if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4692                              s, text_end))
4693       return s;
4694 
4695     s += enclen(enc, s);
4696   }
4697 
4698   return (UChar* )NULL;
4699 }
4700 
4701 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4702 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4703                      const UChar* text, const UChar* adjust_text,
4704                      const UChar* text_end, const UChar* text_start)
4705 {
4706   UChar *t, *p, *s;
4707 
4708   s = (UChar* )text_end;
4709   s -= (target_end - target);
4710   if (s > text_start)
4711     s = (UChar* )text_start;
4712   else
4713     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4714 
4715   while (s >= text) {
4716     if (*s == *target) {
4717       p = s + 1;
4718       t = target + 1;
4719       while (t < target_end) {
4720         if (*t != *p++)
4721           break;
4722         t++;
4723       }
4724       if (t == target_end)
4725         return s;
4726     }
4727     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4728   }
4729 
4730   return (UChar* )NULL;
4731 }
4732 
4733 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4734 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4735                         UChar* target, UChar* target_end,
4736                         const UChar* text, const UChar* adjust_text,
4737                         const UChar* text_end, const UChar* text_start)
4738 {
4739   UChar *s;
4740 
4741   s = (UChar* )text_end;
4742   s -= (target_end - target);
4743   if (s > text_start)
4744     s = (UChar* )text_start;
4745   else
4746     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4747 
4748   while (s >= text) {
4749     if (str_lower_case_match(enc, case_fold_flag,
4750                              target, target_end, s, text_end))
4751       return s;
4752 
4753     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4754   }
4755 
4756   return (UChar* )NULL;
4757 }
4758 
4759 
4760 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4761 sunday_quick_search_step_forward(regex_t* reg,
4762                                  const UChar* target, const UChar* target_end,
4763                                  const UChar* text, const UChar* text_end,
4764                                  const UChar* text_range)
4765 {
4766   const UChar *s, *se, *t, *p, *end;
4767   const UChar *tail;
4768   int skip, tlen1;
4769   int map_offset;
4770   OnigEncoding enc;
4771 
4772 #ifdef ONIG_DEBUG_SEARCH
4773   fprintf(stderr,
4774           "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
4775 #endif
4776 
4777   enc = reg->enc;
4778 
4779   tail = target_end - 1;
4780   tlen1 = (int )(tail - target);
4781   end = text_range;
4782   if (end + tlen1 > text_end)
4783     end = text_end - tlen1;
4784 
4785   map_offset = reg->map_offset;
4786   s = text;
4787 
4788   while (s < end) {
4789     p = se = s + tlen1;
4790     t = tail;
4791     while (*p == *t) {
4792       if (t == target) return (UChar* )s;
4793       p--; t--;
4794     }
4795     if (se + map_offset >= text_end) break;
4796     skip = reg->map[*(se + map_offset)];
4797 #if 0
4798     t = s;
4799     do {
4800       s += enclen(enc, s);
4801     } while ((s - t) < skip && s < end);
4802 #else
4803     s += skip;
4804     if (s < end)
4805       s = onigenc_get_right_adjust_char_head(enc, text, s);
4806 #endif
4807   }
4808 
4809   return (UChar* )NULL;
4810 }
4811 
4812 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4813 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4814                     const UChar* text, const UChar* text_end,
4815                     const UChar* text_range)
4816 {
4817   const UChar *s, *t, *p, *end;
4818   const UChar *tail;
4819   int map_offset;
4820 
4821   end = text_range + (target_end - target);
4822   if (end > text_end)
4823     end = text_end;
4824 
4825   map_offset = reg->map_offset;
4826   tail = target_end - 1;
4827   s = text + (tail - target);
4828 
4829   while (s < end) {
4830     p = s;
4831     t = tail;
4832     while (*p == *t) {
4833       if (t == target) return (UChar* )p;
4834       p--; t--;
4835     }
4836     if (s + map_offset >= text_end) break;
4837     s += reg->map[*(s + map_offset)];
4838   }
4839 
4840   return (UChar* )NULL;
4841 }
4842 
4843 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)4844 map_search(OnigEncoding enc, UChar map[],
4845            const UChar* text, const UChar* text_range)
4846 {
4847   const UChar *s = text;
4848 
4849   while (s < text_range) {
4850     if (map[*s]) return (UChar* )s;
4851 
4852     s += enclen(enc, s);
4853   }
4854   return (UChar* )NULL;
4855 }
4856 
4857 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)4858 map_search_backward(OnigEncoding enc, UChar map[],
4859                     const UChar* text, const UChar* adjust_text,
4860                     const UChar* text_start)
4861 {
4862   const UChar *s = text_start;
4863 
4864   while (s >= text) {
4865     if (map[*s]) return (UChar* )s;
4866 
4867     s = onigenc_get_prev_char_head(enc, adjust_text, s);
4868   }
4869   return (UChar* )NULL;
4870 }
4871 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)4872 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
4873            OnigRegion* region, OnigOptionType option)
4874 {
4875   int r;
4876   OnigMatchParam mp;
4877 
4878   onig_initialize_match_param(&mp);
4879   r = onig_match_with_param(reg, str, end, at, region, option, &mp);
4880   onig_free_match_param_content(&mp);
4881   return r;
4882 }
4883 
4884 extern int
onig_match_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)4885 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
4886                       const UChar* at, OnigRegion* region, OnigOptionType option,
4887                       OnigMatchParam* mp)
4888 {
4889   int r;
4890   UChar *prev;
4891   MatchArg msa;
4892 
4893   ADJUST_MATCH_PARAM(reg, mp);
4894   MATCH_ARG_INIT(msa, reg, option, region, at, mp);
4895   if (region
4896 #ifdef USE_POSIX_API_REGION_OPTION
4897       && !IS_POSIX_REGION(option)
4898 #endif
4899       ) {
4900     r = onig_region_resize_clear(region, reg->num_mem + 1);
4901   }
4902   else
4903     r = 0;
4904 
4905   if (r == 0) {
4906     if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4907       if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4908         r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4909         goto end;
4910       }
4911     }
4912 
4913     prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
4914     r = match_at(reg, str, end, end, at, prev, &msa);
4915   }
4916 
4917  end:
4918   MATCH_ARG_FREE(msa);
4919   return r;
4920 }
4921 
4922 static int
forward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * start,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)4923 forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
4924                UChar* range, UChar** low, UChar** high, UChar** low_prev)
4925 {
4926   UChar *p, *pprev = (UChar* )NULL;
4927 
4928 #ifdef ONIG_DEBUG_SEARCH
4929   fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
4930           str, end, start, range);
4931 #endif
4932 
4933   p = start;
4934   if (reg->dist_min != 0) {
4935     if (end - p <= reg->dist_min)
4936       return 0; /* fail */
4937 
4938     if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4939       p += reg->dist_min;
4940     }
4941     else {
4942       UChar *q = p + reg->dist_min;
4943       while (p < q) p += enclen(reg->enc, p);
4944     }
4945   }
4946 
4947  retry:
4948   switch (reg->optimize) {
4949   case OPTIMIZE_STR:
4950     p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4951     break;
4952   case OPTIMIZE_STR_CASE_FOLD:
4953     p = slow_search_ic(reg->enc, reg->case_fold_flag,
4954                        reg->exact, reg->exact_end, p, end, range);
4955     break;
4956 
4957   case OPTIMIZE_STR_FAST:
4958     p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
4959     break;
4960 
4961   case OPTIMIZE_STR_FAST_STEP_FORWARD:
4962     p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
4963                                          p, end, range);
4964     break;
4965 
4966   case OPTIMIZE_MAP:
4967     p = map_search(reg->enc, reg->map, p, range);
4968     break;
4969   }
4970 
4971   if (p && p < range) {
4972     if (p - start < reg->dist_min) {
4973     retry_gate:
4974       pprev = p;
4975       p += enclen(reg->enc, p);
4976       goto retry;
4977     }
4978 
4979     if (reg->sub_anchor) {
4980       UChar* prev;
4981 
4982       switch (reg->sub_anchor) {
4983       case ANCR_BEGIN_LINE:
4984         if (!ON_STR_BEGIN(p)) {
4985           prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
4986           if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4987             goto retry_gate;
4988         }
4989         break;
4990 
4991       case ANCR_END_LINE:
4992         if (ON_STR_END(p)) {
4993 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4994           prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4995                                                      (pprev ? pprev : str), p);
4996           if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4997             goto retry_gate;
4998 #endif
4999         }
5000         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5001 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5002                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5003 #endif
5004                  )
5005           goto retry_gate;
5006 
5007         break;
5008       }
5009     }
5010 
5011     if (reg->dist_max == 0) {
5012       *low = p;
5013       if (low_prev) {
5014         if (*low > start)
5015           *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
5016         else
5017           *low_prev = onigenc_get_prev_char_head(reg->enc,
5018                                                  (pprev ? pprev : str), p);
5019       }
5020       *high = p;
5021     }
5022     else {
5023       if (reg->dist_max != INFINITE_LEN) {
5024         if (p - str < reg->dist_max) {
5025           *low = (UChar* )str;
5026           if (low_prev)
5027             *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
5028         }
5029         else {
5030           *low = p - reg->dist_max;
5031           if (*low > start) {
5032             *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
5033                                                  *low, (const UChar** )low_prev);
5034           }
5035           else {
5036             if (low_prev)
5037               *low_prev = onigenc_get_prev_char_head(reg->enc,
5038                                                      (pprev ? pprev : str), *low);
5039           }
5040         }
5041       }
5042       /* no needs to adjust *high, *high is used as range check only */
5043       if (p - str < reg->dist_min)
5044         *high = (UChar* )str;
5045       else
5046         *high = p - reg->dist_min;
5047     }
5048 
5049 #ifdef ONIG_DEBUG_SEARCH
5050     fprintf(stderr,
5051             "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
5052             (int )(*low - str), (int )(*high - str),
5053             reg->dist_min, reg->dist_max);
5054 #endif
5055     return 1; /* success */
5056   }
5057 
5058   return 0; /* fail */
5059 }
5060 
5061 
5062 static int
backward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)5063 backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
5064                 const UChar* range, UChar* adjrange, UChar** low, UChar** high)
5065 {
5066   UChar *p;
5067 
5068   p = s;
5069 
5070  retry:
5071   switch (reg->optimize) {
5072   case OPTIMIZE_STR:
5073   exact_method:
5074     p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5075                              range, adjrange, end, p);
5076     break;
5077 
5078   case OPTIMIZE_STR_CASE_FOLD:
5079     p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5080                                 reg->exact, reg->exact_end,
5081                                 range, adjrange, end, p);
5082     break;
5083 
5084   case OPTIMIZE_STR_FAST:
5085   case OPTIMIZE_STR_FAST_STEP_FORWARD:
5086     goto exact_method;
5087     break;
5088 
5089   case OPTIMIZE_MAP:
5090     p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
5091     break;
5092   }
5093 
5094   if (p) {
5095     if (reg->sub_anchor) {
5096       UChar* prev;
5097 
5098       switch (reg->sub_anchor) {
5099       case ANCR_BEGIN_LINE:
5100         if (!ON_STR_BEGIN(p)) {
5101           prev = onigenc_get_prev_char_head(reg->enc, str, p);
5102           if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5103             p = prev;
5104             goto retry;
5105           }
5106         }
5107         break;
5108 
5109       case ANCR_END_LINE:
5110         if (ON_STR_END(p)) {
5111 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5112           prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5113           if (IS_NULL(prev)) goto fail;
5114           if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5115             p = prev;
5116             goto retry;
5117           }
5118 #endif
5119         }
5120         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5121 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5122                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5123 #endif
5124                  ) {
5125           p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5126           if (IS_NULL(p)) goto fail;
5127           goto retry;
5128         }
5129         break;
5130       }
5131     }
5132 
5133     if (reg->dist_max != INFINITE_LEN) {
5134       if (p - str < reg->dist_max)
5135         *low = (UChar* )str;
5136       else
5137         *low = p - reg->dist_max;
5138 
5139       if (reg->dist_min != 0) {
5140         if (p - str < reg->dist_min)
5141           *high = (UChar* )str;
5142         else
5143           *high = p - reg->dist_min;
5144       }
5145       else {
5146         *high = p;
5147       }
5148 
5149       *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
5150     }
5151 
5152 #ifdef ONIG_DEBUG_SEARCH
5153     fprintf(stderr, "backward_search: low: %d, high: %d\n",
5154             (int )(*low - str), (int )(*high - str));
5155 #endif
5156     return 1; /* success */
5157   }
5158 
5159  fail:
5160 #ifdef ONIG_DEBUG_SEARCH
5161   fprintf(stderr, "backward_search: fail.\n");
5162 #endif
5163   return 0; /* fail */
5164 }
5165 
5166 
5167 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5168 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5169             const UChar* start, const UChar* range, OnigRegion* region,
5170             OnigOptionType option)
5171 {
5172   int r;
5173   OnigMatchParam mp;
5174   const UChar* data_range;
5175 
5176   onig_initialize_match_param(&mp);
5177 
5178   /* The following is an expanded code of onig_search_with_param()  */
5179   if (range > start)
5180     data_range = range;
5181   else
5182     data_range = end;
5183 
5184   r = search_in_range(reg, str, end, start, range, data_range, region,
5185                       option, &mp);
5186 
5187   onig_free_match_param_content(&mp);
5188   return r;
5189 
5190 }
5191 
/*
 * Core search routine called by onig_search() and onig_search_with_param().
 *
 * Tries match_at() at successive candidate positions starting at `start`:
 * forward while range > start, backward otherwise.  `data_range` is the
 * upper range handed to match_at() during forward search; backward search
 * uses orig_start (advanced by one character when it is before `end`).
 *
 * Returns the byte offset (s - str) of the position where match_at()
 * succeeded, ONIG_MISMATCH when no position matched, or another negative
 * value on error (e.g. ONIGERR_INVALID_WIDE_CHAR_VALUE, or an error
 * returned by onig_region_resize_clear() / match_at()).
 */
5192 static int
search_in_range(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * data_range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5193 search_in_range(regex_t* reg, const UChar* str, const UChar* end,
5194                 const UChar* start, const UChar* range, /* match start range */
5195                 const UChar* data_range, /* subject string range */
5196                 OnigRegion* region,
5197                 OnigOptionType option, OnigMatchParam* mp)
5198 {
5199   int r;
5200   UChar *s, *prev;
5201   MatchArg msa;
5202   const UChar *orig_start = start;
5203 
5204 #ifdef ONIG_DEBUG_SEARCH
5205   fprintf(stderr,
5206      "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
5207      str, (int )(end - str), (int )(start - str), (int )(range - str));
5208 #endif
5209 
5210   ADJUST_MATCH_PARAM(reg, mp);
5211 
/* Size the caller's region for reg->num_mem + 1 entries and clear it. */
5212   if (region
5213 #ifdef USE_POSIX_API_REGION_OPTION
5214       && !IS_POSIX_REGION(option)
5215 #endif
5216       ) {
5217     r = onig_region_resize_clear(region, reg->num_mem + 1);
5218     if (r != 0) goto finish_no_msa;
5219   }
5220 
/* A start position outside [str, end] can never match. */
5221   if (start > end || start < str) goto mismatch_no_msa;
5222 
5223   if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
5224     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
5225       r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
5226       goto finish_no_msa;
5227     }
5228   }
5229 
5230 
/*
 * MATCH_AND_RETURN_CHECK(upper_range): run match_at() at position s.
 * A result >= 0 jumps to `match` (except that, with
 * USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE and the FIND_LONGEST option, the
 * scan continues); a negative result other than ONIG_MISMATCH jumps to
 * `finish`; ONIG_MISMATCH falls through to the caller's loop.
 */
5231 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5232 #define MATCH_AND_RETURN_CHECK(upper_range) \
5233   r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5234   if (r != ONIG_MISMATCH) {\
5235     if (r >= 0) {\
5236       if (! IS_FIND_LONGEST(reg->options)) {\
5237         goto match;\
5238       }\
5239     }\
5240     else goto finish; /* error */ \
5241   }
5242 #else
5243 #define MATCH_AND_RETURN_CHECK(upper_range) \
5244   r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5245   if (r != ONIG_MISMATCH) {\
5246     if (r >= 0) {\
5247       goto match;\
5248     }\
5249     else goto finish; /* error */ \
5250   }
5251 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5252 
5253 
/*
 * Anchor-based narrowing: depending on which anchor bits the regex has,
 * shrink start/range to the only positions that can still match, or bail
 * out with a mismatch before any MatchArg is set up.
 */
5254   /* anchor optimize: resume search range */
5255   if (reg->anchor != 0 && str < end) {
5256     UChar *min_semi_end, *max_semi_end;
5257 
5258     if (reg->anchor & ANCR_BEGIN_POSITION) {
5259       /* search start-position only */
5260     begin_position:
5261       if (range > start)
5262         range = start + 1;
5263       else
5264         range = start;
5265     }
5266     else if (reg->anchor & ANCR_BEGIN_BUF) {
5267       /* search str-position only */
5268       if (range > start) {
5269         if (start != str) goto mismatch_no_msa;
5270         range = str + 1;
5271       }
5272       else {
5273         if (range <= str) {
5274           start = str;
5275           range = str;
5276         }
5277         else
5278           goto mismatch_no_msa;
5279       }
5280     }
5281     else if (reg->anchor & ANCR_END_BUF) {
5282       min_semi_end = max_semi_end = (UChar* )end;
5283 
/* Shared with the ANCR_SEMI_END_BUF case below, which jumps here after
   computing its own min_semi_end / max_semi_end. */
5284     end_buf:
5285       if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
5286         goto mismatch_no_msa;
5287 
5288       if (range > start) {
5289         if (reg->anc_dist_max != INFINITE_LEN &&
5290             min_semi_end - start > reg->anc_dist_max) {
5291           start = min_semi_end - reg->anc_dist_max;
5292           if (start < end)
5293             start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
5294         }
5295         if (max_semi_end - (range - 1) < reg->anc_dist_min) {
5296           if (max_semi_end - str + 1 < reg->anc_dist_min)
5297             goto mismatch_no_msa;
5298           else
5299             range = max_semi_end - reg->anc_dist_min + 1;
5300         }
5301 
5302         if (start > range) goto mismatch_no_msa;
5303         /* If start == range, match with empty at end.
5304            Backward search is used. */
5305       }
5306       else {
5307         if (reg->anc_dist_max != INFINITE_LEN &&
5308             min_semi_end - range > reg->anc_dist_max) {
5309           range = min_semi_end - reg->anc_dist_max;
5310         }
5311         if (max_semi_end - start < reg->anc_dist_min) {
5312           if (max_semi_end - str < reg->anc_dist_min)
5313             goto mismatch_no_msa;
5314           else {
5315             start = max_semi_end - reg->anc_dist_min;
5316             start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
5317           }
5318         }
5319         if (range > start) goto mismatch_no_msa;
5320       }
5321     }
5322     else if (reg->anchor & ANCR_SEMI_END_BUF) {
5323       UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
5324 
5325       max_semi_end = (UChar* )end;
5326       if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5327         min_semi_end = pre_end;
5328 
5329 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5330         pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
5331         if (IS_NOT_NULL(pre_end) &&
5332             ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5333           min_semi_end = pre_end;
5334         }
5335 #endif
5336         if (min_semi_end > str && start <= min_semi_end) {
5337           goto end_buf;
5338         }
5339       }
5340       else {
5341         min_semi_end = (UChar* )end;
5342         goto end_buf;
5343       }
5344     }
5345     else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
5346       goto begin_position;
5347     }
5348   }
/*
 * Empty subject: str/start/end are redirected to a static "" and a single
 * match_at() attempt is made, but only when reg->threshold_len == 0.
 */
5349   else if (str == end) { /* empty string */
5350     static const UChar* address_for_empty_string = (UChar* )"";
5351 
5352 #ifdef ONIG_DEBUG_SEARCH
5353     fprintf(stderr, "onig_search: empty string.\n");
5354 #endif
5355 
5356     if (reg->threshold_len == 0) {
5357       start = end = str = address_for_empty_string;
5358       s = (UChar* )start;
5359       prev = (UChar* )NULL;
5360 
5361       MATCH_ARG_INIT(msa, reg, option, region, start, mp);
5362       MATCH_AND_RETURN_CHECK(end);
5363       goto mismatch;
5364     }
5365     goto mismatch_no_msa;
5366   }
5367 
5368 #ifdef ONIG_DEBUG_SEARCH
5369   fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5370           (int )(end - str), (int )(start - str), (int )(range - str));
5371 #endif
5372 
/* From here on msa is initialized, so exits must go through `mismatch`,
   `finish` or `match`, each of which calls MATCH_ARG_FREE(msa). */
5373   MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
5374 
5375   s = (UChar* )start;
5376   if (range > start) {   /* forward search */
5377     if (s > str)
5378       prev = onigenc_get_prev_char_head(reg->enc, str, s);
5379     else
5380       prev = (UChar* )NULL;
5381 
5382     if (reg->optimize != OPTIMIZE_NONE) {
5383       UChar *sch_range, *low, *high, *low_prev;
5384 
/* sch_range: how far forward_search() may look, i.e. range extended by
   dist_max and capped at end. */
5385       if (reg->dist_max != 0) {
5386         if (reg->dist_max == INFINITE_LEN)
5387           sch_range = (UChar* )end;
5388         else {
5389           if ((end - range) < reg->dist_max)
5390             sch_range = (UChar* )end;
5391           else {
5392             sch_range = (UChar* )range + reg->dist_max;
5393           }
5394         }
5395       }
5396       else
5397         sch_range = (UChar* )range;
5398 
5399       if ((end - start) < reg->threshold_len)
5400         goto mismatch;
5401 
/* Finite dist_max: forward_search() yields a [low, high] window and
   match_at() is tried only at positions inside it. */
5402       if (reg->dist_max != INFINITE_LEN) {
5403         do {
5404           if (! forward_search(reg, str, end, s, sch_range, &low, &high,
5405                                &low_prev)) goto mismatch;
5406           if (s < low) {
5407             s    = low;
5408             prev = low_prev;
5409           }
5410           while (s <= high) {
5411             MATCH_AND_RETURN_CHECK(data_range);
5412             prev = s;
5413             s += enclen(reg->enc, s);
5414           }
5415         } while (s < range);
5416         goto mismatch;
5417       }
5418       else { /* check only. */
5419         if (! forward_search(reg, str, end, s, sch_range, &low, &high,
5420                              (UChar** )NULL)) goto mismatch;
5421 
5422         if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
5423           do {
5424             MATCH_AND_RETURN_CHECK(data_range);
5425             prev = s;
5426             s += enclen(reg->enc, s);
5427 
/* Unless look-behind / negative prec-read anchors are present, skip
   ahead until the previous character is a newline (or range is hit). */
5428             if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
5429               while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
5430                 prev = s;
5431                 s += enclen(reg->enc, s);
5432               }
5433             }
5434           } while (s < range);
5435           goto mismatch;
5436         }
5437       }
5438     }
5439 
/* Plain forward scan: try every character position in [s, range). */
5440     do {
5441       MATCH_AND_RETURN_CHECK(data_range);
5442       prev = s;
5443       s += enclen(reg->enc, s);
5444     } while (s < range);
5445 
5446     if (s == range) { /* because empty match with /$/. */
5447       MATCH_AND_RETURN_CHECK(data_range);
5448     }
5449   }
5450   else {  /* backward search */
5451     if (range < str) goto mismatch;
5452 
5453     if (orig_start < end)
5454       orig_start += enclen(reg->enc, orig_start); /* is upper range */
5455 
5456     if (reg->optimize != OPTIMIZE_NONE) {
5457       UChar *low, *high, *adjrange, *sch_start;
5458       const UChar *min_range;
5459 
5460       if ((end - range) < reg->threshold_len) goto mismatch;
5461 
5462       if (range < end)
5463         adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
5464       else
5465         adjrange = (UChar* )end;
5466 
5467       if (end - range > reg->dist_min)
5468         min_range = range + reg->dist_min;
5469       else
5470         min_range = end;
5471 
/* Finite dist_max: backward_search() yields a [low, high] window and
   match_at() is tried only at positions inside it, walking downward. */
5472       if (reg->dist_max != INFINITE_LEN) {
5473         do {
5474           if (end - s > reg->dist_max)
5475             sch_start = s + reg->dist_max;
5476           else {
5477             sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5478           }
5479 
5480           if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5481                               &low, &high) <= 0)
5482             goto mismatch;
5483 
5484           if (s > high)
5485             s = high;
5486 
5487           while (s >= low) {
5488             prev = onigenc_get_prev_char_head(reg->enc, str, s);
5489             MATCH_AND_RETURN_CHECK(orig_start);
5490             s = prev;
5491           }
5492         } while (s >= range);
5493         goto mismatch;
5494       }
5495       else { /* check only. */
5496         sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5497 
5498         if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5499                             &low, &high) <= 0) goto mismatch;
5500       }
5501     }
5502 
/* Plain backward scan: try every character position from s down to range. */
5503     do {
5504       prev = onigenc_get_prev_char_head(reg->enc, str, s);
5505       MATCH_AND_RETURN_CHECK(orig_start);
5506       s = prev;
5507     } while (s >= range);
5508   }
5509 
/* No position matched.  With FIND_LONGEST search-all, a best match recorded
   in msa (best_len >= 0) is still reported as a match. */
5510  mismatch:
5511 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5512   if (IS_FIND_LONGEST(reg->options)) {
5513     if (msa.best_len >= 0) {
5514       s = msa.best_s;
5515       goto match;
5516     }
5517   }
5518 #endif
5519   r = ONIG_MISMATCH;
5520 
/* Common exit for mismatch and error once msa has been initialized. */
5521  finish:
5522   MATCH_ARG_FREE(msa);
5523 
5524   /* If result is mismatch and no FIND_NOT_EMPTY option,
5525      then the region is not set in match_at(). */
5526   if (IS_FIND_NOT_EMPTY(reg->options) && region
5527 #ifdef USE_POSIX_API_REGION_OPTION
5528       && !IS_POSIX_REGION(option)
5529 #endif
5530       ) {
5531     onig_region_clear(region);
5532   }
5533 
5534 #ifdef ONIG_DEBUG
5535   if (r != ONIG_MISMATCH)
5536     fprintf(stderr, "onig_search: error %d\n", r);
5537 #endif
5538   return r;
5539 
/* Exits taken before MATCH_ARG_INIT: nothing to free. */
5540  mismatch_no_msa:
5541   r = ONIG_MISMATCH;
5542  finish_no_msa:
5543 #ifdef ONIG_DEBUG
5544   if (r != ONIG_MISMATCH)
5545     fprintf(stderr, "onig_search: error %d\n", r);
5546 #endif
5547   return r;
5548 
/* Success: report the offset of the matching start position. */
5549  match:
5550   MATCH_ARG_FREE(msa);
5551   return (int )(s - str);
5552 }
5553 
5554 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5555 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5556                        const UChar* start, const UChar* range, OnigRegion* region,
5557                        OnigOptionType option, OnigMatchParam* mp)
5558 {
5559   const UChar* data_range;
5560 
5561   if (range > start)
5562     data_range = range;
5563   else
5564     data_range = end;
5565 
5566   return search_in_range(reg, str, end, start, range, data_range, region,
5567                          option, mp);
5568 }
5569 
5570 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5571 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5572           OnigRegion* region, OnigOptionType option,
5573           int (*scan_callback)(int, int, OnigRegion*, void*),
5574           void* callback_arg)
5575 {
5576   int r;
5577   int n;
5578   int rs;
5579   const UChar* start;
5580 
5581   if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
5582     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5583       return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5584 
5585     ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5586   }
5587 
5588   n = 0;
5589   start = str;
5590   while (1) {
5591     r = onig_search(reg, str, end, start, end, region, option);
5592     if (r >= 0) {
5593       rs = scan_callback(n, r, region, callback_arg);
5594       n++;
5595       if (rs != 0)
5596         return rs;
5597 
5598       if (region->end[0] == start - str) {
5599         if (start >= end) break;
5600         start += enclen(reg->enc, start);
5601       }
5602       else
5603         start = str + region->end[0];
5604 
5605       if (start > end)
5606         break;
5607     }
5608     else if (r == ONIG_MISMATCH) {
5609       break;
5610     }
5611     else { /* error */
5612       return r;
5613     }
5614   }
5615 
5616   return n;
5617 }
5618 
5619 extern OnigEncoding
onig_get_encoding(regex_t * reg)5620 onig_get_encoding(regex_t* reg)
5621 {
5622   return reg->enc;
5623 }
5624 
5625 extern OnigOptionType
onig_get_options(regex_t * reg)5626 onig_get_options(regex_t* reg)
5627 {
5628   return reg->options;
5629 }
5630 
5631 extern  OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5632 onig_get_case_fold_flag(regex_t* reg)
5633 {
5634   return reg->case_fold_flag;
5635 }
5636 
5637 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5638 onig_get_syntax(regex_t* reg)
5639 {
5640   return reg->syntax;
5641 }
5642 
5643 extern int
onig_number_of_captures(regex_t * reg)5644 onig_number_of_captures(regex_t* reg)
5645 {
5646   return reg->num_mem;
5647 }
5648 
5649 extern int
onig_number_of_capture_histories(regex_t * reg)5650 onig_number_of_capture_histories(regex_t* reg)
5651 {
5652 #ifdef USE_CAPTURE_HISTORY
5653   int i, n;
5654 
5655   n = 0;
5656   for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5657     if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5658       n++;
5659   }
5660   return n;
5661 #else
5662   return 0;
5663 #endif
5664 }
5665 
5666 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)5667 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5668 {
5669   *to = *from;
5670 }
5671 
5672 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5673 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5674 {
5675 #define REGSET_INITIAL_ALLOC_SIZE   10
5676 
5677   int i;
5678   int r;
5679   int alloc;
5680   OnigRegSet* set;
5681   RR* rs;
5682 
5683   *rset = 0;
5684 
5685   set = (OnigRegSet* )xmalloc(sizeof(*set));
5686   CHECK_NULL_RETURN_MEMERR(set);
5687 
5688   alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5689   rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5690   if (IS_NULL(rs)) {
5691     xfree(set);
5692     return ONIGERR_MEMORY;
5693   }
5694 
5695   set->rs    = rs;
5696   set->n     = 0;
5697   set->alloc = alloc;
5698 
5699   for (i = 0; i < n; i++) {
5700     regex_t* reg = regs[i];
5701 
5702     r = onig_regset_add(set, reg);
5703     if (r != 0) {
5704       for (i = 0; i < set->n; i++) {
5705         OnigRegion* region = set->rs[i].region;
5706         if (IS_NOT_NULL(region))
5707           onig_region_free(region, 1);
5708       }
5709       xfree(set->rs);
5710       xfree(set);
5711       return r;
5712     }
5713   }
5714 
5715   *rset = set;
5716   return 0;
5717 }
5718 
5719 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5720 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5721 {
5722   if (set->n == 1) {
5723     set->enc          = reg->enc;
5724     set->anchor       = reg->anchor;
5725     set->anc_dmin     = reg->anc_dist_min;
5726     set->anc_dmax     = reg->anc_dist_max;
5727     set->all_low_high =
5728       (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5729     set->anychar_inf  = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5730   }
5731   else {
5732     int anchor;
5733 
5734     anchor = set->anchor & reg->anchor;
5735     if (anchor != 0) {
5736       OnigLen anc_dmin;
5737       OnigLen anc_dmax;
5738 
5739       anc_dmin = set->anc_dmin;
5740       anc_dmax = set->anc_dmax;
5741       if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5742       if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5743       set->anc_dmin = anc_dmin;
5744       set->anc_dmax = anc_dmax;
5745     }
5746 
5747     set->anchor = anchor;
5748 
5749     if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5750       set->all_low_high = 0;
5751 
5752     if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5753       set->anychar_inf = 1;
5754   }
5755 }
5756 
5757 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5758 onig_regset_add(OnigRegSet* set, regex_t* reg)
5759 {
5760   OnigRegion* region;
5761 
5762   if (IS_FIND_LONGEST(reg->options))
5763     return ONIGERR_INVALID_ARGUMENT;
5764 
5765   if (set->n != 0 && reg->enc != set->enc)
5766     return ONIGERR_INVALID_ARGUMENT;
5767 
5768   if (set->n >= set->alloc) {
5769     RR* nrs;
5770     int new_alloc;
5771 
5772     new_alloc = set->alloc * 2;
5773     nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5774     CHECK_NULL_RETURN_MEMERR(nrs);
5775 
5776     set->rs    = nrs;
5777     set->alloc = new_alloc;
5778   }
5779 
5780   region = onig_region_new();
5781   CHECK_NULL_RETURN_MEMERR(region);
5782 
5783   set->rs[set->n].reg    = reg;
5784   set->rs[set->n].region = region;
5785   set->n++;
5786 
5787   update_regset_by_reg(set, reg);
5788   return 0;
5789 }
5790 
5791 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)5792 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
5793 {
5794   int i;
5795 
5796   if (at < 0 || at >= set->n)
5797     return ONIGERR_INVALID_ARGUMENT;
5798 
5799   if (IS_NULL(reg)) {
5800     onig_region_free(set->rs[at].region, 1);
5801     for (i = at; i < set->n - 1; i++) {
5802       set->rs[i].reg    = set->rs[i+1].reg;
5803       set->rs[i].region = set->rs[i+1].region;
5804     }
5805     set->n--;
5806   }
5807   else {
5808     if (IS_FIND_LONGEST(reg->options))
5809       return ONIGERR_INVALID_ARGUMENT;
5810 
5811     if (set->n > 1 && reg->enc != set->enc)
5812       return ONIGERR_INVALID_ARGUMENT;
5813 
5814     set->rs[at].reg = reg;
5815   }
5816 
5817   for (i = 0; i < set->n; i++)
5818     update_regset_by_reg(set, set->rs[i].reg);
5819 
5820   return 0;
5821 }
5822 
5823 extern void
onig_regset_free(OnigRegSet * set)5824 onig_regset_free(OnigRegSet* set)
5825 {
5826   int i;
5827 
5828   for (i = 0; i < set->n; i++) {
5829     regex_t* reg;
5830     OnigRegion* region;
5831 
5832     reg    = set->rs[i].reg;
5833     region = set->rs[i].region;
5834     onig_free(reg);
5835     if (IS_NOT_NULL(region))
5836       onig_region_free(region, 1);
5837   }
5838 
5839   xfree(set->rs);
5840   xfree(set);
5841 }
5842 
5843 extern int
onig_regset_number_of_regex(OnigRegSet * set)5844 onig_regset_number_of_regex(OnigRegSet* set)
5845 {
5846   return set->n;
5847 }
5848 
5849 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)5850 onig_regset_get_regex(OnigRegSet* set, int at)
5851 {
5852   if (at < 0 || at >= set->n)
5853     return (regex_t* )0;
5854 
5855   return set->rs[at].reg;
5856 }
5857 
5858 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)5859 onig_regset_get_region(OnigRegSet* set, int at)
5860 {
5861   if (at < 0 || at >= set->n)
5862     return (OnigRegion* )0;
5863 
5864   return set->rs[at].region;
5865 }
5866 
5867 
5868 #ifdef USE_DIRECT_THREADED_CODE
5869 extern int
onig_init_for_match_at(regex_t * reg)5870 onig_init_for_match_at(regex_t* reg)
5871 {
5872   return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
5873                   (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
5874                   (MatchArg* )NULL);
5875 }
5876 #endif
5877 
5878 
5879 /* for callout functions */
5880 
5881 #ifdef USE_CALLOUT
5882 
5883 extern OnigCalloutFunc
onig_get_progress_callout(void)5884 onig_get_progress_callout(void)
5885 {
5886   return DefaultProgressCallout;
5887 }
5888 
5889 extern int
onig_set_progress_callout(OnigCalloutFunc f)5890 onig_set_progress_callout(OnigCalloutFunc f)
5891 {
5892   DefaultProgressCallout = f;
5893   return ONIG_NORMAL;
5894 }
5895 
5896 extern OnigCalloutFunc
onig_get_retraction_callout(void)5897 onig_get_retraction_callout(void)
5898 {
5899   return DefaultRetractionCallout;
5900 }
5901 
5902 extern int
onig_set_retraction_callout(OnigCalloutFunc f)5903 onig_set_retraction_callout(OnigCalloutFunc f)
5904 {
5905   DefaultRetractionCallout = f;
5906   return ONIG_NORMAL;
5907 }
5908 
5909 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)5910 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
5911 {
5912   return args->num;
5913 }
5914 
5915 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)5916 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
5917 {
5918   return args->in;
5919 }
5920 
5921 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)5922 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
5923 {
5924   return args->name_id;
5925 }
5926 
5927 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)5928 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
5929 {
5930   int num;
5931   CalloutListEntry* e;
5932 
5933   num = args->num;
5934   e = onig_reg_callout_list_at(args->regex, num);
5935   if (IS_NULL(e)) return 0;
5936   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5937     return e->u.content.start;
5938   }
5939 
5940   return 0;
5941 }
5942 
5943 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)5944 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
5945 {
5946   int num;
5947   CalloutListEntry* e;
5948 
5949   num = args->num;
5950   e = onig_reg_callout_list_at(args->regex, num);
5951   if (IS_NULL(e)) return 0;
5952   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5953     return e->u.content.end;
5954   }
5955 
5956   return 0;
5957 }
5958 
5959 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)5960 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
5961 {
5962   int num;
5963   CalloutListEntry* e;
5964 
5965   num = args->num;
5966   e = onig_reg_callout_list_at(args->regex, num);
5967   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5968   if (e->of == ONIG_CALLOUT_OF_NAME) {
5969     return e->u.arg.num;
5970   }
5971 
5972   return ONIGERR_INVALID_ARGUMENT;
5973 }
5974 
5975 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)5976 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
5977 {
5978   int num;
5979   CalloutListEntry* e;
5980 
5981   num = args->num;
5982   e = onig_reg_callout_list_at(args->regex, num);
5983   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5984   if (e->of == ONIG_CALLOUT_OF_NAME) {
5985     return e->u.arg.passed_num;
5986   }
5987 
5988   return ONIGERR_INVALID_ARGUMENT;
5989 }
5990 
5991 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)5992 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
5993                              OnigType* type, OnigValue* val)
5994 {
5995   int num;
5996   CalloutListEntry* e;
5997 
5998   num = args->num;
5999   e = onig_reg_callout_list_at(args->regex, num);
6000   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6001   if (e->of == ONIG_CALLOUT_OF_NAME) {
6002     if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6003     if (IS_NOT_NULL(val))  *val  = e->u.arg.vals[index];
6004     return ONIG_NORMAL;
6005   }
6006 
6007   return ONIGERR_INVALID_ARGUMENT;
6008 }
6009 
6010 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6011 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6012 {
6013   return args->string;
6014 }
6015 
6016 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6017 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6018 {
6019   return args->string_end;
6020 }
6021 
6022 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6023 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6024 {
6025   return args->start;
6026 }
6027 
6028 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6029 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6030 {
6031   return args->right_range;
6032 }
6033 
6034 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6035 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6036 {
6037   return args->current;
6038 }
6039 
6040 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6041 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6042 {
6043   return args->regex;
6044 }
6045 
6046 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6047 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6048 {
6049   return args->retry_in_match_counter;
6050 }
6051 
6052 
6053 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6054 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6055 {
6056   OnigRegex    reg;
6057   const UChar* str;
6058   StackType*   stk_base;
6059   int i;
6060   StackIndex* mem_start_stk;
6061   StackIndex* mem_end_stk;
6062 
6063   i = mem_num;
6064   reg = a->regex;
6065   str = a->string;
6066   stk_base = a->stk_base;
6067   mem_start_stk = a->mem_start_stk;
6068   mem_end_stk   = a->mem_end_stk;
6069 
6070   if (i > 0) {
6071     if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
6072       *begin = (int )(STACK_MEM_START(reg, i) - str);
6073       *end   = (int )(STACK_MEM_END(reg, i)   - str);
6074     }
6075     else {
6076       *begin = *end = ONIG_REGION_NOTPOS;
6077     }
6078   }
6079   else
6080     return ONIGERR_INVALID_ARGUMENT;
6081 
6082   return ONIG_NORMAL;
6083 }
6084 
6085 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6086 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6087 {
6088   int n;
6089 
6090   n = (int )(a->stk - a->stk_base);
6091 
6092   if (used_num != 0)
6093     *used_num = n;
6094 
6095   if (used_bytes != 0)
6096     *used_bytes = n * sizeof(StackType);
6097 
6098   return ONIG_NORMAL;
6099 }
6100 
6101 
6102 /* builtin callout functions */
6103 
6104 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6105 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6106 {
6107   return ONIG_CALLOUT_FAIL;
6108 }
6109 
6110 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6111 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6112 {
6113   return ONIG_MISMATCH;
6114 }
6115 
6116 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6117 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6118 {
6119   int r;
6120   int n;
6121   OnigValue val;
6122 
6123   r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6124   if (r != ONIG_NORMAL) return r;
6125 
6126   n = (int )val.l;
6127   if (n >= 0) {
6128     n = ONIGERR_INVALID_CALLOUT_BODY;
6129   }
6130   else if (onig_is_error_code_needs_param(n)) {
6131     n = ONIGERR_INVALID_CALLOUT_BODY;
6132   }
6133 
6134   return n;
6135 }
6136 
6137 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6138 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6139 {
6140   (void )onig_check_callout_data_and_clear_old_values(args);
6141 
6142   return onig_builtin_total_count(args, user_data);
6143 }
6144 
6145 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6146 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6147 {
6148   int r;
6149   int slot;
6150   OnigType  type;
6151   OnigValue val;
6152   OnigValue aval;
6153   OnigCodePoint count_type;
6154 
6155   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6156   if (r != ONIG_NORMAL) return r;
6157 
6158   count_type = aval.c;
6159   if (count_type != '>' && count_type != 'X' && count_type != '<')
6160     return ONIGERR_INVALID_CALLOUT_ARG;
6161 
6162   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6163                                                                 &type, &val);
6164   if (r < ONIG_NORMAL)
6165     return r;
6166   else if (r > ONIG_NORMAL) {
6167     /* type == void: initial state */
6168     val.l = 0;
6169   }
6170 
6171   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6172     slot = 2;
6173     if (count_type == '<')
6174       val.l++;
6175     else if (count_type == 'X')
6176       val.l--;
6177   }
6178   else {
6179     slot = 1;
6180     if (count_type != '<')
6181       val.l++;
6182   }
6183 
6184   r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6185   if (r != ONIG_NORMAL) return r;
6186 
6187   /* slot 1: in progress counter, slot 2: in retraction counter */
6188   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6189                                                                 &type, &val);
6190   if (r < ONIG_NORMAL)
6191     return r;
6192   else if (r > ONIG_NORMAL) {
6193     val.l = 0;
6194   }
6195 
6196   val.l++;
6197   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6198   if (r != ONIG_NORMAL) return r;
6199 
6200   return ONIG_CALLOUT_SUCCESS;
6201 }
6202 
6203 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6204 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6205 {
6206   int r;
6207   int slot;
6208   long max_val;
6209   OnigCodePoint count_type;
6210   OnigType  type;
6211   OnigValue val;
6212   OnigValue aval;
6213 
6214   (void )onig_check_callout_data_and_clear_old_values(args);
6215 
6216   slot = 0;
6217   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6218   if (r < ONIG_NORMAL)
6219     return r;
6220   else if (r > ONIG_NORMAL) {
6221     /* type == void: initial state */
6222     type  = ONIG_TYPE_LONG;
6223     val.l = 0;
6224   }
6225 
6226   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6227   if (r != ONIG_NORMAL) return r;
6228   if (type == ONIG_TYPE_TAG) {
6229     r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6230     if (r < ONIG_NORMAL) return r;
6231     else if (r > ONIG_NORMAL)
6232       max_val = 0L;
6233     else
6234       max_val = aval.l;
6235   }
6236   else { /* LONG */
6237     max_val = aval.l;
6238   }
6239 
6240   r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6241   if (r != ONIG_NORMAL) return r;
6242 
6243   count_type = aval.c;
6244   if (count_type != '>' && count_type != 'X' && count_type != '<')
6245     return ONIGERR_INVALID_CALLOUT_ARG;
6246 
6247   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6248     if (count_type == '<') {
6249       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6250       val.l++;
6251     }
6252     else if (count_type == 'X')
6253       val.l--;
6254   }
6255   else {
6256     if (count_type != '<') {
6257       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6258       val.l++;
6259     }
6260   }
6261 
6262   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6263   if (r != ONIG_NORMAL) return r;
6264 
6265   return ONIG_CALLOUT_SUCCESS;
6266 }
6267 
/*
 * Comparison operators understood by onig_builtin_cmp().
 * The selected operator is cached in the callout's data slot as a long,
 * so the numeric values are spelled out explicitly.
 */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
6276 
6277 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6278 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6279 {
6280   int r;
6281   int slot;
6282   long lv;
6283   long rv;
6284   OnigType  type;
6285   OnigValue val;
6286   regex_t* reg;
6287   enum OP_CMP op;
6288 
6289   reg = args->regex;
6290 
6291   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6292   if (r != ONIG_NORMAL) return r;
6293 
6294   if (type == ONIG_TYPE_TAG) {
6295     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6296     if (r < ONIG_NORMAL) return r;
6297     else if (r > ONIG_NORMAL)
6298       lv = 0L;
6299     else
6300       lv = val.l;
6301   }
6302   else { /* ONIG_TYPE_LONG */
6303     lv = val.l;
6304   }
6305 
6306   r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6307   if (r != ONIG_NORMAL) return r;
6308 
6309   if (type == ONIG_TYPE_TAG) {
6310     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6311     if (r < ONIG_NORMAL) return r;
6312     else if (r > ONIG_NORMAL)
6313       rv = 0L;
6314     else
6315       rv = val.l;
6316   }
6317   else { /* ONIG_TYPE_LONG */
6318     rv = val.l;
6319   }
6320 
6321   slot = 0;
6322   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6323   if (r < ONIG_NORMAL)
6324     return r;
6325   else if (r > ONIG_NORMAL) {
6326     /* type == void: initial state */
6327     OnigCodePoint c1, c2;
6328     UChar* p;
6329 
6330     r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6331     if (r != ONIG_NORMAL) return r;
6332 
6333     p = val.s.start;
6334     c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6335     p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6336     if (p < val.s.end) {
6337       c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6338       p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6339       if (p != val.s.end)  return ONIGERR_INVALID_CALLOUT_ARG;
6340     }
6341     else
6342       c2 = 0;
6343 
6344     switch (c1) {
6345     case '=':
6346       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6347       op = OP_EQ;
6348       break;
6349     case '!':
6350       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6351       op = OP_NE;
6352       break;
6353     case '<':
6354       if (c2 == '=') op = OP_LE;
6355       else if (c2 == 0) op = OP_LT;
6356       else  return ONIGERR_INVALID_CALLOUT_ARG;
6357       break;
6358     case '>':
6359       if (c2 == '=') op = OP_GE;
6360       else if (c2 == 0) op = OP_GT;
6361       else  return ONIGERR_INVALID_CALLOUT_ARG;
6362       break;
6363     default:
6364       return ONIGERR_INVALID_CALLOUT_ARG;
6365       break;
6366     }
6367     val.l = (long )op;
6368     r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6369     if (r != ONIG_NORMAL) return r;
6370   }
6371   else {
6372     op = (enum OP_CMP )val.l;
6373   }
6374 
6375   switch (op) {
6376   case OP_EQ: r = (lv == rv); break;
6377   case OP_NE: r = (lv != rv); break;
6378   case OP_LT: r = (lv <  rv); break;
6379   case OP_GT: r = (lv >  rv); break;
6380   case OP_LE: r = (lv <= rv); break;
6381   case OP_GE: r = (lv >= rv); break;
6382   }
6383 
6384   return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6385 }
6386 
6387 
6388 #include <stdio.h>
6389 
6390 static FILE* OutFp;
6391 
6392 /* name start with "onig_" for macros. */
6393 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6394 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6395 {
6396   int r;
6397   int num;
6398   size_t tag_len;
6399   const UChar* start;
6400   const UChar* right;
6401   const UChar* current;
6402   const UChar* string;
6403   const UChar* strend;
6404   const UChar* tag_start;
6405   const UChar* tag_end;
6406   regex_t* reg;
6407   OnigCalloutIn in;
6408   OnigType type;
6409   OnigValue val;
6410   char buf[20];
6411   FILE* fp;
6412 
6413   fp = OutFp;
6414 
6415   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6416   if (r != ONIG_NORMAL) return r;
6417 
6418   in = onig_get_callout_in_by_callout_args(args);
6419   if (in == ONIG_CALLOUT_IN_PROGRESS) {
6420     if (val.c == '<')
6421       return ONIG_CALLOUT_SUCCESS;
6422   }
6423   else {
6424     if (val.c != 'X' && val.c != '<')
6425       return ONIG_CALLOUT_SUCCESS;
6426   }
6427 
6428   num       = onig_get_callout_num_by_callout_args(args);
6429   start     = onig_get_start_by_callout_args(args);
6430   right     = onig_get_right_range_by_callout_args(args);
6431   current   = onig_get_current_by_callout_args(args);
6432   string    = onig_get_string_by_callout_args(args);
6433   strend    = onig_get_string_end_by_callout_args(args);
6434   reg       = onig_get_regex_by_callout_args(args);
6435   tag_start = onig_get_callout_tag_start(reg, num);
6436   tag_end   = onig_get_callout_tag_end(reg, num);
6437 
6438   if (tag_start == 0)
6439     xsnprintf(buf, sizeof(buf), "#%d", num);
6440   else {
6441     /* CAUTION: tag string is not terminated with NULL. */
6442     int i;
6443 
6444     tag_len = tag_end - tag_start;
6445     if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6446     for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
6447     buf[tag_len] = '\0';
6448   }
6449 
6450   fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6451           buf,
6452           in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6453           (int )(current - string),
6454           (int )(start   - string),
6455           (int )(right   - string),
6456           (int )(strend  - string));
6457   fflush(fp);
6458 
6459   return ONIG_CALLOUT_SUCCESS;
6460 }
6461 
6462 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6463 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6464 {
6465   int id;
6466   char* name;
6467   OnigEncoding enc;
6468   unsigned int ts[4];
6469   OnigValue opts[4];
6470 
6471   if (IS_NOT_NULL(fp))
6472     OutFp = (FILE* )fp;
6473   else
6474     OutFp = stdout;
6475 
6476   enc = ONIG_ENCODING_ASCII;
6477 
6478   name = "MON";
6479   ts[0] = ONIG_TYPE_CHAR;
6480   opts[0].c = '>';
6481   BC_B_O(name, monitor, 1, ts, 1, opts);
6482 
6483   return ONIG_NORMAL;
6484 }
6485 
6486 #endif /* USE_CALLOUT */
6487