xref: /PHP-7.2/ext/mbstring/oniguruma/src/regexec.c (revision 0ae2f95b)
1 /**********************************************************************
2   regexec.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
32 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
33 
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36   (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37    ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
39 
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
42 
43 static void
history_tree_clear(OnigCaptureTreeNode * node)44 history_tree_clear(OnigCaptureTreeNode* node)
45 {
46   int i;
47 
48   if (IS_NOT_NULL(node)) {
49     for (i = 0; i < node->num_childs; i++) {
50       if (IS_NOT_NULL(node->childs[i])) {
51         history_tree_free(node->childs[i]);
52       }
53     }
54     for (i = 0; i < node->allocated; i++) {
55       node->childs[i] = (OnigCaptureTreeNode* )0;
56     }
57     node->num_childs = 0;
58     node->beg = ONIG_REGION_NOTPOS;
59     node->end = ONIG_REGION_NOTPOS;
60     node->group = -1;
61   }
62 }
63 
64 static void
history_tree_free(OnigCaptureTreeNode * node)65 history_tree_free(OnigCaptureTreeNode* node)
66 {
67   history_tree_clear(node);
68   xfree(node);
69 }
70 
71 static void
history_root_free(OnigRegion * r)72 history_root_free(OnigRegion* r)
73 {
74   if (IS_NOT_NULL(r->history_root)) {
75     history_tree_free(r->history_root);
76     r->history_root = (OnigCaptureTreeNode* )0;
77   }
78 }
79 
80 static OnigCaptureTreeNode*
history_node_new(void)81 history_node_new(void)
82 {
83   OnigCaptureTreeNode* node;
84 
85   node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86   CHECK_NULL_RETURN(node);
87   node->childs     = (OnigCaptureTreeNode** )0;
88   node->allocated  = 0;
89   node->num_childs = 0;
90   node->group      = -1;
91   node->beg        = ONIG_REGION_NOTPOS;
92   node->end        = ONIG_REGION_NOTPOS;
93 
94   return node;
95 }
96 
97 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
99 {
100 #define HISTORY_TREE_INIT_ALLOC_SIZE  8
101 
102   if (parent->num_childs >= parent->allocated) {
103     int n, i;
104 
105     if (IS_NULL(parent->childs)) {
106       n = HISTORY_TREE_INIT_ALLOC_SIZE;
107       parent->childs =
108         (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
109     }
110     else {
111       n = parent->allocated * 2;
112       parent->childs =
113         (OnigCaptureTreeNode** )xrealloc(parent->childs,
114                                          sizeof(OnigCaptureTreeNode*) * n);
115     }
116     CHECK_NULL_RETURN_MEMERR(parent->childs);
117     for (i = parent->allocated; i < n; i++) {
118       parent->childs[i] = (OnigCaptureTreeNode* )0;
119     }
120     parent->allocated = n;
121   }
122 
123   parent->childs[parent->num_childs] = child;
124   parent->num_childs++;
125   return 0;
126 }
127 
128 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)129 history_tree_clone(OnigCaptureTreeNode* node)
130 {
131   int i;
132   OnigCaptureTreeNode *clone, *child;
133 
134   clone = history_node_new();
135   CHECK_NULL_RETURN(clone);
136 
137   clone->beg = node->beg;
138   clone->end = node->end;
139   for (i = 0; i < node->num_childs; i++) {
140     child = history_tree_clone(node->childs[i]);
141     if (IS_NULL(child)) {
142       history_tree_free(clone);
143       return (OnigCaptureTreeNode* )0;
144     }
145     history_tree_add_child(clone, child);
146   }
147 
148   return clone;
149 }
150 
151 extern  OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)152 onig_get_capture_tree(OnigRegion* region)
153 {
154   return region->history_root;
155 }
156 #endif /* USE_CAPTURE_HISTORY */
157 
158 extern void
onig_region_clear(OnigRegion * region)159 onig_region_clear(OnigRegion* region)
160 {
161   int i;
162 
163   for (i = 0; i < region->num_regs; i++) {
164     region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
165   }
166 #ifdef USE_CAPTURE_HISTORY
167   history_root_free(region);
168 #endif
169 }
170 
171 extern int
onig_region_resize(OnigRegion * region,int n)172 onig_region_resize(OnigRegion* region, int n)
173 {
174   region->num_regs = n;
175 
176   if (n < ONIG_NREGION)
177     n = ONIG_NREGION;
178 
179   if (region->allocated == 0) {
180     region->beg = (int* )xmalloc(n * sizeof(int));
181     region->end = (int* )xmalloc(n * sizeof(int));
182 
183     if (region->beg == 0 || region->end == 0)
184       return ONIGERR_MEMORY;
185 
186     region->allocated = n;
187   }
188   else if (region->allocated < n) {
189     region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
190     region->end = (int* )xrealloc(region->end, n * sizeof(int));
191 
192     if (region->beg == 0 || region->end == 0)
193       return ONIGERR_MEMORY;
194 
195     region->allocated = n;
196   }
197 
198   return 0;
199 }
200 
201 static int
onig_region_resize_clear(OnigRegion * region,int n)202 onig_region_resize_clear(OnigRegion* region, int n)
203 {
204   int r;
205 
206   r = onig_region_resize(region, n);
207   if (r != 0) return r;
208   onig_region_clear(region);
209   return 0;
210 }
211 
212 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)213 onig_region_set(OnigRegion* region, int at, int beg, int end)
214 {
215   if (at < 0) return ONIGERR_INVALID_ARGUMENT;
216 
217   if (at >= region->allocated) {
218     int r = onig_region_resize(region, at + 1);
219     if (r < 0) return r;
220   }
221 
222   region->beg[at] = beg;
223   region->end[at] = end;
224   return 0;
225 }
226 
227 extern void
onig_region_init(OnigRegion * region)228 onig_region_init(OnigRegion* region)
229 {
230   region->num_regs     = 0;
231   region->allocated    = 0;
232   region->beg          = (int* )0;
233   region->end          = (int* )0;
234   region->history_root = (OnigCaptureTreeNode* )0;
235 }
236 
237 extern OnigRegion*
onig_region_new(void)238 onig_region_new(void)
239 {
240   OnigRegion* r;
241 
242   r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
243   onig_region_init(r);
244   return r;
245 }
246 
247 extern void
onig_region_free(OnigRegion * r,int free_self)248 onig_region_free(OnigRegion* r, int free_self)
249 {
250   if (r) {
251     if (r->allocated > 0) {
252       if (r->beg) xfree(r->beg);
253       if (r->end) xfree(r->end);
254       r->allocated = 0;
255     }
256 #ifdef USE_CAPTURE_HISTORY
257     history_root_free(r);
258 #endif
259     if (free_self) xfree(r);
260   }
261 }
262 
263 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)264 onig_region_copy(OnigRegion* to, OnigRegion* from)
265 {
266 #define RREGC_SIZE   (sizeof(int) * from->num_regs)
267   int i;
268 
269   if (to == from) return;
270 
271   if (to->allocated == 0) {
272     if (from->num_regs > 0) {
273       to->beg = (int* )xmalloc(RREGC_SIZE);
274       to->end = (int* )xmalloc(RREGC_SIZE);
275       to->allocated = from->num_regs;
276     }
277   }
278   else if (to->allocated < from->num_regs) {
279     to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
280     to->end = (int* )xrealloc(to->end, RREGC_SIZE);
281     to->allocated = from->num_regs;
282   }
283 
284   for (i = 0; i < from->num_regs; i++) {
285     to->beg[i] = from->beg[i];
286     to->end[i] = from->end[i];
287   }
288   to->num_regs = from->num_regs;
289 
290 #ifdef USE_CAPTURE_HISTORY
291   history_root_free(to);
292 
293   if (IS_NOT_NULL(from->history_root)) {
294     to->history_root = history_tree_clone(from->history_root);
295   }
296 #endif
297 }
298 
299 
300 /** stack **/
301 #define INVALID_STACK_INDEX   -1
302 
303 /* stack type */
304 /* used by normal-POP */
305 #define STK_ALT                    0x0001
306 #define STK_LOOK_BEHIND_NOT        0x0002
307 #define STK_POS_NOT                0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START              0x0100
310 #define STK_MEM_END                0x8200
311 #define STK_REPEAT_INC             0x0300
312 #define STK_STATE_CHECK_MARK       0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START       0x3000
315 #define STK_NULL_CHECK_END         0x5000  /* for recursive call */
316 #define STK_MEM_END_MARK           0x8400
317 #define STK_POS                    0x0500  /* used when POP-POS */
318 #define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
319 #define STK_REPEAT                 0x0700
320 #define STK_CALL_FRAME             0x0800
321 #define STK_RETURN                 0x0900
322 #define STK_VOID                   0x0a00  /* for fill a blank */
323 
324 /* stack type check mask */
325 #define STK_MASK_POP_USED          0x00ff
326 #define STK_MASK_TO_VOID_TARGET    0x10ff
327 #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
328 
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
331   (msa).stack_p  = (void* )0;\
332   (msa).options  = (arg_option);\
333   (msa).region   = (arg_region);\
334   (msa).start    = (arg_start);\
335   (msa).best_len = ONIG_MISMATCH;\
336   (msa).ptr_num  = (reg)->num_repeat + (reg)->num_mem * 2;\
337 } while(0)
338 #else
339 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
340   (msa).stack_p  = (void* )0;\
341   (msa).options  = (arg_option);\
342   (msa).region   = (arg_region);\
343   (msa).start    = (arg_start);\
344   (msa).ptr_num  = (reg)->num_repeat + (reg)->num_mem * 2;\
345 } while(0)
346 #endif
347 
348 #ifdef USE_COMBINATION_EXPLOSION_CHECK
349 
350 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16
351 
352 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {	\
353   if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
354     unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
355     offset = ((offset) * (state_num)) >> 3;\
356     if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
357       if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
358         (msa).state_check_buff = (void* )xmalloc(size);\
359       else \
360         (msa).state_check_buff = (void* )xalloca(size);\
361       xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
362               (size_t )(size - (offset))); \
363       (msa).state_check_buff_size = size;\
364     }\
365     else {\
366       (msa).state_check_buff = (void* )0;\
367       (msa).state_check_buff_size = 0;\
368     }\
369   }\
370   else {\
371     (msa).state_check_buff = (void* )0;\
372     (msa).state_check_buff_size = 0;\
373   }\
374 } while(0)
375 
376 #define MATCH_ARG_FREE(msa) do {\
377   if ((msa).stack_p) xfree((msa).stack_p);\
378   if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
379     if ((msa).state_check_buff) xfree((msa).state_check_buff);\
380   }\
381 } while(0)
382 #else
383 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
384 #define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)
385 #endif
386 
387 
388 #define ALLOCA_PTR_NUM_LIMIT   50
389 
390 #define STACK_INIT(stack_num)  do {\
391   if (msa->stack_p) {\
392     is_alloca  = 0;\
393     alloc_base = msa->stack_p;\
394     stk_base   = (OnigStackType* )(alloc_base\
395                  + (sizeof(OnigStackIndex) * msa->ptr_num));\
396     stk        = stk_base;\
397     stk_end    = stk_base + msa->stack_n;\
398   }\
399   else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
400     is_alloca  = 0;\
401     alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\
402                   + sizeof(OnigStackType) * (stack_num));\
403     stk_base   = (OnigStackType* )(alloc_base\
404                  + (sizeof(OnigStackIndex) * msa->ptr_num));\
405     stk        = stk_base;\
406     stk_end    = stk_base + (stack_num);\
407   }\
408   else {\
409     is_alloca  = 1;\
410     alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\
411                  + sizeof(OnigStackType) * (stack_num));\
412     stk_base   = (OnigStackType* )(alloc_base\
413                  + (sizeof(OnigStackIndex) * msa->ptr_num));\
414     stk        = stk_base;\
415     stk_end    = stk_base + (stack_num);\
416   }\
417 } while(0);
418 
419 
420 #define STACK_SAVE do{\
421   msa->stack_n = stk_end - stk_base;\
422   if (is_alloca != 0) {\
423     size_t size = sizeof(OnigStackIndex) * msa->ptr_num \
424                 + sizeof(OnigStackType) * msa->stack_n;\
425     msa->stack_p = xmalloc(size);\
426     xmemcpy(msa->stack_p, alloc_base, size);\
427   }\
428   else {\
429     msa->stack_p = alloc_base;\
430   };\
431 } while(0)
432 
433 #define UPDATE_FOR_STACK_REALLOC do{\
434   repeat_stk = (OnigStackIndex* )alloc_base;\
435   mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\
436   mem_end_stk   = mem_start_stk + num_mem;\
437   mem_start_stk--; /* for index start from 1 */\
438   mem_end_stk--;   /* for index start from 1 */\
439 } while(0)
440 
441 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
442 
443 extern unsigned int
onig_get_match_stack_limit_size(void)444 onig_get_match_stack_limit_size(void)
445 {
446   return MatchStackLimitSize;
447 }
448 
449 extern int
onig_set_match_stack_limit_size(unsigned int size)450 onig_set_match_stack_limit_size(unsigned int size)
451 {
452   MatchStackLimitSize = size;
453   return 0;
454 }
455 
456 static int
stack_double(int is_alloca,char ** arg_alloc_base,OnigStackType ** arg_stk_base,OnigStackType ** arg_stk_end,OnigStackType ** arg_stk,OnigMatchArg * msa)457 stack_double(int is_alloca, char** arg_alloc_base,
458 	     OnigStackType** arg_stk_base,
459 	     OnigStackType** arg_stk_end, OnigStackType** arg_stk,
460 	     OnigMatchArg* msa)
461 {
462   unsigned int n;
463   int used;
464   size_t size;
465   size_t new_size;
466   char* alloc_base;
467   char* new_alloc_base;
468   OnigStackType *stk_base, *stk_end, *stk;
469 
470   alloc_base = *arg_alloc_base;
471   stk_base = *arg_stk_base;
472   stk_end  = *arg_stk_end;
473   stk      = *arg_stk;
474 
475   n = stk_end - stk_base;
476   size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
477   n *= 2;
478   new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
479   if (is_alloca != 0) {
480     new_alloc_base = (char* )xmalloc(new_size);
481     if (IS_NULL(new_alloc_base)) {
482       STACK_SAVE;
483       return ONIGERR_MEMORY;
484     }
485     xmemcpy(new_alloc_base, alloc_base, size);
486   }
487   else {
488     if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
489       if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
490         return ONIGERR_MATCH_STACK_LIMIT_OVER;
491       else
492         n = MatchStackLimitSize;
493     }
494     new_alloc_base = (char* )xrealloc(alloc_base, new_size);
495     if (IS_NULL(new_alloc_base)) {
496       STACK_SAVE;
497       return ONIGERR_MEMORY;
498     }
499   }
500 
501   alloc_base = new_alloc_base;
502   used = stk - stk_base;
503   *arg_alloc_base = alloc_base;
504   *arg_stk_base   = (OnigStackType* )(alloc_base
505 		       + (sizeof(OnigStackIndex) * msa->ptr_num));
506   *arg_stk      = *arg_stk_base + used;
507   *arg_stk_end  = *arg_stk_base + n;
508   return 0;
509 }
510 
511 #define STACK_ENSURE(n)	do {\
512   if (stk_end - stk < (n)) {\
513     int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\
514                          msa);\
515     if (r != 0) { STACK_SAVE; return r; } \
516     is_alloca = 0;\
517     UPDATE_FOR_STACK_REALLOC;\
518   }\
519 } while(0)
520 
521 #define STACK_AT(index)        (stk_base + (index))
522 #define GET_STACK_INDEX(stk)   ((stk) - stk_base)
523 
524 #define STACK_PUSH_TYPE(stack_type) do {\
525   STACK_ENSURE(1);\
526   stk->type = (stack_type);\
527   STACK_INC;\
528 } while(0)
529 
530 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
531 
532 #ifdef USE_COMBINATION_EXPLOSION_CHECK
533 #define STATE_CHECK_POS(s,snum) \
534   (((s) - str) * num_comb_exp_check + ((snum) - 1))
535 #define STATE_CHECK_VAL(v,snum) do {\
536   if (state_check_buff != NULL) {\
537     int x = STATE_CHECK_POS(s,snum);\
538     (v) = state_check_buff[x/8] & (1<<(x%8));\
539   }\
540   else (v) = 0;\
541 } while(0)
542 
543 
544 #define ELSE_IF_STATE_CHECK_MARK(stk) \
545   else if ((stk)->type == STK_STATE_CHECK_MARK) { \
546     int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
547     state_check_buff[x/8] |= (1<<(x%8));				\
548   }
549 
550 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
551   STACK_ENSURE(1);\
552   stk->type = (stack_type);\
553   stk->u.state.pcode     = (pat);\
554   stk->u.state.pstr      = (s);\
555   stk->u.state.pstr_prev = (sprev);\
556   stk->u.state.state_check = 0;\
557   STACK_INC;\
558 } while(0)
559 
560 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
561   stk->type = (stack_type);\
562   stk->u.state.pcode = (pat);\
563   stk->u.state.state_check = 0;\
564   STACK_INC;\
565 } while(0)
566 
567 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
568   STACK_ENSURE(1);\
569   stk->type = STK_ALT;\
570   stk->u.state.pcode     = (pat);\
571   stk->u.state.pstr      = (s);\
572   stk->u.state.pstr_prev = (sprev);\
573   stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
574   STACK_INC;\
575 } while(0)
576 
577 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
578   if (state_check_buff != NULL) {\
579     STACK_ENSURE(1);\
580     stk->type = STK_STATE_CHECK_MARK;\
581     stk->u.state.pstr = (s);\
582     stk->u.state.state_check = (snum);\
583     STACK_INC;\
584   }\
585 } while(0)
586 
587 #else /* USE_COMBINATION_EXPLOSION_CHECK */
588 
589 #define ELSE_IF_STATE_CHECK_MARK(stk)
590 
591 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
592   STACK_ENSURE(1);\
593   stk->type = (stack_type);\
594   stk->u.state.pcode     = (pat);\
595   stk->u.state.pstr      = (s);\
596   stk->u.state.pstr_prev = (sprev);\
597   STACK_INC;\
598 } while(0)
599 
600 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
601   stk->type = (stack_type);\
602   stk->u.state.pcode = (pat);\
603   STACK_INC;\
604 } while(0)
605 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
606 
607 #define STACK_PUSH_ALT(pat,s,sprev)     STACK_PUSH(STK_ALT,pat,s,sprev)
608 #define STACK_PUSH_POS(s,sprev)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
609 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
610 #define STACK_PUSH_STOP_BT              STACK_PUSH_TYPE(STK_STOP_BT)
611 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
612         STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
613 
614 #define STACK_PUSH_REPEAT(id, pat) do {\
615   STACK_ENSURE(1);\
616   stk->type = STK_REPEAT;\
617   stk->u.repeat.num    = (id);\
618   stk->u.repeat.pcode  = (pat);\
619   stk->u.repeat.count  = 0;\
620   STACK_INC;\
621 } while(0)
622 
623 #define STACK_PUSH_REPEAT_INC(sindex) do {\
624   STACK_ENSURE(1);\
625   stk->type = STK_REPEAT_INC;\
626   stk->u.repeat_inc.si  = (sindex);\
627   STACK_INC;\
628 } while(0)
629 
630 #define STACK_PUSH_MEM_START(mnum, s) do {\
631   STACK_ENSURE(1);\
632   stk->type = STK_MEM_START;\
633   stk->u.mem.num      = (mnum);\
634   stk->u.mem.pstr     = (s);\
635   stk->u.mem.start    = mem_start_stk[mnum];\
636   stk->u.mem.end      = mem_end_stk[mnum];\
637   mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
638   mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
639   STACK_INC;\
640 } while(0)
641 
642 #define STACK_PUSH_MEM_END(mnum, s) do {\
643   STACK_ENSURE(1);\
644   stk->type = STK_MEM_END;\
645   stk->u.mem.num    = (mnum);\
646   stk->u.mem.pstr   = (s);\
647   stk->u.mem.start  = mem_start_stk[mnum];\
648   stk->u.mem.end    = mem_end_stk[mnum];\
649   mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
650   STACK_INC;\
651 } while(0)
652 
653 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
654   STACK_ENSURE(1);\
655   stk->type = STK_MEM_END_MARK;\
656   stk->u.mem.num = (mnum);\
657   STACK_INC;\
658 } while(0)
659 
660 #define STACK_GET_MEM_START(mnum, k) do {\
661   int level = 0;\
662   k = stk;\
663   while (k > stk_base) {\
664     k--;\
665     if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
666       && k->u.mem.num == (mnum)) {\
667       level++;\
668     }\
669     else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
670       if (level == 0) break;\
671       level--;\
672     }\
673   }\
674 } while(0)
675 
676 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
677   int level = 0;\
678   while (k < stk) {\
679     if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
680       if (level == 0) (start) = k->u.mem.pstr;\
681       level++;\
682     }\
683     else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
684       level--;\
685       if (level == 0) {\
686         (end) = k->u.mem.pstr;\
687         break;\
688       }\
689     }\
690     k++;\
691   }\
692 } while(0)
693 
694 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
695   STACK_ENSURE(1);\
696   stk->type = STK_NULL_CHECK_START;\
697   stk->u.null_check.num  = (cnum);\
698   stk->u.null_check.pstr = (s);\
699   STACK_INC;\
700 } while(0)
701 
702 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
703   STACK_ENSURE(1);\
704   stk->type = STK_NULL_CHECK_END;\
705   stk->u.null_check.num  = (cnum);\
706   STACK_INC;\
707 } while(0)
708 
709 #define STACK_PUSH_CALL_FRAME(pat) do {\
710   STACK_ENSURE(1);\
711   stk->type = STK_CALL_FRAME;\
712   stk->u.call_frame.ret_addr = (pat);\
713   STACK_INC;\
714 } while(0)
715 
716 #define STACK_PUSH_RETURN do {\
717   STACK_ENSURE(1);\
718   stk->type = STK_RETURN;\
719   STACK_INC;\
720 } while(0)
721 
722 
723 #ifdef ONIG_DEBUG
724 #define STACK_BASE_CHECK(p, at) \
725   if ((p) < stk_base) {\
726     fprintf(stderr, "at %s\n", at);\
727     goto stack_error;\
728   }
729 #else
730 #define STACK_BASE_CHECK(p, at)
731 #endif
732 
733 #define STACK_POP_ONE do {\
734   stk--;\
735   STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
736 } while(0)
737 
738 #define STACK_POP  do {\
739   switch (pop_level) {\
740   case STACK_POP_LEVEL_FREE:\
741     while (1) {\
742       stk--;\
743       STACK_BASE_CHECK(stk, "STACK_POP"); \
744       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
745       ELSE_IF_STATE_CHECK_MARK(stk);\
746     }\
747     break;\
748   case STACK_POP_LEVEL_MEM_START:\
749     while (1) {\
750       stk--;\
751       STACK_BASE_CHECK(stk, "STACK_POP 2"); \
752       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
753       else if (stk->type == STK_MEM_START) {\
754         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
755         mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
756       }\
757       ELSE_IF_STATE_CHECK_MARK(stk);\
758     }\
759     break;\
760   default:\
761     while (1) {\
762       stk--;\
763       STACK_BASE_CHECK(stk, "STACK_POP 3"); \
764       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
765       else if (stk->type == STK_MEM_START) {\
766         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
767         mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
768       }\
769       else if (stk->type == STK_REPEAT_INC) {\
770         STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
771       }\
772       else if (stk->type == STK_MEM_END) {\
773         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
774         mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
775       }\
776       ELSE_IF_STATE_CHECK_MARK(stk);\
777     }\
778     break;\
779   }\
780 } while(0)
781 
782 #define STACK_POP_TIL_POS_NOT  do {\
783   while (1) {\
784     stk--;\
785     STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
786     if (stk->type == STK_POS_NOT) break;\
787     else if (stk->type == STK_MEM_START) {\
788       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
789       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
790     }\
791     else if (stk->type == STK_REPEAT_INC) {\
792       STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
793     }\
794     else if (stk->type == STK_MEM_END) {\
795       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
796       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
797     }\
798     ELSE_IF_STATE_CHECK_MARK(stk);\
799   }\
800 } while(0)
801 
802 #define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
803   while (1) {\
804     stk--;\
805     STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
806     if (stk->type == STK_LOOK_BEHIND_NOT) break;\
807     else if (stk->type == STK_MEM_START) {\
808       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
809       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
810     }\
811     else if (stk->type == STK_REPEAT_INC) {\
812       STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
813     }\
814     else if (stk->type == STK_MEM_END) {\
815       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
816       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
817     }\
818     ELSE_IF_STATE_CHECK_MARK(stk);\
819   }\
820 } while(0)
821 
822 #define STACK_POS_END(k) do {\
823   k = stk;\
824   while (1) {\
825     k--;\
826     STACK_BASE_CHECK(k, "STACK_POS_END"); \
827     if (IS_TO_VOID_TARGET(k)) {\
828       k->type = STK_VOID;\
829     }\
830     else if (k->type == STK_POS) {\
831       k->type = STK_VOID;\
832       break;\
833     }\
834   }\
835 } while(0)
836 
837 #define STACK_STOP_BT_END do {\
838   OnigStackType *k = stk;\
839   while (1) {\
840     k--;\
841     STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
842     if (IS_TO_VOID_TARGET(k)) {\
843       k->type = STK_VOID;\
844     }\
845     else if (k->type == STK_STOP_BT) {\
846       k->type = STK_VOID;\
847       break;\
848     }\
849   }\
850 } while(0)
851 
852 #define STACK_NULL_CHECK(isnull,id,s) do {\
853   OnigStackType* k = stk;\
854   while (1) {\
855     k--;\
856     STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
857     if (k->type == STK_NULL_CHECK_START) {\
858       if (k->u.null_check.num == (id)) {\
859         (isnull) = (k->u.null_check.pstr == (s));\
860         break;\
861       }\
862     }\
863   }\
864 } while(0)
865 
866 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
867   int level = 0;\
868   OnigStackType* k = stk;\
869   while (1) {\
870     k--;\
871     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
872     if (k->type == STK_NULL_CHECK_START) {\
873       if (k->u.null_check.num == (id)) {\
874         if (level == 0) {\
875           (isnull) = (k->u.null_check.pstr == (s));\
876           break;\
877         }\
878         else level--;\
879       }\
880     }\
881     else if (k->type == STK_NULL_CHECK_END) {\
882       level++;\
883     }\
884   }\
885 } while(0)
886 
887 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
888   OnigStackType* k = stk;\
889   while (1) {\
890     k--;\
891     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
892     if (k->type == STK_NULL_CHECK_START) {\
893       if (k->u.null_check.num == (id)) {\
894         if (k->u.null_check.pstr != (s)) {\
895           (isnull) = 0;\
896           break;\
897         }\
898         else {\
899           UChar* endp;\
900           (isnull) = 1;\
901           while (k < stk) {\
902             if (k->type == STK_MEM_START) {\
903               if (k->u.mem.end == INVALID_STACK_INDEX) {\
904                 (isnull) = 0; break;\
905               }\
906               if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
907                 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
908               else\
909                 endp = (UChar* )k->u.mem.end;\
910               if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
911                 (isnull) = 0; break;\
912               }\
913               else if (endp != s) {\
914                 (isnull) = -1; /* empty, but position changed */ \
915               }\
916             }\
917             k++;\
918           }\
919   	  break;\
920         }\
921       }\
922     }\
923   }\
924 } while(0)
925 
926 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
927   int level = 0;\
928   OnigStackType* k = stk;\
929   while (1) {\
930     k--;\
931     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
932     if (k->type == STK_NULL_CHECK_START) {\
933       if (k->u.null_check.num == (id)) {\
934         if (level == 0) {\
935           if (k->u.null_check.pstr != (s)) {\
936             (isnull) = 0;\
937             break;\
938           }\
939           else {\
940             UChar* endp;\
941             (isnull) = 1;\
942             while (k < stk) {\
943               if (k->type == STK_MEM_START) {\
944                 if (k->u.mem.end == INVALID_STACK_INDEX) {\
945                   (isnull) = 0; break;\
946                 }\
947                 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
948                   endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
949                 else\
950                   endp = (UChar* )k->u.mem.end;\
951                 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
952                   (isnull) = 0; break;\
953                 }\
954                 else if (endp != s) {\
955                   (isnull) = -1; /* empty, but position changed */ \
956                 }\
957               }\
958               k++;\
959             }\
960   	    break;\
961           }\
962         }\
963         else {\
964           level--;\
965         }\
966       }\
967     }\
968     else if (k->type == STK_NULL_CHECK_END) {\
969       if (k->u.null_check.num == (id)) level++;\
970     }\
971   }\
972 } while(0)
973 
974 #define STACK_GET_REPEAT(id, k) do {\
975   int level = 0;\
976   k = stk;\
977   while (1) {\
978     k--;\
979     STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
980     if (k->type == STK_REPEAT) {\
981       if (level == 0) {\
982         if (k->u.repeat.num == (id)) {\
983           break;\
984         }\
985       }\
986     }\
987     else if (k->type == STK_CALL_FRAME) level--;\
988     else if (k->type == STK_RETURN)     level++;\
989   }\
990 } while(0)
991 
992 #define STACK_RETURN(addr)  do {\
993   int level = 0;\
994   OnigStackType* k = stk;\
995   while (1) {\
996     k--;\
997     STACK_BASE_CHECK(k, "STACK_RETURN"); \
998     if (k->type == STK_CALL_FRAME) {\
999       if (level == 0) {\
1000         (addr) = k->u.call_frame.ret_addr;\
1001         break;\
1002       }\
1003       else level--;\
1004     }\
1005     else if (k->type == STK_RETURN)\
1006       level++;\
1007   }\
1008 } while(0)
1009 
1010 
1011 #define STRING_CMP(s1,s2,len) do {\
1012   while (len-- > 0) {\
1013     if (*s1++ != *s2++) goto fail;\
1014   }\
1015 } while(0)
1016 
1017 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
1018   if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1019     goto fail; \
1020 } while(0)
1021 
string_cmp_ic(OnigEncoding enc,int case_fold_flag,UChar * s1,UChar ** ps2,int mblen)1022 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1023 			 UChar* s1, UChar** ps2, int mblen)
1024 {
1025   UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1026   UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1027   UChar *p1, *p2, *end1, *s2, *end2;
1028   int len1, len2;
1029 
1030   s2   = *ps2;
1031   end1 = s1 + mblen;
1032   end2 = s2 + mblen;
1033   while (s1 < end1) {
1034     len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
1035     len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
1036     if (len1 != len2) return 0;
1037     p1 = buf1;
1038     p2 = buf2;
1039     while (len1-- > 0) {
1040       if (*p1 != *p2) return 0;
1041       p1++;
1042       p2++;
1043     }
1044   }
1045 
1046   *ps2 = s2;
1047   return 1;
1048 }
1049 
1050 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1051   is_fail = 0;\
1052   while (len-- > 0) {\
1053     if (*s1++ != *s2++) {\
1054       is_fail = 1; break;\
1055     }\
1056   }\
1057 } while(0)
1058 
1059 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1060   if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1061     is_fail = 1; \
1062   else \
1063     is_fail = 0; \
1064 } while(0)
1065 
1066 
1067 #define IS_EMPTY_STR           (str == end)
1068 #define ON_STR_BEGIN(s)       ((s) == str)
1069 #define ON_STR_END(s)         ((s) == end)
1070 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1071 #define DATA_ENSURE_CHECK1     (s < right_range)
1072 #define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
1073 #define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail
1074 #else
1075 #define DATA_ENSURE_CHECK1     (s < end)
1076 #define DATA_ENSURE_CHECK(n)   (s + (n) <= end)
1077 #define DATA_ENSURE(n)         if (s + (n) > end) goto fail
1078 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1079 
1080 
1081 #ifdef USE_CAPTURE_HISTORY
1082 static int
make_capture_history_tree(OnigCaptureTreeNode * node,OnigStackType ** kp,OnigStackType * stk_top,UChar * str,regex_t * reg)1083 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1084                           OnigStackType* stk_top, UChar* str, regex_t* reg)
1085 {
1086   int n, r;
1087   OnigCaptureTreeNode* child;
1088   OnigStackType* k = *kp;
1089 
1090   while (k < stk_top) {
1091     if (k->type == STK_MEM_START) {
1092       n = k->u.mem.num;
1093       if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1094           BIT_STATUS_AT(reg->capture_history, n) != 0) {
1095         child = history_node_new();
1096         CHECK_NULL_RETURN_MEMERR(child);
1097         child->group = n;
1098         child->beg = (int )(k->u.mem.pstr - str);
1099         r = history_tree_add_child(node, child);
1100         if (r != 0) return r;
1101         *kp = (k + 1);
1102         r = make_capture_history_tree(child, kp, stk_top, str, reg);
1103         if (r != 0) return r;
1104 
1105         k = *kp;
1106         child->end = (int )(k->u.mem.pstr - str);
1107       }
1108     }
1109     else if (k->type == STK_MEM_END) {
1110       if (k->u.mem.num == node->group) {
1111         node->end = (int )(k->u.mem.pstr - str);
1112         *kp = k;
1113         return 0;
1114       }
1115     }
1116     k++;
1117   }
1118 
1119   return 1; /* 1: root node ending. */
1120 }
1121 #endif
1122 
1123 #ifdef USE_BACKREF_WITH_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1124 static int mem_is_in_memp(int mem, int num, UChar* memp)
1125 {
1126   int i;
1127   MemNumType m;
1128 
1129   for (i = 0; i < num; i++) {
1130     GET_MEMNUM_INC(m, memp);
1131     if (mem == (int )m) return 1;
1132   }
1133   return 0;
1134 }
1135 
backref_match_at_nested_level(regex_t * reg,OnigStackType * top,OnigStackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1136 static int backref_match_at_nested_level(regex_t* reg
1137 	 , OnigStackType* top, OnigStackType* stk_base
1138 	 , int ignore_case, int case_fold_flag
1139 	 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1140 {
1141   UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1142   int level;
1143   OnigStackType* k;
1144 
1145   level = 0;
1146   k = top;
1147   k--;
1148   while (k >= stk_base) {
1149     if (k->type == STK_CALL_FRAME) {
1150       level--;
1151     }
1152     else if (k->type == STK_RETURN) {
1153       level++;
1154     }
1155     else if (level == nest) {
1156       if (k->type == STK_MEM_START) {
1157         if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1158           pstart = k->u.mem.pstr;
1159           if (pend != NULL_UCHARP) {
1160             if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1161             p  = pstart;
1162             ss = *s;
1163 
1164             if (ignore_case != 0) {
1165               if (string_cmp_ic(reg->enc, case_fold_flag,
1166                                 pstart, &ss, (int )(pend - pstart)) == 0)
1167                 return 0; /* or goto next_mem; */
1168             }
1169             else {
1170               while (p < pend) {
1171                 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1172               }
1173             }
1174 
1175             *s = ss;
1176             return 1;
1177           }
1178         }
1179       }
1180       else if (k->type == STK_MEM_END) {
1181         if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1182           pend = k->u.mem.pstr;
1183         }
1184       }
1185     }
1186     k--;
1187   }
1188 
1189   return 0;
1190 }
1191 #endif /* USE_BACKREF_WITH_LEVEL */
1192 
1193 
1194 #ifdef ONIG_DEBUG_STATISTICS
1195 
1196 #define USE_TIMEOFDAY
1197 
1198 #ifdef USE_TIMEOFDAY
1199 #ifdef HAVE_SYS_TIME_H
1200 #include <sys/time.h>
1201 #endif
1202 #ifdef HAVE_UNISTD_H
1203 #include <unistd.h>
1204 #endif
1205 static struct timeval ts, te;
1206 #define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
1207 #define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
1208                            (((te).tv_sec - (ts).tv_sec)*1000000))
1209 #else
1210 #ifdef HAVE_SYS_TIMES_H
1211 #include <sys/times.h>
1212 #endif
1213 static struct tms ts, te;
1214 #define GETTIME(t)         times(&(t))
1215 #define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
1216 #endif
1217 
1218 static int OpCounter[256];
1219 static int OpPrevCounter[256];
1220 static unsigned long OpTime[256];
1221 static int OpCurr = OP_FINISH;
1222 static int OpPrevTarget = OP_FAIL;
1223 static int MaxStackDepth = 0;
1224 
1225 #define MOP_IN(opcode) do {\
1226   if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1227   OpCurr = opcode;\
1228   OpCounter[opcode]++;\
1229   GETTIME(ts);\
1230 } while(0)
1231 
1232 #define MOP_OUT do {\
1233   GETTIME(te);\
1234   OpTime[OpCurr] += TIMEDIFF(te, ts);\
1235 } while(0)
1236 
1237 extern void
onig_statistics_init(void)1238 onig_statistics_init(void)
1239 {
1240   int i;
1241   for (i = 0; i < 256; i++) {
1242     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1243   }
1244   MaxStackDepth = 0;
1245 }
1246 
1247 extern int
onig_print_statistics(FILE * f)1248 onig_print_statistics(FILE* f)
1249 {
1250   int r;
1251   int i;
1252 
1253   r = fprintf(f, "   count      prev        time\n");
1254   if (r < 0) return -1;
1255 
1256   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1257     r = fprintf(f, "%8d: %8d: %10ld: %s\n",
1258                 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1259     if (r < 0) return -1;
1260   }
1261   r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1262   if (r < 0) return -1;
1263 
1264   return 0;
1265 }
1266 
1267 #define STACK_INC do {\
1268   stk++;\
1269   if (stk - stk_base > MaxStackDepth) \
1270     MaxStackDepth = stk - stk_base;\
1271 } while(0)
1272 
1273 #else
1274 #define STACK_INC     stk++
1275 
1276 #define MOP_IN(opcode)
1277 #define MOP_OUT
1278 #endif
1279 
1280 
1281 /* matching region of POSIX API */
1282 typedef int regoff_t;
1283 
1284 typedef struct {
1285   regoff_t  rm_so;
1286   regoff_t  rm_eo;
1287 } posix_regmatch_t;
1288 
1289 /* match data(str - end) from position (sstart). */
1290 /* if sstart == str then set sprev to NULL. */
1291 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * right_range,const UChar * sstart,UChar * sprev,OnigMatchArg * msa)1292 match_at(regex_t* reg, const UChar* str, const UChar* end,
1293 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1294 	 const UChar* right_range,
1295 #endif
1296 	 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1297 {
1298   static UChar FinishCode[] = { OP_FINISH };
1299 
1300   int i, n, num_mem, best_len, pop_level;
1301   LengthType tlen, tlen2;
1302   MemNumType mem;
1303   RelAddrType addr;
1304   UChar *s, *q, *sbegin;
1305   int is_alloca;
1306   char *alloc_base;
1307   OnigStackType *stk_base, *stk, *stk_end;
1308   OnigStackType *stkp; /* used as any purpose. */
1309   OnigStackIndex si;
1310   OnigStackIndex *repeat_stk;
1311   OnigStackIndex *mem_start_stk, *mem_end_stk;
1312 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1313   int scv;
1314   unsigned char* state_check_buff = msa->state_check_buff;
1315   int num_comb_exp_check = reg->num_comb_exp_check;
1316 #endif
1317   UChar *p = reg->p;
1318   OnigOptionType option = reg->options;
1319   OnigEncoding encode = reg->enc;
1320   OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1321 
1322   //n = reg->num_repeat + reg->num_mem * 2;
1323   pop_level = reg->stack_pop_level;
1324   num_mem = reg->num_mem;
1325   STACK_INIT(INIT_MATCH_STACK_SIZE);
1326   UPDATE_FOR_STACK_REALLOC;
1327   for (i = 1; i <= num_mem; i++) {
1328     mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1329   }
1330 
1331 #ifdef ONIG_DEBUG_MATCH
1332   fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1333 	  (int )str, (int )end, (int )sstart, (int )sprev);
1334   fprintf(stderr, "size: %d, start offset: %d\n",
1335 	  (int )(end - str), (int )(sstart - str));
1336 #endif
1337 
1338   STACK_PUSH_ENSURED(STK_ALT, FinishCode);  /* bottom stack */
1339   best_len = ONIG_MISMATCH;
1340   s = (UChar* )sstart;
1341   while (1) {
1342 #ifdef ONIG_DEBUG_MATCH
1343     {
1344       UChar *q, *bp, buf[50];
1345       int len;
1346       fprintf(stderr, "%4d> \"", (int )(s - str));
1347       bp = buf;
1348       for (i = 0, q = s; i < 7 && q < end; i++) {
1349         len = enclen(encode, q);
1350         while (len-- > 0) *bp++ = *q++;
1351       }
1352       if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1353       else         { xmemcpy(bp, "\"",    1); bp += 1; }
1354       *bp = 0;
1355       fputs((char* )buf, stderr);
1356       for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1357       onig_print_compiled_byte_code(stderr, p, NULL, encode);
1358       fprintf(stderr, "\n");
1359     }
1360 #endif
1361 
1362     sbegin = s;
1363     switch (*p++) {
1364     case OP_END:  MOP_IN(OP_END);
1365       n = s - sstart;
1366       if (n > best_len) {
1367         OnigRegion* region;
1368 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1369         if (IS_FIND_LONGEST(option)) {
1370           if (n > msa->best_len) {
1371             msa->best_len = n;
1372             msa->best_s   = (UChar* )sstart;
1373           }
1374           else
1375             goto end_best_len;
1376         }
1377 #endif
1378         best_len = n;
1379         region = msa->region;
1380         if (region) {
1381 #ifdef USE_POSIX_API_REGION_OPTION
1382           if (IS_POSIX_REGION(msa->options)) {
1383             posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1384 
1385             rmt[0].rm_so = sstart - str;
1386             rmt[0].rm_eo = s      - str;
1387             for (i = 1; i <= num_mem; i++) {
1388               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1389                 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1390                   rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1391                 else
1392                   rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1393 
1394                 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1395                                 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1396                                 : (UChar* )((void* )mem_end_stk[i])) - str;
1397               }
1398               else {
1399                 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1400               }
1401             }
1402           }
1403           else {
1404 #endif /* USE_POSIX_API_REGION_OPTION */
1405             region->beg[0] = sstart - str;
1406             region->end[0] = s      - str;
1407             for (i = 1; i <= num_mem; i++) {
1408               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1409                 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1410                   region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1411                 else
1412                   region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1413 
1414                 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1415                                   ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1416                                   : (UChar* )((void* )mem_end_stk[i])) - str;
1417               }
1418               else {
1419                 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1420               }
1421             }
1422 
1423 #ifdef USE_CAPTURE_HISTORY
1424             if (reg->capture_history != 0) {
1425               int r;
1426               OnigCaptureTreeNode* node;
1427 
1428               if (IS_NULL(region->history_root)) {
1429                 region->history_root = node = history_node_new();
1430                 CHECK_NULL_RETURN_MEMERR(node);
1431               }
1432               else {
1433                 node = region->history_root;
1434                 history_tree_clear(node);
1435               }
1436 
1437               node->group = 0;
1438               node->beg   = sstart - str;
1439               node->end   = s      - str;
1440 
1441               stkp = stk_base;
1442               r = make_capture_history_tree(region->history_root, &stkp,
1443                                             stk, (UChar* )str, reg);
1444               if (r < 0) {
1445                 best_len = r; /* error code */
1446                 goto finish;
1447               }
1448             }
1449 #endif /* USE_CAPTURE_HISTORY */
1450 #ifdef USE_POSIX_API_REGION_OPTION
1451           } /* else IS_POSIX_REGION() */
1452 #endif
1453         } /* if (region) */
1454       } /* n > best_len */
1455 
1456 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1457     end_best_len:
1458 #endif
1459       MOP_OUT;
1460 
1461       if (IS_FIND_CONDITION(option)) {
1462         if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1463           best_len = ONIG_MISMATCH;
1464           goto fail; /* for retry */
1465         }
1466         if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1467           goto fail; /* for retry */
1468         }
1469       }
1470 
1471       /* default behavior: return first-matching result. */
1472       goto finish;
1473       break;
1474 
1475     case OP_EXACT1:  MOP_IN(OP_EXACT1);
1476       DATA_ENSURE(1);
1477       if (*p != *s) goto fail;
1478       p++; s++;
1479       MOP_OUT;
1480       break;
1481 
1482     case OP_EXACT1_IC:  MOP_IN(OP_EXACT1_IC);
1483       {
1484         int len;
1485         UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1486 
1487         DATA_ENSURE(1);
1488         len = ONIGENC_MBC_CASE_FOLD(encode,
1489                  /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1490                                     case_fold_flag,
1491                                     &s, end, lowbuf);
1492         DATA_ENSURE(0);
1493         q = lowbuf;
1494         while (len-- > 0) {
1495           if (*p != *q) {
1496             goto fail;
1497           }
1498           p++; q++;
1499         }
1500       }
1501       MOP_OUT;
1502       break;
1503 
1504     case OP_EXACT2:  MOP_IN(OP_EXACT2);
1505       DATA_ENSURE(2);
1506       if (*p != *s) goto fail;
1507       p++; s++;
1508       if (*p != *s) goto fail;
1509       sprev = s;
1510       p++; s++;
1511       MOP_OUT;
1512       continue;
1513       break;
1514 
1515     case OP_EXACT3:  MOP_IN(OP_EXACT3);
1516       DATA_ENSURE(3);
1517       if (*p != *s) goto fail;
1518       p++; s++;
1519       if (*p != *s) goto fail;
1520       p++; s++;
1521       if (*p != *s) goto fail;
1522       sprev = s;
1523       p++; s++;
1524       MOP_OUT;
1525       continue;
1526       break;
1527 
1528     case OP_EXACT4:  MOP_IN(OP_EXACT4);
1529       DATA_ENSURE(4);
1530       if (*p != *s) goto fail;
1531       p++; s++;
1532       if (*p != *s) goto fail;
1533       p++; s++;
1534       if (*p != *s) goto fail;
1535       p++; s++;
1536       if (*p != *s) goto fail;
1537       sprev = s;
1538       p++; s++;
1539       MOP_OUT;
1540       continue;
1541       break;
1542 
1543     case OP_EXACT5:  MOP_IN(OP_EXACT5);
1544       DATA_ENSURE(5);
1545       if (*p != *s) goto fail;
1546       p++; s++;
1547       if (*p != *s) goto fail;
1548       p++; s++;
1549       if (*p != *s) goto fail;
1550       p++; s++;
1551       if (*p != *s) goto fail;
1552       p++; s++;
1553       if (*p != *s) goto fail;
1554       sprev = s;
1555       p++; s++;
1556       MOP_OUT;
1557       continue;
1558       break;
1559 
1560     case OP_EXACTN:  MOP_IN(OP_EXACTN);
1561       GET_LENGTH_INC(tlen, p);
1562       DATA_ENSURE(tlen);
1563       while (tlen-- > 0) {
1564         if (*p++ != *s++) goto fail;
1565       }
1566       sprev = s - 1;
1567       MOP_OUT;
1568       continue;
1569       break;
1570 
1571     case OP_EXACTN_IC:  MOP_IN(OP_EXACTN_IC);
1572       {
1573         int len;
1574         UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1575 
1576         GET_LENGTH_INC(tlen, p);
1577         endp = p + tlen;
1578 
1579         while (p < endp) {
1580           sprev = s;
1581           DATA_ENSURE(1);
1582           len = ONIGENC_MBC_CASE_FOLD(encode,
1583                         /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1584                                       case_fold_flag,
1585                                       &s, end, lowbuf);
1586           DATA_ENSURE(0);
1587           q = lowbuf;
1588           while (len-- > 0) {
1589             if (*p != *q) goto fail;
1590             p++; q++;
1591           }
1592         }
1593       }
1594 
1595       MOP_OUT;
1596       continue;
1597       break;
1598 
1599     case OP_EXACTMB2N1:  MOP_IN(OP_EXACTMB2N1);
1600       DATA_ENSURE(2);
1601       if (*p != *s) goto fail;
1602       p++; s++;
1603       if (*p != *s) goto fail;
1604       p++; s++;
1605       MOP_OUT;
1606       break;
1607 
1608     case OP_EXACTMB2N2:  MOP_IN(OP_EXACTMB2N2);
1609       DATA_ENSURE(4);
1610       if (*p != *s) goto fail;
1611       p++; s++;
1612       if (*p != *s) goto fail;
1613       p++; s++;
1614       sprev = s;
1615       if (*p != *s) goto fail;
1616       p++; s++;
1617       if (*p != *s) goto fail;
1618       p++; s++;
1619       MOP_OUT;
1620       continue;
1621       break;
1622 
1623     case OP_EXACTMB2N3:  MOP_IN(OP_EXACTMB2N3);
1624       DATA_ENSURE(6);
1625       if (*p != *s) goto fail;
1626       p++; s++;
1627       if (*p != *s) goto fail;
1628       p++; s++;
1629       if (*p != *s) goto fail;
1630       p++; s++;
1631       if (*p != *s) goto fail;
1632       p++; s++;
1633       sprev = s;
1634       if (*p != *s) goto fail;
1635       p++; s++;
1636       if (*p != *s) goto fail;
1637       p++; s++;
1638       MOP_OUT;
1639       continue;
1640       break;
1641 
1642     case OP_EXACTMB2N:  MOP_IN(OP_EXACTMB2N);
1643       GET_LENGTH_INC(tlen, p);
1644       DATA_ENSURE(tlen * 2);
1645       while (tlen-- > 0) {
1646         if (*p != *s) goto fail;
1647         p++; s++;
1648         if (*p != *s) goto fail;
1649         p++; s++;
1650       }
1651       sprev = s - 2;
1652       MOP_OUT;
1653       continue;
1654       break;
1655 
1656     case OP_EXACTMB3N:  MOP_IN(OP_EXACTMB3N);
1657       GET_LENGTH_INC(tlen, p);
1658       DATA_ENSURE(tlen * 3);
1659       while (tlen-- > 0) {
1660         if (*p != *s) goto fail;
1661         p++; s++;
1662         if (*p != *s) goto fail;
1663         p++; s++;
1664         if (*p != *s) goto fail;
1665         p++; s++;
1666       }
1667       sprev = s - 3;
1668       MOP_OUT;
1669       continue;
1670       break;
1671 
1672     case OP_EXACTMBN:  MOP_IN(OP_EXACTMBN);
1673       GET_LENGTH_INC(tlen,  p);  /* mb-len */
1674       GET_LENGTH_INC(tlen2, p);  /* string len */
1675       tlen2 *= tlen;
1676       DATA_ENSURE(tlen2);
1677       while (tlen2-- > 0) {
1678         if (*p != *s) goto fail;
1679         p++; s++;
1680       }
1681       sprev = s - tlen;
1682       MOP_OUT;
1683       continue;
1684       break;
1685 
1686     case OP_CCLASS:  MOP_IN(OP_CCLASS);
1687       DATA_ENSURE(1);
1688       if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1689       p += SIZE_BITSET;
1690       s += enclen(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */
1691       MOP_OUT;
1692       break;
1693 
1694     case OP_CCLASS_MB:  MOP_IN(OP_CCLASS_MB);
1695       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1696 
1697     cclass_mb:
1698       GET_LENGTH_INC(tlen, p);
1699       {
1700         OnigCodePoint code;
1701         UChar *ss;
1702         int mb_len;
1703 
1704         DATA_ENSURE(1);
1705         mb_len = enclen(encode, s);
1706         DATA_ENSURE(mb_len);
1707         ss = s;
1708         s += mb_len;
1709         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1710 
1711 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1712         if (! onig_is_in_code_range(p, code)) goto fail;
1713 #else
1714         q = p;
1715         ALIGNMENT_RIGHT(q);
1716         if (! onig_is_in_code_range(q, code)) goto fail;
1717 #endif
1718       }
1719       p += tlen;
1720       MOP_OUT;
1721       break;
1722 
1723     case OP_CCLASS_MIX:  MOP_IN(OP_CCLASS_MIX);
1724       DATA_ENSURE(1);
1725       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1726         p += SIZE_BITSET;
1727         goto cclass_mb;
1728       }
1729       else {
1730         if (BITSET_AT(((BitSetRef )p), *s) == 0)
1731           goto fail;
1732 
1733         p += SIZE_BITSET;
1734         GET_LENGTH_INC(tlen, p);
1735         p += tlen;
1736         s++;
1737       }
1738       MOP_OUT;
1739       break;
1740 
1741     case OP_CCLASS_NOT:  MOP_IN(OP_CCLASS_NOT);
1742       DATA_ENSURE(1);
1743       if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1744       p += SIZE_BITSET;
1745       s += enclen(encode, s);
1746       MOP_OUT;
1747       break;
1748 
1749     case OP_CCLASS_MB_NOT:  MOP_IN(OP_CCLASS_MB_NOT);
1750       DATA_ENSURE(1);
1751       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1752         s++;
1753         GET_LENGTH_INC(tlen, p);
1754         p += tlen;
1755         goto cc_mb_not_success;
1756       }
1757 
1758     cclass_mb_not:
1759       GET_LENGTH_INC(tlen, p);
1760       {
1761         OnigCodePoint code;
1762         UChar *ss;
1763         int mb_len = enclen(encode, s);
1764 
1765         if (! DATA_ENSURE_CHECK(mb_len)) {
1766           DATA_ENSURE(1);
1767           s = (UChar* )end;
1768           p += tlen;
1769           goto cc_mb_not_success;
1770         }
1771 
1772         ss = s;
1773         s += mb_len;
1774         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1775 
1776 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1777         if (onig_is_in_code_range(p, code)) goto fail;
1778 #else
1779         q = p;
1780         ALIGNMENT_RIGHT(q);
1781         if (onig_is_in_code_range(q, code)) goto fail;
1782 #endif
1783       }
1784       p += tlen;
1785 
1786     cc_mb_not_success:
1787       MOP_OUT;
1788       break;
1789 
1790     case OP_CCLASS_MIX_NOT:  MOP_IN(OP_CCLASS_MIX_NOT);
1791       DATA_ENSURE(1);
1792       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1793         p += SIZE_BITSET;
1794         goto cclass_mb_not;
1795       }
1796       else {
1797         if (BITSET_AT(((BitSetRef )p), *s) != 0)
1798           goto fail;
1799 
1800         p += SIZE_BITSET;
1801         GET_LENGTH_INC(tlen, p);
1802         p += tlen;
1803         s++;
1804       }
1805       MOP_OUT;
1806       break;
1807 
1808     case OP_CCLASS_NODE:  MOP_IN(OP_CCLASS_NODE);
1809       {
1810         OnigCodePoint code;
1811         void *node;
1812         int mb_len;
1813         UChar *ss;
1814 
1815         DATA_ENSURE(1);
1816         GET_POINTER_INC(node, p);
1817         mb_len = enclen(encode, s);
1818         ss = s;
1819         s += mb_len;
1820         DATA_ENSURE(0);
1821         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1822         if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1823       }
1824       MOP_OUT;
1825       break;
1826 
1827     case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR);
1828       DATA_ENSURE(1);
1829       n = enclen(encode, s);
1830       DATA_ENSURE(n);
1831       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1832       s += n;
1833       MOP_OUT;
1834       break;
1835 
1836     case OP_ANYCHAR_ML:  MOP_IN(OP_ANYCHAR_ML);
1837       DATA_ENSURE(1);
1838       n = enclen(encode, s);
1839       DATA_ENSURE(n);
1840       s += n;
1841       MOP_OUT;
1842       break;
1843 
1844     case OP_ANYCHAR_STAR:  MOP_IN(OP_ANYCHAR_STAR);
1845       while (DATA_ENSURE_CHECK1) {
1846         STACK_PUSH_ALT(p, s, sprev);
1847         n = enclen(encode, s);
1848         DATA_ENSURE(n);
1849         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
1850         sprev = s;
1851         s += n;
1852       }
1853       MOP_OUT;
1854       break;
1855 
1856     case OP_ANYCHAR_ML_STAR:  MOP_IN(OP_ANYCHAR_ML_STAR);
1857       while (DATA_ENSURE_CHECK1) {
1858         STACK_PUSH_ALT(p, s, sprev);
1859         n = enclen(encode, s);
1860         if (n > 1) {
1861           DATA_ENSURE(n);
1862           sprev = s;
1863           s += n;
1864         }
1865         else {
1866           sprev = s;
1867           s++;
1868         }
1869       }
1870       MOP_OUT;
1871       break;
1872 
1873     case OP_ANYCHAR_STAR_PEEK_NEXT:  MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1874       while (DATA_ENSURE_CHECK1) {
1875         if (*p == *s) {
1876           STACK_PUSH_ALT(p + 1, s, sprev);
1877         }
1878         n = enclen(encode, s);
1879         DATA_ENSURE(n);
1880         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
1881         sprev = s;
1882         s += n;
1883       }
1884       p++;
1885       MOP_OUT;
1886       break;
1887 
1888     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1889       while (DATA_ENSURE_CHECK1) {
1890         if (*p == *s) {
1891           STACK_PUSH_ALT(p + 1, s, sprev);
1892         }
1893         n = enclen(encode, s);
1894         if (n > 1) {
1895           DATA_ENSURE(n);
1896           sprev = s;
1897           s += n;
1898         }
1899         else {
1900           sprev = s;
1901           s++;
1902         }
1903       }
1904       p++;
1905       MOP_OUT;
1906       break;
1907 
1908 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1909     case OP_STATE_CHECK_ANYCHAR_STAR:  MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1910       GET_STATE_CHECK_NUM_INC(mem, p);
1911       while (DATA_ENSURE_CHECK1) {
1912         STATE_CHECK_VAL(scv, mem);
1913         if (scv) goto fail;
1914 
1915         STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1916         n = enclen(encode, s);
1917         DATA_ENSURE(n);
1918         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
1919         sprev = s;
1920         s += n;
1921       }
1922       MOP_OUT;
1923       break;
1924 
1925     case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1926       MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1927 
1928       GET_STATE_CHECK_NUM_INC(mem, p);
1929       while (DATA_ENSURE_CHECK1) {
1930         STATE_CHECK_VAL(scv, mem);
1931         if (scv) goto fail;
1932 
1933         STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1934         n = enclen(encode, s);
1935         if (n > 1) {
1936           DATA_ENSURE(n);
1937           sprev = s;
1938           s += n;
1939         }
1940         else {
1941           sprev = s;
1942           s++;
1943         }
1944       }
1945       MOP_OUT;
1946       break;
1947 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1948 
1949     case OP_WORD:  MOP_IN(OP_WORD);
1950       DATA_ENSURE(1);
1951       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1952         goto fail;
1953 
1954       s += enclen(encode, s);
1955       MOP_OUT;
1956       break;
1957 
1958     case OP_NOT_WORD:  MOP_IN(OP_NOT_WORD);
1959       DATA_ENSURE(1);
1960       if (ONIGENC_IS_MBC_WORD(encode, s, end))
1961         goto fail;
1962 
1963       s += enclen(encode, s);
1964       MOP_OUT;
1965       break;
1966 
1967     case OP_WORD_BOUND:  MOP_IN(OP_WORD_BOUND);
1968       if (ON_STR_BEGIN(s)) {
1969         DATA_ENSURE(1);
1970         if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1971           goto fail;
1972       }
1973       else if (ON_STR_END(s)) {
1974         if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1975           goto fail;
1976       }
1977       else {
1978         if (ONIGENC_IS_MBC_WORD(encode, s, end)
1979             == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1980           goto fail;
1981       }
1982       MOP_OUT;
1983       continue;
1984       break;
1985 
1986     case OP_NOT_WORD_BOUND:  MOP_IN(OP_NOT_WORD_BOUND);
1987       if (ON_STR_BEGIN(s)) {
1988         if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1989           goto fail;
1990       }
1991       else if (ON_STR_END(s)) {
1992         if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1993           goto fail;
1994       }
1995       else {
1996         if (ONIGENC_IS_MBC_WORD(encode, s, end)
1997             != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1998           goto fail;
1999       }
2000       MOP_OUT;
2001       continue;
2002       break;
2003 
2004 #ifdef USE_WORD_BEGIN_END
2005     case OP_WORD_BEGIN:  MOP_IN(OP_WORD_BEGIN);
2006       if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2007         if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2008           MOP_OUT;
2009           continue;
2010         }
2011       }
2012       goto fail;
2013       break;
2014 
2015     case OP_WORD_END:  MOP_IN(OP_WORD_END);
2016       if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2017         if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2018           MOP_OUT;
2019           continue;
2020         }
2021       }
2022       goto fail;
2023       break;
2024 #endif
2025 
2026     case OP_BEGIN_BUF:  MOP_IN(OP_BEGIN_BUF);
2027       if (! ON_STR_BEGIN(s)) goto fail;
2028 
2029       MOP_OUT;
2030       continue;
2031       break;
2032 
2033     case OP_END_BUF:  MOP_IN(OP_END_BUF);
2034       if (! ON_STR_END(s)) goto fail;
2035 
2036       MOP_OUT;
2037       continue;
2038       break;
2039 
2040     case OP_BEGIN_LINE:  MOP_IN(OP_BEGIN_LINE);
2041       if (ON_STR_BEGIN(s)) {
2042         if (IS_NOTBOL(msa->options)) goto fail;
2043         MOP_OUT;
2044         continue;
2045       }
2046       else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2047         MOP_OUT;
2048         continue;
2049       }
2050       goto fail;
2051       break;
2052 
2053     case OP_END_LINE:  MOP_IN(OP_END_LINE);
2054       if (ON_STR_END(s)) {
2055 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2056         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2057 #endif
2058           if (IS_NOTEOL(msa->options)) goto fail;
2059           MOP_OUT;
2060           continue;
2061 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2062         }
2063 #endif
2064       }
2065       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2066         MOP_OUT;
2067         continue;
2068       }
2069 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2070       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2071         MOP_OUT;
2072         continue;
2073       }
2074 #endif
2075       goto fail;
2076       break;
2077 
2078     case OP_SEMI_END_BUF:  MOP_IN(OP_SEMI_END_BUF);
2079       if (ON_STR_END(s)) {
2080 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2081         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2082 #endif
2083           if (IS_NOTEOL(msa->options)) goto fail;
2084           MOP_OUT;
2085           continue;
2086 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2087         }
2088 #endif
2089       }
2090       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2091                ON_STR_END(s + enclen(encode, s))) {
2092         MOP_OUT;
2093         continue;
2094       }
2095 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2096       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2097         UChar* ss = s + enclen(encode, s);
2098         ss += enclen(encode, ss);
2099         if (ON_STR_END(ss)) {
2100           MOP_OUT;
2101           continue;
2102         }
2103       }
2104 #endif
2105       goto fail;
2106       break;
2107 
2108     case OP_BEGIN_POSITION:  MOP_IN(OP_BEGIN_POSITION);
2109       if (s != msa->start)
2110         goto fail;
2111 
2112       MOP_OUT;
2113       continue;
2114       break;
2115 
2116     case OP_MEMORY_START_PUSH:  MOP_IN(OP_MEMORY_START_PUSH);
2117       GET_MEMNUM_INC(mem, p);
2118       STACK_PUSH_MEM_START(mem, s);
2119       MOP_OUT;
2120       continue;
2121       break;
2122 
2123     case OP_MEMORY_START:  MOP_IN(OP_MEMORY_START);
2124       GET_MEMNUM_INC(mem, p);
2125       mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2126       MOP_OUT;
2127       continue;
2128       break;
2129 
2130     case OP_MEMORY_END_PUSH:  MOP_IN(OP_MEMORY_END_PUSH);
2131       GET_MEMNUM_INC(mem, p);
2132       STACK_PUSH_MEM_END(mem, s);
2133       MOP_OUT;
2134       continue;
2135       break;
2136 
2137     case OP_MEMORY_END:  MOP_IN(OP_MEMORY_END);
2138       GET_MEMNUM_INC(mem, p);
2139       mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2140       MOP_OUT;
2141       continue;
2142       break;
2143 
2144 #ifdef USE_SUBEXP_CALL
2145     case OP_MEMORY_END_PUSH_REC:  MOP_IN(OP_MEMORY_END_PUSH_REC);
2146       GET_MEMNUM_INC(mem, p);
2147       STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2148       STACK_PUSH_MEM_END(mem, s);
2149       mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2150       MOP_OUT;
2151       continue;
2152       break;
2153 
2154     case OP_MEMORY_END_REC:  MOP_IN(OP_MEMORY_END_REC);
2155       GET_MEMNUM_INC(mem, p);
2156       mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2157       STACK_GET_MEM_START(mem, stkp);
2158 
2159       if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2160         mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2161       else
2162         mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2163 
2164       STACK_PUSH_MEM_END_MARK(mem);
2165       MOP_OUT;
2166       continue;
2167       break;
2168 #endif
2169 
2170     case OP_BACKREF1:  MOP_IN(OP_BACKREF1);
2171       mem = 1;
2172       goto backref;
2173       break;
2174 
2175     case OP_BACKREF2:  MOP_IN(OP_BACKREF2);
2176       mem = 2;
2177       goto backref;
2178       break;
2179 
2180     case OP_BACKREFN:  MOP_IN(OP_BACKREFN);
2181       GET_MEMNUM_INC(mem, p);
2182     backref:
2183       {
2184         int len;
2185         UChar *pstart, *pend;
2186 
2187         /* if you want to remove following line,
2188            you should check in parse and compile time. */
2189         if (mem > num_mem) goto fail;
2190         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
2191         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2192 
2193         if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2194           pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2195         else
2196           pstart = (UChar* )((void* )mem_start_stk[mem]);
2197 
2198         pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2199                 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2200                 : (UChar* )((void* )mem_end_stk[mem]));
2201         n = pend - pstart;
2202         DATA_ENSURE(n);
2203         sprev = s;
2204         STRING_CMP(pstart, s, n);
2205         while (sprev + (len = enclen(encode, sprev)) < s)
2206           sprev += len;
2207 
2208         MOP_OUT;
2209         continue;
2210       }
2211       break;
2212 
2213     case OP_BACKREFN_IC:  MOP_IN(OP_BACKREFN_IC);
2214       GET_MEMNUM_INC(mem, p);
2215       {
2216         int len;
2217         UChar *pstart, *pend;
2218 
2219         /* if you want to remove following line,
2220            you should check in parse and compile time. */
2221         if (mem > num_mem) goto fail;
2222         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
2223         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2224 
2225         if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2226           pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2227         else
2228           pstart = (UChar* )((void* )mem_start_stk[mem]);
2229 
2230         pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2231                 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2232                 : (UChar* )((void* )mem_end_stk[mem]));
2233         n = pend - pstart;
2234         DATA_ENSURE(n);
2235         sprev = s;
2236         STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2237         while (sprev + (len = enclen(encode, sprev)) < s)
2238           sprev += len;
2239 
2240         MOP_OUT;
2241         continue;
2242       }
2243       break;
2244 
2245     case OP_BACKREF_MULTI:  MOP_IN(OP_BACKREF_MULTI);
2246       {
2247         int len, is_fail;
2248         UChar *pstart, *pend, *swork;
2249 
2250         GET_LENGTH_INC(tlen, p);
2251         for (i = 0; i < tlen; i++) {
2252           GET_MEMNUM_INC(mem, p);
2253 
2254           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
2255           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2256 
2257           if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2258             pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2259           else
2260             pstart = (UChar* )((void* )mem_start_stk[mem]);
2261 
2262           pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2263                   ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2264                   : (UChar* )((void* )mem_end_stk[mem]));
2265           n = pend - pstart;
2266           DATA_ENSURE(n);
2267           sprev = s;
2268           swork = s;
2269           STRING_CMP_VALUE(pstart, swork, n, is_fail);
2270           if (is_fail) continue;
2271           s = swork;
2272           while (sprev + (len = enclen(encode, sprev)) < s)
2273             sprev += len;
2274 
2275           p += (SIZE_MEMNUM * (tlen - i - 1));
2276           break; /* success */
2277         }
2278         if (i == tlen) goto fail;
2279         MOP_OUT;
2280         continue;
2281       }
2282       break;
2283 
2284     case OP_BACKREF_MULTI_IC:  MOP_IN(OP_BACKREF_MULTI_IC);
2285       {
2286         int len, is_fail;
2287         UChar *pstart, *pend, *swork;
2288 
2289         GET_LENGTH_INC(tlen, p);
2290         for (i = 0; i < tlen; i++) {
2291           GET_MEMNUM_INC(mem, p);
2292 
2293           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
2294           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2295 
2296           if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2297             pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2298           else
2299             pstart = (UChar* )((void* )mem_start_stk[mem]);
2300 
2301           pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2302                   ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2303                   : (UChar* )((void* )mem_end_stk[mem]));
2304           n = pend - pstart;
2305           DATA_ENSURE(n);
2306           sprev = s;
2307           swork = s;
2308           STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2309           if (is_fail) continue;
2310           s = swork;
2311           while (sprev + (len = enclen(encode, sprev)) < s)
2312             sprev += len;
2313 
2314           p += (SIZE_MEMNUM * (tlen - i - 1));
2315           break; /* success */
2316         }
2317         if (i == tlen) goto fail;
2318         MOP_OUT;
2319         continue;
2320       }
2321       break;
2322 
2323 #ifdef USE_BACKREF_WITH_LEVEL
2324     case OP_BACKREF_WITH_LEVEL:
2325       {
2326         int len;
2327         OnigOptionType ic;
2328         LengthType level;
2329 
2330         GET_OPTION_INC(ic,    p);
2331         GET_LENGTH_INC(level, p);
2332         GET_LENGTH_INC(tlen,  p);
2333 
2334         sprev = s;
2335         if (backref_match_at_nested_level(reg, stk, stk_base, ic
2336                      , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2337           while (sprev + (len = enclen(encode, sprev)) < s)
2338             sprev += len;
2339 
2340           p += (SIZE_MEMNUM * tlen);
2341         }
2342         else
2343           goto fail;
2344 
2345         MOP_OUT;
2346         continue;
2347       }
2348       break;
2349 #endif
2350 
2351 #if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */
2352     case OP_SET_OPTION_PUSH:  MOP_IN(OP_SET_OPTION_PUSH);
2353       GET_OPTION_INC(option, p);
2354       STACK_PUSH_ALT(p, s, sprev);
2355       p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2356       MOP_OUT;
2357       continue;
2358       break;
2359 
2360     case OP_SET_OPTION:  MOP_IN(OP_SET_OPTION);
2361       GET_OPTION_INC(option, p);
2362       MOP_OUT;
2363       continue;
2364       break;
2365 #endif
2366 
2367     case OP_NULL_CHECK_START:  MOP_IN(OP_NULL_CHECK_START);
2368       GET_MEMNUM_INC(mem, p);    /* mem: null check id */
2369       STACK_PUSH_NULL_CHECK_START(mem, s);
2370       MOP_OUT;
2371       continue;
2372       break;
2373 
2374     case OP_NULL_CHECK_END:  MOP_IN(OP_NULL_CHECK_END);
2375       {
2376         int isnull;
2377 
2378         GET_MEMNUM_INC(mem, p); /* mem: null check id */
2379         STACK_NULL_CHECK(isnull, mem, s);
2380         if (isnull) {
2381 #ifdef ONIG_DEBUG_MATCH
2382           fprintf(stderr, "NULL_CHECK_END: skip  id:%d, s:%d\n",
2383                   (int )mem, (int )s);
2384 #endif
2385         null_check_found:
2386           /* empty loop founded, skip next instruction */
2387           switch (*p++) {
2388           case OP_JUMP:
2389           case OP_PUSH:
2390             p += SIZE_RELADDR;
2391             break;
2392           case OP_REPEAT_INC:
2393           case OP_REPEAT_INC_NG:
2394           case OP_REPEAT_INC_SG:
2395           case OP_REPEAT_INC_NG_SG:
2396             p += SIZE_MEMNUM;
2397             break;
2398           default:
2399             goto unexpected_bytecode_error;
2400             break;
2401           }
2402         }
2403       }
2404       MOP_OUT;
2405       continue;
2406       break;
2407 
2408 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2409     case OP_NULL_CHECK_END_MEMST:  MOP_IN(OP_NULL_CHECK_END_MEMST);
2410       {
2411         int isnull;
2412 
2413         GET_MEMNUM_INC(mem, p); /* mem: null check id */
2414         STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2415         if (isnull) {
2416 #ifdef ONIG_DEBUG_MATCH
2417           fprintf(stderr, "NULL_CHECK_END_MEMST: skip  id:%d, s:%d\n",
2418                   (int )mem, (int )s);
2419 #endif
2420           if (isnull == -1) goto fail;
2421           goto 	null_check_found;
2422         }
2423       }
2424       MOP_OUT;
2425       continue;
2426       break;
2427 #endif
2428 
2429 #ifdef USE_SUBEXP_CALL
2430     case OP_NULL_CHECK_END_MEMST_PUSH:
2431       MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2432       {
2433         int isnull;
2434 
2435         GET_MEMNUM_INC(mem, p); /* mem: null check id */
2436 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2437         STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2438 #else
2439         STACK_NULL_CHECK_REC(isnull, mem, s);
2440 #endif
2441         if (isnull) {
2442 #ifdef ONIG_DEBUG_MATCH
2443           fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip  id:%d, s:%d\n",
2444                   (int )mem, (int )s);
2445 #endif
2446           if (isnull == -1) goto fail;
2447           goto 	null_check_found;
2448         }
2449         else {
2450           STACK_PUSH_NULL_CHECK_END(mem);
2451         }
2452       }
2453       MOP_OUT;
2454       continue;
2455       break;
2456 #endif
2457 
2458     case OP_JUMP:  MOP_IN(OP_JUMP);
2459       GET_RELADDR_INC(addr, p);
2460       p += addr;
2461       MOP_OUT;
2462       CHECK_INTERRUPT_IN_MATCH_AT;
2463       continue;
2464       break;
2465 
2466     case OP_PUSH:  MOP_IN(OP_PUSH);
2467       GET_RELADDR_INC(addr, p);
2468       STACK_PUSH_ALT(p + addr, s, sprev);
2469       MOP_OUT;
2470       continue;
2471       break;
2472 
2473 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2474     case OP_STATE_CHECK_PUSH:  MOP_IN(OP_STATE_CHECK_PUSH);
2475       GET_STATE_CHECK_NUM_INC(mem, p);
2476       STATE_CHECK_VAL(scv, mem);
2477       if (scv) goto fail;
2478 
2479       GET_RELADDR_INC(addr, p);
2480       STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2481       MOP_OUT;
2482       continue;
2483       break;
2484 
2485     case OP_STATE_CHECK_PUSH_OR_JUMP:  MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2486       GET_STATE_CHECK_NUM_INC(mem, p);
2487       GET_RELADDR_INC(addr, p);
2488       STATE_CHECK_VAL(scv, mem);
2489       if (scv) {
2490         p += addr;
2491       }
2492       else {
2493         STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2494       }
2495       MOP_OUT;
2496       continue;
2497       break;
2498 
2499     case OP_STATE_CHECK:  MOP_IN(OP_STATE_CHECK);
2500       GET_STATE_CHECK_NUM_INC(mem, p);
2501       STATE_CHECK_VAL(scv, mem);
2502       if (scv) goto fail;
2503 
2504       STACK_PUSH_STATE_CHECK(s, mem);
2505       MOP_OUT;
2506       continue;
2507       break;
2508 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2509 
2510     case OP_POP:  MOP_IN(OP_POP);
2511       STACK_POP_ONE;
2512       MOP_OUT;
2513       continue;
2514       break;
2515 
2516     case OP_PUSH_OR_JUMP_EXACT1:  MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2517       GET_RELADDR_INC(addr, p);
2518       if (*p == *s && DATA_ENSURE_CHECK1) {
2519         p++;
2520         STACK_PUSH_ALT(p + addr, s, sprev);
2521         MOP_OUT;
2522         continue;
2523       }
2524       p += (addr + 1);
2525       MOP_OUT;
2526       continue;
2527       break;
2528 
2529     case OP_PUSH_IF_PEEK_NEXT:  MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2530       GET_RELADDR_INC(addr, p);
2531       if (*p == *s) {
2532         p++;
2533         STACK_PUSH_ALT(p + addr, s, sprev);
2534         MOP_OUT;
2535         continue;
2536       }
2537       p++;
2538       MOP_OUT;
2539       continue;
2540       break;
2541 
2542     case OP_REPEAT:  MOP_IN(OP_REPEAT);
2543       {
2544         GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
2545         GET_RELADDR_INC(addr, p);
2546 
2547         STACK_ENSURE(1);
2548         repeat_stk[mem] = GET_STACK_INDEX(stk);
2549         STACK_PUSH_REPEAT(mem, p);
2550 
2551         if (reg->repeat_range[mem].lower == 0) {
2552           STACK_PUSH_ALT(p + addr, s, sprev);
2553         }
2554       }
2555       MOP_OUT;
2556       continue;
2557       break;
2558 
2559     case OP_REPEAT_NG:  MOP_IN(OP_REPEAT_NG);
2560       {
2561         GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
2562         GET_RELADDR_INC(addr, p);
2563 
2564         STACK_ENSURE(1);
2565         repeat_stk[mem] = GET_STACK_INDEX(stk);
2566         STACK_PUSH_REPEAT(mem, p);
2567 
2568         if (reg->repeat_range[mem].lower == 0) {
2569           STACK_PUSH_ALT(p, s, sprev);
2570           p += addr;
2571         }
2572       }
2573       MOP_OUT;
2574       continue;
2575       break;
2576 
2577     case OP_REPEAT_INC:  MOP_IN(OP_REPEAT_INC);
2578       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2579       si = repeat_stk[mem];
2580       stkp = STACK_AT(si);
2581 
2582     repeat_inc:
2583       stkp->u.repeat.count++;
2584       if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2585         /* end of repeat. Nothing to do. */
2586       }
2587       else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2588         STACK_PUSH_ALT(p, s, sprev);
2589         p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2590       }
2591       else {
2592         p = stkp->u.repeat.pcode;
2593       }
2594       STACK_PUSH_REPEAT_INC(si);
2595       MOP_OUT;
2596       CHECK_INTERRUPT_IN_MATCH_AT;
2597       continue;
2598       break;
2599 
2600     case OP_REPEAT_INC_SG:  MOP_IN(OP_REPEAT_INC_SG);
2601       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2602       STACK_GET_REPEAT(mem, stkp);
2603       si = GET_STACK_INDEX(stkp);
2604       goto repeat_inc;
2605       break;
2606 
2607     case OP_REPEAT_INC_NG:  MOP_IN(OP_REPEAT_INC_NG);
2608       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2609       si = repeat_stk[mem];
2610       stkp = STACK_AT(si);
2611 
2612     repeat_inc_ng:
2613       stkp->u.repeat.count++;
2614       if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2615         if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2616           UChar* pcode = stkp->u.repeat.pcode;
2617 
2618           STACK_PUSH_REPEAT_INC(si);
2619           STACK_PUSH_ALT(pcode, s, sprev);
2620         }
2621         else {
2622           p = stkp->u.repeat.pcode;
2623           STACK_PUSH_REPEAT_INC(si);
2624         }
2625       }
2626       else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2627         STACK_PUSH_REPEAT_INC(si);
2628       }
2629       MOP_OUT;
2630       CHECK_INTERRUPT_IN_MATCH_AT;
2631       continue;
2632       break;
2633 
2634     case OP_REPEAT_INC_NG_SG:  MOP_IN(OP_REPEAT_INC_NG_SG);
2635       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2636       STACK_GET_REPEAT(mem, stkp);
2637       si = GET_STACK_INDEX(stkp);
2638       goto repeat_inc_ng;
2639       break;
2640 
2641     case OP_PUSH_POS:  MOP_IN(OP_PUSH_POS);
2642       STACK_PUSH_POS(s, sprev);
2643       MOP_OUT;
2644       continue;
2645       break;
2646 
2647     case OP_POP_POS:  MOP_IN(OP_POP_POS);
2648       {
2649         STACK_POS_END(stkp);
2650         s     = stkp->u.state.pstr;
2651         sprev = stkp->u.state.pstr_prev;
2652       }
2653       MOP_OUT;
2654       continue;
2655       break;
2656 
2657     case OP_PUSH_POS_NOT:  MOP_IN(OP_PUSH_POS_NOT);
2658       GET_RELADDR_INC(addr, p);
2659       STACK_PUSH_POS_NOT(p + addr, s, sprev);
2660       MOP_OUT;
2661       continue;
2662       break;
2663 
2664     case OP_FAIL_POS:  MOP_IN(OP_FAIL_POS);
2665       STACK_POP_TIL_POS_NOT;
2666       goto fail;
2667       break;
2668 
2669     case OP_PUSH_STOP_BT:  MOP_IN(OP_PUSH_STOP_BT);
2670       STACK_PUSH_STOP_BT;
2671       MOP_OUT;
2672       continue;
2673       break;
2674 
2675     case OP_POP_STOP_BT:  MOP_IN(OP_POP_STOP_BT);
2676       STACK_STOP_BT_END;
2677       MOP_OUT;
2678       continue;
2679       break;
2680 
2681     case OP_LOOK_BEHIND:  MOP_IN(OP_LOOK_BEHIND);
2682       GET_LENGTH_INC(tlen, p);
2683       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2684       if (IS_NULL(s)) goto fail;
2685       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2686       MOP_OUT;
2687       continue;
2688       break;
2689 
2690     case OP_PUSH_LOOK_BEHIND_NOT:  MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2691       GET_RELADDR_INC(addr, p);
2692       GET_LENGTH_INC(tlen, p);
2693       q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2694       if (IS_NULL(q)) {
2695         /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2696            If you want to change to fail, replace following line. */
2697         p += addr;
2698         /* goto fail; */
2699       }
2700       else {
2701         STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2702         s = q;
2703         sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2704       }
2705       MOP_OUT;
2706       continue;
2707       break;
2708 
2709     case OP_FAIL_LOOK_BEHIND_NOT:  MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2710       STACK_POP_TIL_LOOK_BEHIND_NOT;
2711       goto fail;
2712       break;
2713 
2714 #ifdef USE_SUBEXP_CALL
2715     case OP_CALL:  MOP_IN(OP_CALL);
2716       GET_ABSADDR_INC(addr, p);
2717       STACK_PUSH_CALL_FRAME(p);
2718       p = reg->p + addr;
2719       MOP_OUT;
2720       continue;
2721       break;
2722 
2723     case OP_RETURN:  MOP_IN(OP_RETURN);
2724       STACK_RETURN(p);
2725       STACK_PUSH_RETURN;
2726       MOP_OUT;
2727       continue;
2728       break;
2729 #endif
2730 
2731     case OP_FINISH:
2732       goto finish;
2733       break;
2734 
2735     fail:
2736       MOP_OUT;
2737       /* fall */
2738     case OP_FAIL:  MOP_IN(OP_FAIL);
2739       STACK_POP;
2740       p     = stk->u.state.pcode;
2741       s     = stk->u.state.pstr;
2742       sprev = stk->u.state.pstr_prev;
2743 
2744 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2745       if (stk->u.state.state_check != 0) {
2746         stk->type = STK_STATE_CHECK_MARK;
2747         stk++;
2748       }
2749 #endif
2750 
2751       MOP_OUT;
2752       continue;
2753       break;
2754 
2755     default:
2756       goto bytecode_error;
2757 
2758     } /* end of switch */
2759     sprev = sbegin;
2760   } /* end of while(1) */
2761 
2762  finish:
2763   STACK_SAVE;
2764   return best_len;
2765 
2766 #ifdef ONIG_DEBUG
2767  stack_error:
2768   STACK_SAVE;
2769   return ONIGERR_STACK_BUG;
2770 #endif
2771 
2772  bytecode_error:
2773   STACK_SAVE;
2774   return ONIGERR_UNDEFINED_BYTECODE;
2775 
2776  unexpected_bytecode_error:
2777   STACK_SAVE;
2778   return ONIGERR_UNEXPECTED_BYTECODE;
2779 }
2780 
2781 
2782 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2783 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2784 	    const UChar* text, const UChar* text_end, UChar* text_range)
2785 {
2786   UChar *t, *p, *s, *end;
2787 
2788   end = (UChar* )text_end;
2789   end -= target_end - target - 1;
2790   if (end > text_range)
2791     end = text_range;
2792 
2793   s = (UChar* )text;
2794 
2795   while (s < end) {
2796     if (*s == *target) {
2797       p = s + 1;
2798       t = target + 1;
2799       while (t < target_end) {
2800         if (*t != *p++)
2801           break;
2802         t++;
2803       }
2804       if (t == target_end)
2805         return s;
2806     }
2807     s += enclen(enc, s);
2808   }
2809 
2810   return (UChar* )NULL;
2811 }
2812 
2813 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2814 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2815                      const UChar* t, const UChar* tend,
2816 		     const UChar* p, const UChar* end)
2817 {
2818   int lowlen;
2819   UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2820 
2821   while (t < tend) {
2822     lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2823     q = lowbuf;
2824     while (lowlen > 0) {
2825       if (*t++ != *q++)	return 0;
2826       lowlen--;
2827     }
2828   }
2829 
2830   return 1;
2831 }
2832 
2833 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2834 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2835 	       UChar* target, UChar* target_end,
2836 	       const UChar* text, const UChar* text_end, UChar* text_range)
2837 {
2838   UChar *s, *end;
2839 
2840   end = (UChar* )text_end;
2841   end -= target_end - target - 1;
2842   if (end > text_range)
2843     end = text_range;
2844 
2845   s = (UChar* )text;
2846 
2847   while (s < end) {
2848     if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2849 			     s, text_end))
2850       return s;
2851 
2852     s += enclen(enc, s);
2853   }
2854 
2855   return (UChar* )NULL;
2856 }
2857 
2858 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2859 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2860 		     const UChar* text, const UChar* adjust_text,
2861 		     const UChar* text_end, const UChar* text_start)
2862 {
2863   UChar *t, *p, *s;
2864 
2865   s = (UChar* )text_end;
2866   s -= (target_end - target);
2867   if (s > text_start)
2868     s = (UChar* )text_start;
2869   else
2870     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2871 
2872   while (s >= text) {
2873     if (*s == *target) {
2874       p = s + 1;
2875       t = target + 1;
2876       while (t < target_end) {
2877         if (*t != *p++)
2878           break;
2879         t++;
2880       }
2881       if (t == target_end)
2882         return s;
2883     }
2884     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2885   }
2886 
2887   return (UChar* )NULL;
2888 }
2889 
2890 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2891 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2892 			UChar* target, UChar* target_end,
2893 			const UChar* text, const UChar* adjust_text,
2894 			const UChar* text_end, const UChar* text_start)
2895 {
2896   UChar *s;
2897 
2898   s = (UChar* )text_end;
2899   s -= (target_end - target);
2900   if (s > text_start)
2901     s = (UChar* )text_start;
2902   else
2903     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2904 
2905   while (s >= text) {
2906     if (str_lower_case_match(enc, case_fold_flag,
2907                              target, target_end, s, text_end))
2908       return s;
2909 
2910     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2911   }
2912 
2913   return (UChar* )NULL;
2914 }
2915 
2916 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2917 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2918 		 const UChar* text, const UChar* text_end,
2919 		 const UChar* text_range)
2920 {
2921   const UChar *s, *se, *t, *p, *end;
2922   const UChar *tail;
2923   int skip, tlen1;
2924 
2925 #ifdef ONIG_DEBUG_SEARCH
2926   fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2927 	  (int )text, (int )text_end, (int )text_range);
2928 #endif
2929 
2930   tail = target_end - 1;
2931   tlen1 = tail - target;
2932   end = text_range;
2933   if (end + tlen1 > text_end)
2934     end = text_end - tlen1;
2935 
2936   s = text;
2937 
2938   if (IS_NULL(reg->int_map)) {
2939     while (s < end) {
2940       p = se = s + tlen1;
2941       t = tail;
2942       while (*p == *t) {
2943         if (t == target) return (UChar* )s;
2944         p--; t--;
2945       }
2946       skip = reg->map[*se];
2947       t = s;
2948       do {
2949         s += enclen(reg->enc, s);
2950       } while ((s - t) < skip && s < end);
2951     }
2952   }
2953   else {
2954     while (s < end) {
2955       p = se = s + tlen1;
2956       t = tail;
2957       while (*p == *t) {
2958         if (t == target) return (UChar* )s;
2959         p--; t--;
2960       }
2961       skip = reg->int_map[*se];
2962       t = s;
2963       do {
2964         s += enclen(reg->enc, s);
2965       } while ((s - t) < skip && s < end);
2966     }
2967   }
2968 
2969   return (UChar* )NULL;
2970 }
2971 
2972 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2973 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2974 	  const UChar* text, const UChar* text_end, const UChar* text_range)
2975 {
2976   const UChar *s, *t, *p, *end;
2977   const UChar *tail;
2978 
2979   end = text_range + (target_end - target) - 1;
2980   if (end > text_end)
2981     end = text_end;
2982 
2983   tail = target_end - 1;
2984   s = text + (target_end - target) - 1;
2985   if (IS_NULL(reg->int_map)) {
2986     while (s < end) {
2987       p = s;
2988       t = tail;
2989       while (*p == *t) {
2990         if (t == target) return (UChar* )p;
2991         p--; t--;
2992       }
2993       s += reg->map[*s];
2994     }
2995   }
2996   else { /* see int_map[] */
2997     while (s < end) {
2998       p = s;
2999       t = tail;
3000       while (*p == *t) {
3001         if (t == target) return (UChar* )p;
3002         p--; t--;
3003       }
3004       s += reg->int_map[*s];
3005     }
3006   }
3007   return (UChar* )NULL;
3008 }
3009 
3010 #ifdef USE_INT_MAP_BACKWARD
3011 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,int ** skip)3012 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3013 		     int** skip)
3014 
3015 {
3016   int i, len;
3017 
3018   if (IS_NULL(*skip)) {
3019     *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3020     if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3021   }
3022 
3023   len = end - s;
3024   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3025     (*skip)[i] = len;
3026 
3027   for (i = len - 1; i > 0; i--)
3028     (*skip)[s[i]] = i;
3029 
3030   return 0;
3031 }
3032 
3033 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3034 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3035 		   const UChar* text, const UChar* adjust_text,
3036 		   const UChar* text_end, const UChar* text_start)
3037 {
3038   const UChar *s, *t, *p;
3039 
3040   s = text_end - (target_end - target);
3041   if (text_start < s)
3042     s = text_start;
3043   else
3044     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3045 
3046   while (s >= text) {
3047     p = s;
3048     t = target;
3049     while (t < target_end && *p == *t) {
3050       p++; t++;
3051     }
3052     if (t == target_end)
3053       return (UChar* )s;
3054 
3055     s -= reg->int_map_backward[*s];
3056     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3057   }
3058 
3059   return (UChar* )NULL;
3060 }
3061 #endif
3062 
3063 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3064 map_search(OnigEncoding enc, UChar map[],
3065 	   const UChar* text, const UChar* text_range)
3066 {
3067   const UChar *s = text;
3068 
3069   while (s < text_range) {
3070     if (map[*s]) return (UChar* )s;
3071 
3072     s += enclen(enc, s);
3073   }
3074   return (UChar* )NULL;
3075 }
3076 
3077 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3078 map_search_backward(OnigEncoding enc, UChar map[],
3079 		    const UChar* text, const UChar* adjust_text,
3080 		    const UChar* text_start)
3081 {
3082   const UChar *s = text_start;
3083 
3084   while (s >= text) {
3085     if (map[*s]) return (UChar* )s;
3086 
3087     s = onigenc_get_prev_char_head(enc, adjust_text, s);
3088   }
3089   return (UChar* )NULL;
3090 }
3091 
3092 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3093 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3094 	    OnigOptionType option)
3095 {
3096   int r;
3097   UChar *prev;
3098   OnigMatchArg msa;
3099 
3100   MATCH_ARG_INIT(msa, reg, option, region, at);
3101 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3102   {
3103     int offset = at - str;
3104     STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3105   }
3106 #endif
3107 
3108   if (region
3109 #ifdef USE_POSIX_API_REGION_OPTION
3110       && !IS_POSIX_REGION(option)
3111 #endif
3112       ) {
3113     r = onig_region_resize_clear(region, reg->num_mem + 1);
3114   }
3115   else
3116     r = 0;
3117 
3118   if (r == 0) {
3119     if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3120       if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
3121         r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
3122         goto end;
3123       }
3124     }
3125 
3126     prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3127     r = match_at(reg, str, end,
3128 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3129 		 end,
3130 #endif
3131 		 at, prev, &msa);
3132   }
3133 
3134  end:
3135   MATCH_ARG_FREE(msa);
3136   return r;
3137 }
3138 
3139 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3140 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3141 		     UChar* range, UChar** low, UChar** high, UChar** low_prev)
3142 {
3143   UChar *p, *pprev = (UChar* )NULL;
3144 
3145 #ifdef ONIG_DEBUG_SEARCH
3146   fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3147 	  (int )str, (int )end, (int )s, (int )range);
3148 #endif
3149 
3150   p = s;
3151   if (reg->dmin > 0) {
3152     if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3153       p += reg->dmin;
3154     }
3155     else {
3156       UChar *q = p + reg->dmin;
3157 
3158       if (q >= end) return 0; /* fail */
3159       while (p < q) p += enclen(reg->enc, p);
3160     }
3161   }
3162 
3163  retry:
3164   switch (reg->optimize) {
3165   case ONIG_OPTIMIZE_EXACT:
3166     p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3167     break;
3168   case ONIG_OPTIMIZE_EXACT_IC:
3169     p = slow_search_ic(reg->enc, reg->case_fold_flag,
3170                        reg->exact, reg->exact_end, p, end, range);
3171     break;
3172 
3173   case ONIG_OPTIMIZE_EXACT_BM:
3174     p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3175     break;
3176 
3177   case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3178     p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3179     break;
3180 
3181   case ONIG_OPTIMIZE_MAP:
3182     p = map_search(reg->enc, reg->map, p, range);
3183     break;
3184   }
3185 
3186   if (p && p < range) {
3187     if (p - reg->dmin < s) {
3188     retry_gate:
3189       pprev = p;
3190       p += enclen(reg->enc, p);
3191       goto retry;
3192     }
3193 
3194     if (reg->sub_anchor) {
3195       UChar* prev;
3196 
3197       switch (reg->sub_anchor) {
3198       case ANCHOR_BEGIN_LINE:
3199         if (!ON_STR_BEGIN(p)) {
3200           prev = onigenc_get_prev_char_head(reg->enc,
3201                                             (pprev ? pprev : str), p);
3202           if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3203             goto retry_gate;
3204         }
3205         break;
3206 
3207       case ANCHOR_END_LINE:
3208         if (ON_STR_END(p)) {
3209 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3210           prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3211                                                      (pprev ? pprev : str), p);
3212           if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3213             goto retry_gate;
3214 #endif
3215         }
3216         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3217 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3218                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3219 #endif
3220                  )
3221           goto retry_gate;
3222         break;
3223       }
3224     }
3225 
3226     if (reg->dmax == 0) {
3227       *low = p;
3228       if (low_prev) {
3229         if (*low > s)
3230           *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3231         else
3232           *low_prev = onigenc_get_prev_char_head(reg->enc,
3233                                                  (pprev ? pprev : str), p);
3234       }
3235     }
3236     else {
3237       if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3238         if (p - str < reg->dmax) {
3239           *low = (UChar* )str;
3240           if (low_prev)
3241             *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
3242         }
3243         else {
3244           *low = p - reg->dmax;
3245           if (*low > s) {
3246             *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3247                                                  *low, (const UChar** )low_prev);
3248             if (low_prev && IS_NULL(*low_prev))
3249               *low_prev = onigenc_get_prev_char_head(reg->enc,
3250                                                      (pprev ? pprev : s), *low);
3251           }
3252           else {
3253             if (low_prev)
3254               *low_prev = onigenc_get_prev_char_head(reg->enc,
3255                                                      (pprev ? pprev : str), *low);
3256           }
3257         }
3258       }
3259     }
3260     /* no needs to adjust *high, *high is used as range check only */
3261     *high = p - reg->dmin;
3262 
3263 #ifdef ONIG_DEBUG_SEARCH
3264     fprintf(stderr,
3265     "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3266 	    (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3267 #endif
3268     return 1; /* success */
3269   }
3270 
3271   return 0; /* fail */
3272 }
3273 
3274 
3275 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100
3276 
3277 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3278 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3279 		      UChar* s, const UChar* range, UChar* adjrange,
3280 		      UChar** low, UChar** high)
3281 {
3282   UChar *p;
3283 
3284   range += reg->dmin;
3285   p = s;
3286 
3287  retry:
3288   switch (reg->optimize) {
3289   case ONIG_OPTIMIZE_EXACT:
3290   exact_method:
3291     p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3292 			     range, adjrange, end, p);
3293     break;
3294 
3295   case ONIG_OPTIMIZE_EXACT_IC:
3296     p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3297                                 reg->exact, reg->exact_end,
3298                                 range, adjrange, end, p);
3299     break;
3300 
3301   case ONIG_OPTIMIZE_EXACT_BM:
3302   case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3303 #ifdef USE_INT_MAP_BACKWARD
3304     if (IS_NULL(reg->int_map_backward)) {
3305       int r;
3306 
3307       if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3308         goto exact_method;
3309 
3310       r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3311                                &(reg->int_map_backward));
3312       if (r) return r;
3313     }
3314     p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3315                            end, p);
3316 #else
3317     goto exact_method;
3318 #endif
3319     break;
3320 
3321   case ONIG_OPTIMIZE_MAP:
3322     p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3323     break;
3324   }
3325 
3326   if (p) {
3327     if (reg->sub_anchor) {
3328       UChar* prev;
3329 
3330       switch (reg->sub_anchor) {
3331       case ANCHOR_BEGIN_LINE:
3332         if (!ON_STR_BEGIN(p)) {
3333           prev = onigenc_get_prev_char_head(reg->enc, str, p);
3334           if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3335             p = prev;
3336             goto retry;
3337           }
3338         }
3339         break;
3340 
3341       case ANCHOR_END_LINE:
3342         if (ON_STR_END(p)) {
3343 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3344           prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3345           if (IS_NULL(prev)) goto fail;
3346           if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3347             p = prev;
3348             goto retry;
3349           }
3350 #endif
3351         }
3352         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3353 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3354                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3355 #endif
3356                  ) {
3357           p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3358           if (IS_NULL(p)) goto fail;
3359           goto retry;
3360         }
3361         break;
3362       }
3363     }
3364 
3365     /* no needs to adjust *high, *high is used as range check only */
3366     if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3367       *low  = p - reg->dmax;
3368       *high = p - reg->dmin;
3369       *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3370     }
3371 
3372 #ifdef ONIG_DEBUG_SEARCH
3373     fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3374 	    (int )(*low - str), (int )(*high - str));
3375 #endif
3376     return 1; /* success */
3377   }
3378 
3379  fail:
3380 #ifdef ONIG_DEBUG_SEARCH
3381   fprintf(stderr, "backward_search_range: fail.\n");
3382 #endif
3383   return 0; /* fail */
3384 }
3385 
3386 
3387 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3388 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3389 	    const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3390 {
3391   int r;
3392   UChar *s, *prev;
3393   OnigMatchArg msa;
3394   const UChar *orig_start = start;
3395 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3396   const UChar *orig_range = range;
3397 #endif
3398 
3399 #ifdef ONIG_DEBUG_SEARCH
3400   fprintf(stderr,
3401      "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3402      (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3403 #endif
3404 
3405   if (region
3406 #ifdef USE_POSIX_API_REGION_OPTION
3407       && !IS_POSIX_REGION(option)
3408 #endif
3409       ) {
3410     r = onig_region_resize_clear(region, reg->num_mem + 1);
3411     if (r) goto finish_no_msa;
3412   }
3413 
3414   if (start > end || start < str) goto mismatch_no_msa;
3415 
3416   if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3417     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
3418       r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
3419       goto finish_no_msa;
3420     }
3421   }
3422 
3423 
3424 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3425 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3426 #define MATCH_AND_RETURN_CHECK(upper_range) \
3427   r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3428   if (r != ONIG_MISMATCH) {\
3429     if (r >= 0) {\
3430       if (! IS_FIND_LONGEST(reg->options)) {\
3431         goto match;\
3432       }\
3433     }\
3434     else goto finish; /* error */ \
3435   }
3436 #else
3437 #define MATCH_AND_RETURN_CHECK(upper_range) \
3438   r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3439   if (r != ONIG_MISMATCH) {\
3440     if (r >= 0) {\
3441       goto match;\
3442     }\
3443     else goto finish; /* error */ \
3444   }
3445 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3446 #else
3447 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3448 #define MATCH_AND_RETURN_CHECK(none) \
3449   r = match_at(reg, str, end, s, prev, &msa);\
3450   if (r != ONIG_MISMATCH) {\
3451     if (r >= 0) {\
3452       if (! IS_FIND_LONGEST(reg->options)) {\
3453         goto match;\
3454       }\
3455     }\
3456     else goto finish; /* error */ \
3457   }
3458 #else
3459 #define MATCH_AND_RETURN_CHECK(none) \
3460   r = match_at(reg, str, end, s, prev, &msa);\
3461   if (r != ONIG_MISMATCH) {\
3462     if (r >= 0) {\
3463       goto match;\
3464     }\
3465     else goto finish; /* error */ \
3466   }
3467 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3468 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3469 
3470 
3471   /* anchor optimize: resume search range */
3472   if (reg->anchor != 0 && str < end) {
3473     UChar *min_semi_end, *max_semi_end;
3474 
3475     if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3476       /* search start-position only */
3477     begin_position:
3478       if (range > start)
3479         range = start + 1;
3480       else
3481         range = start;
3482     }
3483     else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3484       /* search str-position only */
3485       if (range > start) {
3486         if (start != str) goto mismatch_no_msa;
3487         range = str + 1;
3488       }
3489       else {
3490         if (range <= str) {
3491           start = str;
3492           range = str;
3493         }
3494         else
3495           goto mismatch_no_msa;
3496       }
3497     }
3498     else if (reg->anchor & ANCHOR_END_BUF) {
3499       min_semi_end = max_semi_end = (UChar* )end;
3500 
3501     end_buf:
3502       if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
3503 	goto mismatch_no_msa;
3504 
3505       if (range > start) {
3506         if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
3507           start = min_semi_end - reg->anchor_dmax;
3508           if (start < end)
3509             start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3510         }
3511         if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3512           range = max_semi_end - reg->anchor_dmin + 1;
3513         }
3514 
3515         if (start > range) goto mismatch_no_msa;
3516         /* If start == range, match with empty at end.
3517            Backward search is used. */
3518       }
3519       else {
3520         if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
3521           range = min_semi_end - reg->anchor_dmax;
3522         }
3523         if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
3524           start = max_semi_end - reg->anchor_dmin;
3525           start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3526         }
3527         if (range > start) goto mismatch_no_msa;
3528       }
3529     }
3530     else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3531       UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3532 
3533       max_semi_end = (UChar* )end;
3534       if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3535         min_semi_end = pre_end;
3536 
3537 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3538         pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3539         if (IS_NOT_NULL(pre_end) &&
3540             ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3541           min_semi_end = pre_end;
3542         }
3543 #endif
3544         if (min_semi_end > str && start <= min_semi_end) {
3545           goto end_buf;
3546         }
3547       }
3548       else {
3549         min_semi_end = (UChar* )end;
3550         goto end_buf;
3551       }
3552     }
3553     else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3554       goto begin_position;
3555     }
3556   }
3557   else if (str == end) { /* empty string */
3558     static const UChar* address_for_empty_string = (UChar* )"";
3559 
3560 #ifdef ONIG_DEBUG_SEARCH
3561     fprintf(stderr, "onig_search: empty string.\n");
3562 #endif
3563 
3564     if (reg->threshold_len == 0) {
3565       start = end = str = address_for_empty_string;
3566       s = (UChar* )start;
3567       prev = (UChar* )NULL;
3568 
3569       MATCH_ARG_INIT(msa, reg, option, region, start);
3570 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571       msa.state_check_buff = (void* )0;
3572       msa.state_check_buff_size = 0;   /* NO NEED, for valgrind */
3573 #endif
3574       MATCH_AND_RETURN_CHECK(end);
3575       goto mismatch;
3576     }
3577     goto mismatch_no_msa;
3578   }
3579 
3580 #ifdef ONIG_DEBUG_SEARCH
3581   fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3582 	  (int )(end - str), (int )(start - str), (int )(range - str));
3583 #endif
3584 
3585   MATCH_ARG_INIT(msa, reg, option, region, orig_start);
3586 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3587   {
3588     int offset = (MIN(start, range) - str);
3589     STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3590   }
3591 #endif
3592 
3593   s = (UChar* )start;
3594   if (range > start) {   /* forward search */
3595     if (s > str)
3596       prev = onigenc_get_prev_char_head(reg->enc, str, s);
3597     else
3598       prev = (UChar* )NULL;
3599 
3600     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3601       UChar *sch_range, *low, *high, *low_prev;
3602 
3603       sch_range = (UChar* )range;
3604       if (reg->dmax != 0) {
3605         if (reg->dmax == ONIG_INFINITE_DISTANCE)
3606           sch_range = (UChar* )end;
3607         else {
3608           sch_range += reg->dmax;
3609           if (sch_range > end) sch_range = (UChar* )end;
3610         }
3611       }
3612 
3613       if ((end - start) < reg->threshold_len)
3614         goto mismatch;
3615 
3616       if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3617         do {
3618           if (! forward_search_range(reg, str, end, s, sch_range,
3619                                      &low, &high, &low_prev)) goto mismatch;
3620           if (s < low) {
3621             s    = low;
3622             prev = low_prev;
3623           }
3624           while (s <= high) {
3625             MATCH_AND_RETURN_CHECK(orig_range);
3626             prev = s;
3627             s += enclen(reg->enc, s);
3628           }
3629         } while (s < range);
3630         goto mismatch;
3631       }
3632       else { /* check only. */
3633         if (! forward_search_range(reg, str, end, s, sch_range,
3634                                    &low, &high, (UChar** )NULL)) goto mismatch;
3635 
3636         if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3637           do {
3638             MATCH_AND_RETURN_CHECK(orig_range);
3639             prev = s;
3640             s += enclen(reg->enc, s);
3641 
3642             if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
3643               while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3644                 prev = s;
3645                 s += enclen(reg->enc, s);
3646               }
3647             }
3648           } while (s < range);
3649           goto mismatch;
3650         }
3651       }
3652     }
3653 
3654     do {
3655       MATCH_AND_RETURN_CHECK(orig_range);
3656       prev = s;
3657       s += enclen(reg->enc, s);
3658     } while (s < range);
3659 
3660     if (s == range) { /* because empty match with /$/. */
3661       MATCH_AND_RETURN_CHECK(orig_range);
3662     }
3663   }
3664   else {  /* backward search */
3665 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3666     if (orig_start < end)
3667       orig_start += enclen(reg->enc, orig_start); /* is upper range */
3668 #endif
3669 
3670     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3671       UChar *low, *high, *adjrange, *sch_start;
3672 
3673       if (range < end)
3674         adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3675       else
3676         adjrange = (UChar* )end;
3677 
3678       if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3679           (end - range) >= reg->threshold_len) {
3680         do {
3681           sch_start = s + reg->dmax;
3682           if (sch_start > end) sch_start = (UChar* )end;
3683           if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3684                                     &low, &high) <= 0)
3685             goto mismatch;
3686 
3687           if (s > high)
3688             s = high;
3689 
3690           while (s >= low) {
3691             prev = onigenc_get_prev_char_head(reg->enc, str, s);
3692             MATCH_AND_RETURN_CHECK(orig_start);
3693             s = prev;
3694           }
3695         } while (s >= range);
3696         goto mismatch;
3697       }
3698       else { /* check only. */
3699         if ((end - range) < reg->threshold_len) goto mismatch;
3700 
3701         sch_start = s;
3702         if (reg->dmax != 0) {
3703           if (reg->dmax == ONIG_INFINITE_DISTANCE)
3704             sch_start = (UChar* )end;
3705           else {
3706             sch_start += reg->dmax;
3707             if (sch_start > end) sch_start = (UChar* )end;
3708             else
3709               sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3710                                                         start, sch_start);
3711           }
3712         }
3713         if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3714                                   &low, &high) <= 0) goto mismatch;
3715       }
3716     }
3717 
3718     do {
3719       prev = onigenc_get_prev_char_head(reg->enc, str, s);
3720       MATCH_AND_RETURN_CHECK(orig_start);
3721       s = prev;
3722     } while (s >= range);
3723   }
3724 
3725  mismatch:
3726 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3727   if (IS_FIND_LONGEST(reg->options)) {
3728     if (msa.best_len >= 0) {
3729       s = msa.best_s;
3730       goto match;
3731     }
3732   }
3733 #endif
3734   r = ONIG_MISMATCH;
3735 
3736  finish:
3737   MATCH_ARG_FREE(msa);
3738 
3739   /* If result is mismatch and no FIND_NOT_EMPTY option,
3740      then the region is not set in match_at(). */
3741   if (IS_FIND_NOT_EMPTY(reg->options) && region
3742 #ifdef USE_POSIX_API_REGION_OPTION
3743       && !IS_POSIX_REGION(option)
3744 #endif
3745       ) {
3746     onig_region_clear(region);
3747   }
3748 
3749 #ifdef ONIG_DEBUG
3750   if (r != ONIG_MISMATCH)
3751     fprintf(stderr, "onig_search: error %d\n", r);
3752 #endif
3753   return r;
3754 
3755  mismatch_no_msa:
3756   r = ONIG_MISMATCH;
3757  finish_no_msa:
3758 #ifdef ONIG_DEBUG
3759   if (r != ONIG_MISMATCH)
3760     fprintf(stderr, "onig_search: error %d\n", r);
3761 #endif
3762   return r;
3763 
3764  match:
3765   MATCH_ARG_FREE(msa);
3766   return s - str;
3767 }
3768 
3769 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)3770 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
3771 	  OnigRegion* region, OnigOptionType option,
3772 	  int (*scan_callback)(int, int, OnigRegion*, void*),
3773 	  void* callback_arg)
3774 {
3775   int r;
3776   int n;
3777   int rs;
3778   const UChar* start;
3779 
3780   if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3781     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
3782       return ONIGERR_INVALID_WIDE_CHAR_VALUE;
3783 
3784     ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
3785   }
3786 
3787   n = 0;
3788   start = str;
3789   while (1) {
3790     r = onig_search(reg, str, end, start, end, region, option);
3791     if (r >= 0) {
3792       rs = scan_callback(n, r, region, callback_arg);
3793       n++;
3794       if (rs != 0)
3795         return rs;
3796 
3797       if (region->end[0] == start - str) {
3798         if (start >= end) break;
3799         start += enclen(reg->enc, start);
3800       }
3801       else
3802         start = str + region->end[0];
3803 
3804       if (start > end)
3805         break;
3806     }
3807     else if (r == ONIG_MISMATCH) {
3808       break;
3809     }
3810     else { /* error */
3811       return r;
3812     }
3813   }
3814 
3815   return n;
3816 }
3817 
3818 extern OnigEncoding
onig_get_encoding(regex_t * reg)3819 onig_get_encoding(regex_t* reg)
3820 {
3821   return reg->enc;
3822 }
3823 
3824 extern OnigOptionType
onig_get_options(regex_t * reg)3825 onig_get_options(regex_t* reg)
3826 {
3827   return reg->options;
3828 }
3829 
3830 extern  OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)3831 onig_get_case_fold_flag(regex_t* reg)
3832 {
3833   return reg->case_fold_flag;
3834 }
3835 
3836 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3837 onig_get_syntax(regex_t* reg)
3838 {
3839   return reg->syntax;
3840 }
3841 
3842 extern int
onig_number_of_captures(regex_t * reg)3843 onig_number_of_captures(regex_t* reg)
3844 {
3845   return reg->num_mem;
3846 }
3847 
3848 extern int
onig_number_of_capture_histories(regex_t * reg)3849 onig_number_of_capture_histories(regex_t* reg)
3850 {
3851 #ifdef USE_CAPTURE_HISTORY
3852   int i, n;
3853 
3854   n = 0;
3855   for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3856     if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3857       n++;
3858   }
3859   return n;
3860 #else
3861   return 0;
3862 #endif
3863 }
3864 
3865 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3866 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3867 {
3868   *to = *from;
3869 }
3870 
3871