xref: /PHP-5.4/ext/mbstring/oniguruma/regexec.c (revision 7aab46a2)
1 /**********************************************************************
2   regexec.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * Nonzero when the character at p decodes to code point 13 (CR) and the
 * character immediately after it satisfies ONIGENC_IS_MBC_NEWLINE.
 * The pointer argument is parenthesized so that an expression argument
 * cannot change the meaning of the pointer addition.
 */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,(p),end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,((p)+enc_len(enc,(p))),end))
#endif
37 
38 #ifdef USE_CAPTURE_HISTORY
39 static void history_tree_free(OnigCaptureTreeNode* node);
40 
41 static void
history_tree_clear(OnigCaptureTreeNode * node)42 history_tree_clear(OnigCaptureTreeNode* node)
43 {
44   int i;
45 
46   if (IS_NOT_NULL(node)) {
47     for (i = 0; i < node->num_childs; i++) {
48       if (IS_NOT_NULL(node->childs[i])) {
49         history_tree_free(node->childs[i]);
50       }
51     }
52     for (i = 0; i < node->allocated; i++) {
53       node->childs[i] = (OnigCaptureTreeNode* )0;
54     }
55     node->num_childs = 0;
56     node->beg = ONIG_REGION_NOTPOS;
57     node->end = ONIG_REGION_NOTPOS;
58     node->group = -1;
59   }
60 }
61 
62 static void
history_tree_free(OnigCaptureTreeNode * node)63 history_tree_free(OnigCaptureTreeNode* node)
64 {
65   history_tree_clear(node);
66   xfree(node);
67 }
68 
69 static void
history_root_free(OnigRegion * r)70 history_root_free(OnigRegion* r)
71 {
72   if (IS_NOT_NULL(r->history_root)) {
73     history_tree_free(r->history_root);
74     r->history_root = (OnigCaptureTreeNode* )0;
75   }
76 }
77 
78 static OnigCaptureTreeNode*
history_node_new(void)79 history_node_new(void)
80 {
81   OnigCaptureTreeNode* node;
82 
83   node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
84   CHECK_NULL_RETURN(node);
85   node->childs     = (OnigCaptureTreeNode** )0;
86   node->allocated  = 0;
87   node->num_childs = 0;
88   node->group      = -1;
89   node->beg        = ONIG_REGION_NOTPOS;
90   node->end        = ONIG_REGION_NOTPOS;
91 
92   return node;
93 }
94 
95 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)96 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
97 {
98 #define HISTORY_TREE_INIT_ALLOC_SIZE  8
99 
100   if (parent->num_childs >= parent->allocated) {
101     int n, i;
102 
103     if (IS_NULL(parent->childs)) {
104       n = HISTORY_TREE_INIT_ALLOC_SIZE;
105       parent->childs =
106         (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
107     }
108     else {
109       n = parent->allocated * 2;
110       parent->childs =
111         (OnigCaptureTreeNode** )xrealloc(parent->childs,
112                                          sizeof(OnigCaptureTreeNode*) * n);
113     }
114     CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
115     for (i = parent->allocated; i < n; i++) {
116       parent->childs[i] = (OnigCaptureTreeNode* )0;
117     }
118     parent->allocated = n;
119   }
120 
121   parent->childs[parent->num_childs] = child;
122   parent->num_childs++;
123   return 0;
124 }
125 
126 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)127 history_tree_clone(OnigCaptureTreeNode* node)
128 {
129   int i;
130   OnigCaptureTreeNode *clone, *child;
131 
132   clone = history_node_new();
133   CHECK_NULL_RETURN(clone);
134 
135   clone->beg = node->beg;
136   clone->end = node->end;
137   for (i = 0; i < node->num_childs; i++) {
138     child = history_tree_clone(node->childs[i]);
139     if (IS_NULL(child)) {
140       history_tree_free(clone);
141       return (OnigCaptureTreeNode* )0;
142     }
143     history_tree_add_child(clone, child);
144   }
145 
146   return clone;
147 }
148 
149 extern  OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)150 onig_get_capture_tree(OnigRegion* region)
151 {
152   return region->history_root;
153 }
154 #endif /* USE_CAPTURE_HISTORY */
155 
156 extern void
onig_region_clear(OnigRegion * region)157 onig_region_clear(OnigRegion* region)
158 {
159   int i;
160 
161   for (i = 0; i < region->num_regs; i++) {
162     region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
163   }
164 #ifdef USE_CAPTURE_HISTORY
165   history_root_free(region);
166 #endif
167 }
168 
169 extern int
onig_region_resize(OnigRegion * region,int n)170 onig_region_resize(OnigRegion* region, int n)
171 {
172   region->num_regs = n;
173 
174   if (n < ONIG_NREGION)
175     n = ONIG_NREGION;
176 
177   if (region->allocated == 0) {
178     region->beg = (int* )xmalloc(n * sizeof(int));
179     region->end = (int* )xmalloc(n * sizeof(int));
180 
181     if (region->beg == 0 || region->end == 0)
182       return ONIGERR_MEMORY;
183 
184     region->allocated = n;
185   }
186   else if (region->allocated < n) {
187     region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
188     region->end = (int* )xrealloc(region->end, n * sizeof(int));
189 
190     if (region->beg == 0 || region->end == 0)
191       return ONIGERR_MEMORY;
192 
193     region->allocated = n;
194   }
195 
196   return 0;
197 }
198 
199 extern int
onig_region_resize_clear(OnigRegion * region,int n)200 onig_region_resize_clear(OnigRegion* region, int n)
201 {
202   int r;
203 
204   r = onig_region_resize(region, n);
205   if (r != 0) return r;
206   onig_region_clear(region);
207   return 0;
208 }
209 
210 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)211 onig_region_set(OnigRegion* region, int at, int beg, int end)
212 {
213   if (at < 0) return ONIGERR_INVALID_ARGUMENT;
214 
215   if (at >= region->allocated) {
216     int r = onig_region_resize(region, at + 1);
217     if (r < 0) return r;
218   }
219 
220   region->beg[at] = beg;
221   region->end[at] = end;
222   return 0;
223 }
224 
225 extern void
onig_region_init(OnigRegion * region)226 onig_region_init(OnigRegion* region)
227 {
228   region->num_regs     = 0;
229   region->allocated    = 0;
230   region->beg          = (int* )0;
231   region->end          = (int* )0;
232   region->history_root = (OnigCaptureTreeNode* )0;
233 }
234 
235 extern OnigRegion*
onig_region_new(void)236 onig_region_new(void)
237 {
238   OnigRegion* r;
239 
240   r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
241   onig_region_init(r);
242   return r;
243 }
244 
245 extern void
onig_region_free(OnigRegion * r,int free_self)246 onig_region_free(OnigRegion* r, int free_self)
247 {
248   if (r) {
249     if (r->allocated > 0) {
250       if (r->beg) xfree(r->beg);
251       if (r->end) xfree(r->end);
252       r->allocated = 0;
253     }
254 #ifdef USE_CAPTURE_HISTORY
255     history_root_free(r);
256 #endif
257     if (free_self) xfree(r);
258   }
259 }
260 
261 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)262 onig_region_copy(OnigRegion* to, OnigRegion* from)
263 {
264 #define RREGC_SIZE   (sizeof(int) * from->num_regs)
265   int i;
266 
267   if (to == from) return;
268 
269   if (to->allocated == 0) {
270     if (from->num_regs > 0) {
271       to->beg = (int* )xmalloc(RREGC_SIZE);
272       to->end = (int* )xmalloc(RREGC_SIZE);
273       to->allocated = from->num_regs;
274     }
275   }
276   else if (to->allocated < from->num_regs) {
277     to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
278     to->end = (int* )xrealloc(to->end, RREGC_SIZE);
279     to->allocated = from->num_regs;
280   }
281 
282   for (i = 0; i < from->num_regs; i++) {
283     to->beg[i] = from->beg[i];
284     to->end[i] = from->end[i];
285   }
286   to->num_regs = from->num_regs;
287 
288 #ifdef USE_CAPTURE_HISTORY
289   history_root_free(to);
290 
291   if (IS_NOT_NULL(from->history_root)) {
292     to->history_root = history_tree_clone(from->history_root);
293   }
294 #endif
295 }
296 
297 
298 /** stack **/
299 #define INVALID_STACK_INDEX   -1
300 typedef long StackIndex;
301 
302 typedef struct _StackType {
303   unsigned int type;
304   union {
305     struct {
306       UChar *pcode;      /* byte code position */
307       UChar *pstr;       /* string position */
308       UChar *pstr_prev;  /* previous char position of pstr */
309 #ifdef USE_COMBINATION_EXPLOSION_CHECK
310       unsigned int state_check;
311 #endif
312     } state;
313     struct {
314       int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
315       UChar *pcode;      /* byte code position (head of repeated target) */
316       int   num;         /* repeat id */
317     } repeat;
318     struct {
319       StackIndex si;     /* index of stack */
320     } repeat_inc;
321     struct {
322       int num;           /* memory num */
323       UChar *pstr;       /* start/end position */
324       /* Following information is setted, if this stack type is MEM-START */
325       StackIndex start;  /* prev. info (for backtrack  "(...)*" ) */
326       StackIndex end;    /* prev. info (for backtrack  "(...)*" ) */
327     } mem;
328     struct {
329       int num;           /* null check id */
330       UChar *pstr;       /* start position */
331     } null_check;
332 #ifdef USE_SUBEXP_CALL
333     struct {
334       UChar *ret_addr;   /* byte code position */
335       int    num;        /* null check id */
336       UChar *pstr;       /* string position */
337     } call_frame;
338 #endif
339   } u;
340 } StackType;
341 
342 /* stack type */
343 /* used by normal-POP */
344 #define STK_ALT                    0x0001
345 #define STK_LOOK_BEHIND_NOT        0x0002
346 #define STK_POS_NOT                0x0003
347 /* handled by normal-POP */
348 #define STK_MEM_START              0x0100
349 #define STK_MEM_END                0x8200
350 #define STK_REPEAT_INC             0x0300
351 #define STK_STATE_CHECK_MARK       0x1000
352 /* avoided by normal-POP */
353 #define STK_NULL_CHECK_START       0x3000
354 #define STK_NULL_CHECK_END         0x5000  /* for recursive call */
355 #define STK_MEM_END_MARK           0x8400
356 #define STK_POS                    0x0500  /* used when POP-POS */
357 #define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
358 #define STK_REPEAT                 0x0700
359 #define STK_CALL_FRAME             0x0800
360 #define STK_RETURN                 0x0900
361 #define STK_VOID                   0x0a00  /* for fill a blank */
362 
363 /* stack type check mask */
364 #define STK_MASK_POP_USED          0x00ff
365 #define STK_MASK_TO_VOID_TARGET    0x10ff
366 #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
367 
368 typedef struct {
369   void* stack_p;
370   int   stack_n;
371   OnigOptionType options;
372   OnigRegion*    region;
373   const UChar* start;   /* search start position (for \G: BEGIN_POSITION) */
374 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
375   int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
376   UChar* best_s;
377 #endif
378 #ifdef USE_COMBINATION_EXPLOSION_CHECK
379   void* state_check_buff;
380   int   state_check_buff_size;
381 #endif
382 } MatchArg;
383 
384 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
385 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
386   (msa).stack_p  = (void* )0;\
387   (msa).options  = (arg_option);\
388   (msa).region   = (arg_region);\
389   (msa).start    = (arg_start);\
390   (msa).best_len = ONIG_MISMATCH;\
391 } while (0)
392 #else
393 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
394   (msa).stack_p  = (void* )0;\
395   (msa).options  = (arg_option);\
396   (msa).region   = (arg_region);\
397   (msa).start    = (arg_start);\
398 } while (0)
399 #endif
400 
401 #ifdef USE_COMBINATION_EXPLOSION_CHECK
402 
403 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16
404 
405 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {	\
406   if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
407     unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
408     offset = ((offset) * (state_num)) >> 3;\
409     if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
410       if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
411         (msa).state_check_buff = (void* )xmalloc(size);\
412       else \
413         (msa).state_check_buff = (void* )xalloca(size);\
414       xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
415               (size_t )(size - (offset))); \
416       (msa).state_check_buff_size = size;\
417     }\
418     else {\
419       (msa).state_check_buff = (void* )0;\
420       (msa).state_check_buff_size = 0;\
421     }\
422   }\
423   else {\
424     (msa).state_check_buff = (void* )0;\
425     (msa).state_check_buff_size = 0;\
426   }\
427 } while (0)
428 
429 #define MATCH_ARG_FREE(msa) do {\
430   if ((msa).stack_p) xfree((msa).stack_p);\
431   if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
432     if ((msa).state_check_buff) xfree((msa).state_check_buff);\
433   }\
434 } while (0);
435 #else
436 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
437 #define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)
438 #endif
439 
440 
441 
/*
 * Set up the backtrack stack variables of the enclosing function
 * (stk_alloc, stk_base, stk, stk_end) and obtain ptr_num pointer slots
 * in alloc_addr via xalloca.
 *
 * When msa already carries a saved stack it is reused with msa->stack_n
 * entries; otherwise room for stack_num entries is taken from the same
 * xalloca block, directly after the pointer slots.
 */
#define STACK_INIT(alloc_addr, ptr_num, stack_num)  do {\
  if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
    stk_alloc  = (StackType* )(msa->stack_p);\
    stk_end    = stk_alloc + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
                       + sizeof(StackType) * (stack_num));\
    stk_alloc  = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
    stk_end    = stk_alloc + (stack_num);\
  }\
  stk_base = stk_alloc;\
  stk      = stk_base;\
} while(0)

/*
 * If the stack no longer lives at stk_alloc, record its current base and
 * entry count in msa.
 */
#define STACK_SAVE do{\
  if (stk_alloc != stk_base) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base;\
  }\
} while(0)
466 
467 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
468 
469 extern unsigned int
onig_get_match_stack_limit_size(void)470 onig_get_match_stack_limit_size(void)
471 {
472   return MatchStackLimitSize;
473 }
474 
475 extern int
onig_set_match_stack_limit_size(unsigned int size)476 onig_set_match_stack_limit_size(unsigned int size)
477 {
478   MatchStackLimitSize = size;
479   return 0;
480 }
481 
482 static int
stack_double(StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,StackType * stk_alloc,MatchArg * msa)483 stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
484 	     StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
485 {
486   unsigned int n;
487   StackType *x, *stk_base, *stk_end, *stk;
488 
489   stk_base = *arg_stk_base;
490   stk_end  = *arg_stk_end;
491   stk      = *arg_stk;
492 
493   n = stk_end - stk_base;
494   if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
495     x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
496     if (IS_NULL(x)) {
497       STACK_SAVE;
498       return ONIGERR_MEMORY;
499     }
500     xmemcpy(x, stk_base, n * sizeof(StackType));
501     n *= 2;
502   }
503   else {
504     n *= 2;
505     if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
506       if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
507         return ONIGERR_MATCH_STACK_LIMIT_OVER;
508       else
509         n = MatchStackLimitSize;
510     }
511     x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
512     if (IS_NULL(x)) {
513       STACK_SAVE;
514       return ONIGERR_MEMORY;
515     }
516   }
517   *arg_stk      = x + (stk - stk_base);
518   *arg_stk_base = x;
519   *arg_stk_end  = x + n;
520   return 0;
521 }
522 
/*
 * Make sure at least n free entries remain above stk.  When they do not,
 * the stack is grown with stack_double(); if that fails the stack is
 * saved and the enclosing function returns the error code.
 */
#define STACK_ENSURE(n)	do {\
  if ((n) > stk_end - stk) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) { STACK_SAVE; return r; } \
  }\
} while(0)

/* Conversions between a stack entry pointer and its index from stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Push an entry that carries nothing but its type. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True for entry types that share a bit with STK_MASK_TO_VOID_TARGET. */
#define IS_TO_VOID_TARGET(stk) (0 != ((stk)->type & STK_MASK_TO_VOID_TARGET))
540 
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index in state_check_buff for string position s and check number
   snum (snum is 1-based). */
#define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))

/* Read the state-check bit for the enclosing scope's current position
   `s` into v; v is 0 when no buffer exists. */
#define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    int x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


/*
 * Extra "else if" arm for the pop loops: when the popped entry is a
 * STK_STATE_CHECK_MARK, set the bit for its recorded position and check
 * number.  The parameter is parenthesized at every use so the macro does
 * not depend on the argument being spelled `stk`.
 */
#define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    int x = STATE_CHECK_POS((stk)->u.state.pstr, (stk)->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8));				\
  }

/* Push a state entry (byte code position, string position, previous
   character position) with no state-check number. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without the capacity check and without storing
   string positions; the caller must have ensured space already. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push a STK_ALT entry that remembers check number snum (0 when no
   state-check buffer exists). */
#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  STACK_INC;\
} while(0)

/* Push a STK_STATE_CHECK_MARK for (s, snum); does nothing when no
   state-check buffer exists. */
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

#define ELSE_IF_STATE_CHECK_MARK(stk)

/* Push a state entry (byte code position, string position, previous
   character position). */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without the capacity check and without storing
   string positions; the caller must have ensured space already. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
615 
/* Shorthands for pushing state entries of a fixed type. */
#define STACK_PUSH_ALT(pat,s,sprev)     STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
#define STACK_PUSH_STOP_BT              STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)

/* Push a STK_REPEAT entry for repeat `id` whose target starts at `pat`;
   the iteration count starts at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->u.repeat.count  = 0;\
  stk->u.repeat.pcode  = (pat);\
  stk->u.repeat.num    = (id);\
  STACK_INC;\
} while(0)

/* Push a STK_REPEAT_INC entry that refers to the STK_REPEAT entry at
   stack index sindex. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->u.repeat_inc.si  = (sindex);\
  STACK_INC;\
} while(0)

/*
 * Push the start of capture group mnum at position s.  The previous
 * mem_start_stk / mem_end_stk values are saved in the entry so a pop can
 * restore them; afterwards mem_start_stk[mnum] is this entry's index and
 * mem_end_stk[mnum] is INVALID_STACK_INDEX.
 */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  stk->u.mem.pstr     = (s);\
  stk->u.mem.num      = (mnum);\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/*
 * Push the end of capture group mnum at position s, saving the previous
 * mem_start_stk / mem_end_stk values; mem_end_stk[mnum] then becomes
 * this entry's index.
 */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  stk->u.mem.pstr   = (s);\
  stk->u.mem.num    = (mnum);\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a STK_MEM_END_MARK for group mnum; only the group number is
   recorded. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
668 
/*
 * Walk down from the stack top and leave k on the STK_MEM_START entry of
 * group mnum that is not paired with a later end entry (STK_MEM_END or
 * STK_MEM_END_MARK) of the same group.  The walk stops at stk_base.
 */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while (0)

/*
 * Walk upward from k toward the stack top.  `start` receives the
 * position of the first outermost STK_MEM_START of group mnum and `end`
 * the position of the STK_MEM_END that closes it; the walk stops there.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  for ( ; k < stk; k++) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
  }\
} while (0)
702 
/* Push the start of null check cnum, remembering string position s. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_START;\
  stk->u.null_check.pstr = (s);\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while(0)

/* Push the end marker of null check cnum. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_END;\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while(0)

/* Push a call frame whose return address is byte code position pat.
   Uses the call_frame member, which StackType declares only under
   USE_SUBEXP_CALL. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN marker. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)
730 
731 
/*
 * Debug-only guard for the stack-walking macros: when p has moved below
 * stk_base, report the location string `at` on stderr and jump to the
 * enclosing function's stack_error label.  Compiles to nothing without
 * ONIG_DEBUG.  The debug form is wrapped in do/while(0) so that it is a
 * single statement and cannot capture a following `else`.
 */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) do {\
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }\
} while(0)
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop the top entry without looking at it. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
746 
/*
 * Unwind the stack down to the nearest entry whose type has a bit in
 * STK_MASK_POP_USED and leave stk pointing at it.
 *
 * pop_level decides how much state is restored while skipping entries:
 *   STACK_POP_LEVEL_FREE       nothing besides state-check marks;
 *   STACK_POP_LEVEL_MEM_START  additionally restores mem_start_stk /
 *                              mem_end_stk from STK_MEM_START entries;
 *   any other value            additionally restores them from
 *                              STK_MEM_END entries and decrements the
 *                              repeat counter referenced by
 *                              STK_REPEAT_INC entries.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  }\
} while(0)
790 
/*
 * Unwind to the nearest STK_POS_NOT entry, restoring capture-group
 * indices from STK_MEM_START / STK_MEM_END entries and repeat counters
 * from STK_REPEAT_INC entries on the way down.
 */
#define STACK_POP_TIL_POS_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/*
 * Same unwinding as STACK_POP_TIL_POS_NOT, but stops at the nearest
 * STK_LOOK_BEHIND_NOT entry.
 */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
830 
/*
 * Walk down from the stack top, turning every void-target entry into
 * STK_VOID, until the nearest STK_POS entry is found; that entry is
 * voided as well and k is left pointing at it.  stk itself is not moved.
 */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)

/*
 * Same walk as STACK_POS_END with a private cursor, ending at the
 * nearest STK_STOP_BT entry.
 */
#define STACK_STOP_BT_END do {\
  StackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
860 
/*
 * Find the nearest STK_NULL_CHECK_START entry with the given id and set
 * isnull to whether its recorded position equals s (i.e. the loop body
 * consumed nothing).
 */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        (isnull) = (k->u.null_check.pstr == (s));\
        break;\
      }\
    }\
  }\
} while(0)

/*
 * Variant of STACK_NULL_CHECK that skips start entries already closed by
 * a STK_NULL_CHECK_END.  `level` counts unmatched end markers; it is
 * raised and lowered only for entries carrying the requested id, as
 * STACK_NULL_CHECK_MEMST_REC does.  Counting end markers of other ids
 * would leave the counter unbalanced, because start entries of other
 * ids never lower it.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
895 
/*
 * Null check that also inspects capture groups.
 *
 * Finds the nearest STK_NULL_CHECK_START with the given id.  If its
 * position differs from s, isnull is 0.  Otherwise isnull starts at 1
 * and every STK_MEM_START entry above it is examined:
 *   - a group with no recorded end, or whose start and end positions
 *     differ, makes isnull 0;
 *   - a group that is empty but ends somewhere other than s makes
 *     isnull -1 (empty, but position changed).
 * Whether the saved end is a stack index or the position itself is
 * decided by the group's bit in reg->bt_mem_end.
 * The s and reg arguments are parenthesized at every use.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
          break;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          while (k < stk) {\
            if (k->type == STK_MEM_START) {\
              if (k->u.mem.end == INVALID_STACK_INDEX) {\
                (isnull) = 0; break;\
              }\
              if (BIT_STATUS_AT((reg)->bt_mem_end, k->u.mem.num))\
                endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
              else\
                endp = (UChar* )k->u.mem.end;\
              if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                (isnull) = 0; break;\
              }\
              else if (endp != (s)) {\
                (isnull) = -1; /* empty, but position changed */ \
              }\
            }\
            k++;\
          }\
          break;\
        }\
      }\
    }\
  }\
} while(0)

/*
 * Recursive-call variant of STACK_NULL_CHECK_MEMST: start entries of the
 * requested id that are already closed by a STK_NULL_CHECK_END of the
 * same id are skipped (tracked in `level`).
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          if (k->u.null_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (k->u.mem.end == INVALID_STACK_INDEX) {\
                  (isnull) = 0; break;\
                }\
                if (BIT_STATUS_AT((reg)->bt_mem_end, k->u.mem.num))\
                  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
                else\
                  endp = (UChar* )k->u.mem.end;\
                if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                  (isnull) = 0; break;\
                }\
                else if (endp != (s)) {\
                  (isnull) = -1; /* empty, but position changed */ \
                }\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
982 
/*
 * Leave k on the nearest STK_REPEAT entry with the given id that is seen
 * while `level` is 0.  `level` goes up on STK_RETURN and down on
 * STK_CALL_FRAME entries met on the way down.
 */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) break;\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while (0)

/*
 * Store in addr the return address of the nearest STK_CALL_FRAME that is
 * not matched by a later STK_RETURN.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
1018 
1019 
/*
 * STRING_CMP(s1, s2, len)
 *
 * Byte-wise comparison of len bytes at s1 and s2.  Jumps to the caller's
 * `fail' label at the first difference.  All three arguments are modified
 * in place (both pointers advance, len counts down and ends at -1 on
 * success), so they must be plain modifiable variables.
 */
#define STRING_CMP(s1,s2,len) do {\
  for (;;) {\
    if (len-- <= 0) break;\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
1025 
/*
 * STRING_CMP_IC(ambig_flag, s1, ps2, len)
 *
 * Comparison through string_cmp_ic() using the `encode' variable of the
 * calling scope; jumps to the caller's `fail' label when that function
 * reports a mismatch (returns 0).
 */
#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
  if (! string_cmp_ic(encode, ambig_flag, s1, ps2, len)) {\
    goto fail;\
  }\
} while(0)
1030 
/*
 * Compare mblen bytes starting at s1 with the text at *ps2 after passing
 * both sides, one unit at a time, through ONIGENC_MBC_TO_NORMALIZE.
 *
 * enc        - encoding handed to ONIGENC_MBC_TO_NORMALIZE.
 * ambig_flag - ambiguity flags handed to ONIGENC_MBC_TO_NORMALIZE.
 * s1         - first text; mblen bytes of it are consumed.
 * ps2        - in/out: second text; written back only on success, with the
 *              position reached after the comparison.
 * mblen      - byte length used to bound both inputs.
 *
 * Returns 1 when every normalized unit has the same length and bytes on
 * both sides, 0 at the first difference (*ps2 is then left untouched).
 */
static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar norm1[ONIGENC_MBC_NORMALIZE_MAXLEN];
  UChar norm2[ONIGENC_MBC_NORMALIZE_MAXLEN];
  UChar *s2   = *ps2;
  UChar *end1 = s1 + mblen;
  UChar *end2 = s2 + mblen;

  while (s1 < end1) {
    int i;
    int n1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end1, norm1);
    int n2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, norm2);

    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (norm1[i] != norm2[i]) return 0;
    }
  }

  *ps2 = s2;
  return 1;
}
1058 
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail)
 *
 * Same byte-wise comparison as STRING_CMP, but the outcome is stored in
 * is_fail (0: equal, 1: differ) instead of jumping.  s1, s2 and len are
 * modified in place and must be plain modifiable variables.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
1067 
/*
 * STRING_CMP_VALUE_IC(ambig_flag, s1, ps2, len, is_fail)
 *
 * Runs string_cmp_ic() with the `encode' variable of the calling scope and
 * stores 1 in is_fail when it returns 0 (mismatch), otherwise 0.
 */
#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) ? 1 : 0;\
} while(0)
1074 
1075 
/* Position predicates; str and end are taken from the calling scope. */
#define ON_STR_BEGIN(s)  (str == (s))   /* s is at the start of the subject */
#define ON_STR_END(s)    (end == (s))   /* s is at the end of the subject   */
#define IS_EMPTY_STR     (end == str)   /* the subject has zero length      */
1079 
/*
 * DATA_ENSURE(n): jump to the caller's `fail' label unless at least n bytes
 * remain between the current position s and end (both taken from the
 * calling scope).  DATA_ENSURE(0) therefore fails only when s has already
 * moved past end.
 *
 * Wrapped in do { } while (0) so that the macro is a single statement: with
 * a bare `if', an `else' written after `DATA_ENSURE(n);' would silently
 * attach to the hidden `if'.  Always use it as `DATA_ENSURE(n);'.
 */
#define DATA_ENSURE(n) do {\
  if (s + (n) > end) goto fail;\
} while (0)

/* Expression form of the same test: non-zero when n bytes are available. */
#define DATA_ENSURE_CHECK(n)   (s + (n) <= end)
1084 
1085 #ifdef USE_CAPTURE_HISTORY
1086 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)1087 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
1088                           StackType* stk_top, UChar* str, regex_t* reg)
1089 {
1090   int n, r;
1091   OnigCaptureTreeNode* child;
1092   StackType* k = *kp;
1093 
1094   while (k < stk_top) {
1095     if (k->type == STK_MEM_START) {
1096       n = k->u.mem.num;
1097       if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1098           BIT_STATUS_AT(reg->capture_history, n) != 0) {
1099         child = history_node_new();
1100         CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
1101         child->group = n;
1102         child->beg = (int )(k->u.mem.pstr - str);
1103         r = history_tree_add_child(node, child);
1104         if (r != 0) return r;
1105         *kp = (k + 1);
1106         r = make_capture_history_tree(child, kp, stk_top, str, reg);
1107         if (r != 0) return r;
1108 
1109         k = *kp;
1110         child->end = (int )(k->u.mem.pstr - str);
1111       }
1112     }
1113     else if (k->type == STK_MEM_END) {
1114       if (k->u.mem.num == node->group) {
1115         node->end = (int )(k->u.mem.pstr - str);
1116         *kp = k;
1117         return 0;
1118       }
1119     }
1120     k++;
1121   }
1122 
1123   return 1; /* 1: root node ending. */
1124 }
1125 #endif
1126 
1127 #ifdef USE_BACKREF_AT_LEVEL
/*
 * Report whether group number `mem' occurs among the `num' MemNumType
 * values read in sequence from memp with GET_MEMNUM_INC.
 *
 * Returns 1 when found, 0 otherwise.
 */
static int mem_is_in_memp(int mem, int num, UChar* memp)
{
  MemNumType cand;
  int left;

  for (left = num; left > 0; left--) {
    GET_MEMNUM_INC(cand, memp);
    if ((int )cand == mem) return 1;
  }
  return 0;
}
1139 
/*
 * Match a back-reference against a capture recorded at a given call
 * nesting level.
 *
 * reg         - compiled regex (its encoding is used for the
 *               case-insensitive comparison).
 * top         - one past the newest stack entry to examine.
 * stk_base    - oldest stack entry.
 * ignore_case - non-zero: compare through string_cmp_ic().
 * ambig_flag  - passed through to string_cmp_ic().
 * nest        - level value at which capture entries are considered.
 * mem_num     - number of group numbers stored at memp.
 * memp        - encoded list of candidate group numbers.
 * s           - in/out: current subject position; advanced past the matched
 *               text on success.
 * send        - end of the subject.
 *
 * The stack is scanned from the newest entry downwards.  STK_CALL_FRAME
 * lowers and STK_RETURN raises the running level; only entries seen while
 * the level equals `nest' are inspected.  The STK_MEM_END of a listed group
 * supplies the end of the captured text; the first STK_MEM_START of a
 * listed group found after that supplies its start, and the captured text
 * is then compared with the subject at *s.
 *
 * Returns 1 on a match (with *s advanced), 0 otherwise.
 */
static int backref_match_at_nested_level(regex_t* reg
	 , StackType* top, StackType* stk_base
	 , int ignore_case, int ambig_flag
	 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
  UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
  int level = 0;
  StackType* k = top;

  /* Decrement before use so that k never drops below stk_base: forming a
     pointer in front of the stack array would be undefined behaviour. */
  while (k > stk_base) {
    k--;

    if (k->type == STK_CALL_FRAME) {
      level--;
      continue;
    }
    if (k->type == STK_RETURN) {
      level++;
      continue;
    }
    if (level != nest) continue;

    if (k->type == STK_MEM_END) {
      if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
        pend = k->u.mem.pstr;
      }
      continue;
    }

    if (k->type != STK_MEM_START) continue;
    if (! mem_is_in_memp(k->u.mem.num, mem_num, memp)) continue;

    pstart = k->u.mem.pstr;
    if (pend == NULL_UCHARP) continue;  /* no end recorded yet */

    /* captured text is longer than what is left of the subject */
    if (pend - pstart > send - *s) return 0; /* or goto next_mem; */

    ss = *s;
    if (ignore_case != 0) {
      if (string_cmp_ic(reg->enc, ambig_flag,
                        pstart, &ss, (int )(pend - pstart)) == 0)
        return 0; /* or goto next_mem; */
    }
    else {
      for (p = pstart; p < pend; ) {
        if (*p++ != *ss++) return 0; /* or goto next_mem; */
      }
    }

    *s = ss;
    return 1;
  }

  return 0;
}
1195 #endif /* USE_BACKREF_AT_LEVEL */
1196 
1197 
1198 #ifdef RUBY_PLATFORM
1199 
/*
 * Context passed (cast to VALUE) by onig_exec_trap() to both trap_exec()
 * and trap_ensure() through rb_ensure().
 */
1200 typedef struct {
1201   int state;              /* 0 on entry; set to 1 by trap_exec() on normal return */
1202   regex_t*  reg;          /* regex handed to ONIG_STATE_DEC_THREAD in trap_ensure() */
1203   MatchArg* msa;          /* match arguments released by trap_ensure() on abnormal exit */
1204   StackType* stk_base;    /* match stack base; freed by trap_ensure() when it differs
                              from a non-null msa->stack_p */
1205 } TrapEnsureArg;
1206 
1207 static VALUE
trap_ensure(VALUE arg)1208 trap_ensure(VALUE arg)
1209 {
1210   TrapEnsureArg* ta = (TrapEnsureArg* )arg;
1211 
1212   if (ta->state == 0) { /* trap_exec() is not normal return */
1213     ONIG_STATE_DEC_THREAD(ta->reg);
1214     if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
1215       xfree(ta->stk_base);
1216 
1217     MATCH_ARG_FREE(*(ta->msa));
1218   }
1219 
1220   return Qnil;
1221 }
1222 
1223 static VALUE
trap_exec(VALUE arg)1224 trap_exec(VALUE arg)
1225 {
1226   TrapEnsureArg* ta;
1227 
1228   rb_trap_exec();
1229 
1230   ta = (TrapEnsureArg* )arg;
1231   ta->state = 1; /* normal return */
1232   return Qnil;
1233 }
1234 
1235 extern void
onig_exec_trap(regex_t * reg,MatchArg * msa,StackType * stk_base)1236 onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
1237 {
1238   VALUE arg;
1239   TrapEnsureArg ta;
1240 
1241   ta.state    = 0;
1242   ta.reg      = reg;
1243   ta.msa      = msa;
1244   ta.stk_base = stk_base;
1245   arg = (VALUE )(&ta);
1246   rb_ensure(trap_exec, arg, trap_ensure, arg);
1247 }
1248 
/*
 * Poll for a pending Ruby trap from within match_at(): when one is pending
 * and interrupts are not prohibited, hand control to onig_exec_trap() with
 * the reg, msa and stk_base of the calling scope.
 */
#define CHECK_INTERRUPT_IN_MATCH_AT do {\
  if (rb_trap_pending && ! rb_prohibit_interrupt) {\
    onig_exec_trap(reg, msa, stk_base);\
  }\
} while (0)
1256 #else
/* Builds without RUBY_PLATFORM: the interrupt poll expands to nothing. */
1257 #define CHECK_INTERRUPT_IN_MATCH_AT
1258 #endif /* RUBY_PLATFORM */
1259 
1260 #ifdef ONIG_DEBUG_STATISTICS
1261 
1262 #define USE_TIMEOFDAY
1263 
1264 #ifdef USE_TIMEOFDAY
1265 #ifdef HAVE_SYS_TIME_H
1266 #include <sys/time.h>
1267 #endif
1268 #ifdef HAVE_UNISTD_H
1269 #include <unistd.h>
1270 #endif
/* Timestamps taken on entry to (ts) and exit from (te) the current opcode. */
1271 static struct timeval ts, te;
/* Store the current wall-clock time in t. */
1272 #define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
/* Elapsed time te - ts, expressed in microseconds. */
1273 #define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
1274                            (((te).tv_sec - (ts).tv_sec)*1000000))
1275 #else
1276 #ifdef HAVE_SYS_TIMES_H
1277 #include <sys/times.h>
1278 #endif
/* Alternative used when USE_TIMEOFDAY is not defined: times() based. */
/* Timestamps taken on entry to (ts) and exit from (te) the current opcode. */
1279 static struct tms ts, te;
/* Store the current process times in t. */
1280 #define GETTIME(t)         times(&(t))
/* Elapsed user time te - ts, in the units of tms_utime. */
1281 #define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
1282 #endif
1283 
/* Per-opcode statistics, updated by STAT_OP_IN / STAT_OP_OUT / STACK_INC. */
1284 static int OpCounter[256];          /* times each opcode was entered */
1285 static int OpPrevCounter[256];      /* per opcode: times it was current when an
                                          OpPrevTarget opcode was entered next */
1286 static unsigned long OpTime[256];   /* accumulated TIMEDIFF per opcode */
1287 static int OpCurr = OP_FINISH;      /* opcode most recently entered */
1288 static int OpPrevTarget = OP_FAIL;  /* opcode whose predecessors are counted */
1289 static int MaxStackDepth = 0;       /* largest stk - stk_base seen by STACK_INC */
1290 
/*
 * STAT_OP_IN(opcode): record entry into an opcode.  If the opcode is the
 * tracked OpPrevTarget, the opcode that was current until now gets its
 * predecessor count bumped; then the opcode becomes current, its entry
 * count is bumped and the start timestamp ts is taken.
 */
#define STAT_OP_IN(opcode) do {\
  if (opcode == OpPrevTarget) {\
    OpPrevCounter[OpCurr] += 1;\
  }\
  OpCurr = opcode;\
  OpCounter[opcode] += 1;\
  GETTIME(ts);\
} while (0)

/*
 * STAT_OP_OUT: take the end timestamp te and add the elapsed time since
 * the matching STAT_OP_IN to the current opcode's total.
 */
#define STAT_OP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] = OpTime[OpCurr] + TIMEDIFF(te, ts);\
} while (0)
1302 
1303 #ifdef RUBY_PLATFORM
1304 
1305 /*
1306  * :nodoc:
1307  */
onig_stat_print(void)1308 static VALUE onig_stat_print(void)
1309 {
1310   onig_print_statistics(stderr);
1311   return Qnil;
1312 }
1313 #endif
1314 
onig_statistics_init(void)1315 extern void onig_statistics_init(void)
1316 {
1317   int i;
1318   for (i = 0; i < 256; i++) {
1319     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1320   }
1321   MaxStackDepth = 0;
1322 
1323 #ifdef RUBY_PLATFORM
1324   rb_define_global_function("onig_stat_print", onig_stat_print, 0);
1325 #endif
1326 }
1327 
1328 extern void
onig_print_statistics(FILE * f)1329 onig_print_statistics(FILE* f)
1330 {
1331   int i;
1332   fprintf(f, "   count      prev        time\n");
1333   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1334     fprintf(f, "%8d: %8d: %10ld: %s\n",
1335 	    OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1336   }
1337   fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1338 }
1339 
/*
 * STACK_INC (statistics build): advance the stack pointer stk and keep
 * MaxStackDepth at the largest distance from stk_base seen so far.
 */
#define STACK_INC do {\
  stk++;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while (0)
1345 
1346 #else
/* Without ONIG_DEBUG_STATISTICS: just advance the stack pointer. */
1347 #define STACK_INC     stk++
1348 
/* Without ONIG_DEBUG_STATISTICS the per-opcode hooks expand to nothing. */
1349 #define STAT_OP_IN(opcode)
1350 #define STAT_OP_OUT
1351 #endif
1352 
1353 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)1354 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
1355 {
1356   OnigCodePoint n, *data;
1357   OnigCodePoint low, high, x;
1358 
1359   GET_CODE_POINT(n, p);
1360   data = (OnigCodePoint* )p;
1361   data++;
1362 
1363   for (low = 0, high = n; low < high; ) {
1364     x = (low + high) >> 1;
1365     if (code > data[x * 2 + 1])
1366       low = x + 1;
1367     else
1368       high = x;
1369   }
1370 
1371   return ((low < n && code >= data[low * 2]) ? 1 : 0);
1372 }
1373 
1374 static int
is_code_in_cc(int enclen,OnigCodePoint code,CClassNode * cc)1375 is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
1376 {
1377   int found;
1378 
1379   if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
1380     if (IS_NULL(cc->mbuf)) {
1381       found = 0;
1382     }
1383     else {
1384       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
1385     }
1386   }
1387   else {
1388     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
1389   }
1390 
1391   if (IS_CCLASS_NOT(cc))
1392     return !found;
1393   else
1394     return found;
1395 }
1396 
1397 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)1398 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
1399 {
1400   int len;
1401 
1402   if (ONIGENC_MBC_MINLEN(enc) > 1) {
1403     len = 2;
1404   }
1405   else {
1406     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
1407   }
1408   return is_code_in_cc(len, code, cc);
1409 }
1410 
1411 
1412 /* matching region of POSIX API */
/* Offset type used for the fields of posix_regmatch_t. */
1413 typedef int regoff_t;
1414 
/*
 * One match span in POSIX-API form.  match_at() fills an array of these
 * (through a cast of the region pointer) when IS_POSIX_REGION is set.
 */
1415 typedef struct {
1416   regoff_t  rm_so;   /* start offset relative to str, or ONIG_REGION_NOTPOS */
1417   regoff_t  rm_eo;   /* end offset relative to str, or ONIG_REGION_NOTPOS */
1418 } posix_regmatch_t;
1419 
1420 /* match data(str - end) from position (sstart). */
1421 /* if sstart == str then set sprev to NULL. */
1422 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * sstart,UChar * sprev,MatchArg * msa)1423 match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
1424 	 UChar* sprev, MatchArg* msa)
1425 {
1426   static UChar FinishCode[] = { OP_FINISH };
1427 
1428   int i, n, num_mem, best_len, pop_level;
1429   LengthType tlen, tlen2;
1430   MemNumType mem;
1431   RelAddrType addr;
1432   OnigOptionType option = reg->options;
1433   OnigEncoding encode = reg->enc;
1434   OnigAmbigType ambig_flag = reg->ambig_flag;
1435   UChar *s, *q, *sbegin;
1436   UChar *p = reg->p;
1437   char *alloca_base;
1438   StackType *stk_alloc, *stk_base, *stk, *stk_end;
1439   StackType *stkp; /* used as any purpose. */
1440   StackIndex si;
1441   StackIndex *repeat_stk;
1442   StackIndex *mem_start_stk, *mem_end_stk;
1443 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1444   int scv;
1445   unsigned char* state_check_buff = msa->state_check_buff;
1446   int num_comb_exp_check = reg->num_comb_exp_check;
1447 #endif
1448   n = reg->num_repeat + reg->num_mem * 2;
1449 
1450   STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1451   pop_level = reg->stack_pop_level;
1452   num_mem = reg->num_mem;
1453   repeat_stk = (StackIndex* )alloca_base;
1454 
1455   mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
1456   mem_end_stk   = mem_start_stk + num_mem;
1457   mem_start_stk--; /* for index start from 1,
1458 		      mem_start_stk[1]..mem_start_stk[num_mem] */
1459   mem_end_stk--;   /* for index start from 1,
1460 		      mem_end_stk[1]..mem_end_stk[num_mem] */
1461   for (i = 1; i <= num_mem; i++) {
1462     mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1463   }
1464 
1465 #ifdef ONIG_DEBUG_MATCH
1466   fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1467 	  (int )str, (int )end, (int )sstart, (int )sprev);
1468   fprintf(stderr, "size: %d, start offset: %d\n",
1469 	  (int )(end - str), (int )(sstart - str));
1470 #endif
1471 
1472   STACK_PUSH_ENSURED(STK_ALT, FinishCode);  /* bottom stack */
1473   best_len = ONIG_MISMATCH;
1474   s = (UChar* )sstart;
1475   while (1) {
1476 #ifdef ONIG_DEBUG_MATCH
1477     {
1478       UChar *q, *bp, buf[50];
1479       int len;
1480       fprintf(stderr, "%4d> \"", (int )(s - str));
1481       bp = buf;
1482       for (i = 0, q = s; i < 7 && q < end; i++) {
1483 	len = enc_len(encode, q);
1484 	while (len-- > 0) *bp++ = *q++;
1485       }
1486       if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1487       else         { xmemcpy(bp, "\"",    1); bp += 1; }
1488       *bp = 0;
1489       fputs(buf, stderr);
1490       for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1491       onig_print_compiled_byte_code(stderr, p, NULL, encode);
1492       fprintf(stderr, "\n");
1493     }
1494 #endif
1495 
1496     sbegin = s;
1497     switch (*p++) {
1498     case OP_END:  STAT_OP_IN(OP_END);
1499       n = s - sstart;
1500       if (n > best_len) {
1501 	OnigRegion* region;
1502 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1503 	if (IS_FIND_LONGEST(option)) {
1504 	  if (n > msa->best_len) {
1505 	    msa->best_len = n;
1506 	    msa->best_s   = (UChar* )sstart;
1507 	  }
1508 	  else
1509 	    goto end_best_len;
1510         }
1511 #endif
1512 	best_len = n;
1513 	region = msa->region;
1514 	if (region) {
1515 #ifdef USE_POSIX_REGION_OPTION
1516 	  if (IS_POSIX_REGION(msa->options)) {
1517 	    posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1518 
1519 	    rmt[0].rm_so = sstart - str;
1520 	    rmt[0].rm_eo = s      - str;
1521 	    for (i = 1; i <= num_mem; i++) {
1522 	      if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1523 		if (BIT_STATUS_AT(reg->bt_mem_start, i))
1524 		  rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1525 		else
1526 		  rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1527 
1528 		rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1529 				? STACK_AT(mem_end_stk[i])->u.mem.pstr
1530 				: (UChar* )((void* )mem_end_stk[i])) - str;
1531 	      }
1532 	      else {
1533 		rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1534 	      }
1535 	    }
1536 	  }
1537 	  else {
1538 #endif /* USE_POSIX_REGION_OPTION */
1539 	    region->beg[0] = sstart - str;
1540 	    region->end[0] = s      - str;
1541 	    for (i = 1; i <= num_mem; i++) {
1542 	      if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1543 		if (BIT_STATUS_AT(reg->bt_mem_start, i))
1544 		  region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1545 		else
1546 		  region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1547 
1548 		region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1549 				  ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1550 				  : (UChar* )((void* )mem_end_stk[i])) - str;
1551 	      }
1552 	      else {
1553 		region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1554 	      }
1555 	    }
1556 
1557 #ifdef USE_CAPTURE_HISTORY
1558 	    if (reg->capture_history != 0) {
1559               int r;
1560               OnigCaptureTreeNode* node;
1561 
1562               if (IS_NULL(region->history_root)) {
1563                 region->history_root = node = history_node_new();
1564                 CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
1565               }
1566               else {
1567                 node = region->history_root;
1568                 history_tree_clear(node);
1569               }
1570 
1571               node->group = 0;
1572               node->beg   = sstart - str;
1573               node->end   = s      - str;
1574 
1575               stkp = stk_base;
1576               r = make_capture_history_tree(region->history_root, &stkp,
1577                                             stk, (UChar* )str, reg);
1578               if (r < 0) {
1579                 best_len = r; /* error code */
1580                 goto finish;
1581               }
1582 	    }
1583 #endif /* USE_CAPTURE_HISTORY */
1584 #ifdef USE_POSIX_REGION_OPTION
1585 	  } /* else IS_POSIX_REGION() */
1586 #endif
1587 	} /* if (region) */
1588       } /* n > best_len */
1589 
1590 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1591     end_best_len:
1592 #endif
1593       STAT_OP_OUT;
1594 
1595       if (IS_FIND_CONDITION(option)) {
1596 	if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1597 	  best_len = ONIG_MISMATCH;
1598 	  goto fail; /* for retry */
1599 	}
1600 	if (IS_FIND_LONGEST(option) && s < end) {
1601 	  goto fail; /* for retry */
1602 	}
1603       }
1604 
1605       /* default behavior: return first-matching result. */
1606       goto finish;
1607       break;
1608 
1609     case OP_EXACT1:  STAT_OP_IN(OP_EXACT1);
1610 #if 0
1611       DATA_ENSURE(1);
1612       if (*p != *s) goto fail;
1613       p++; s++;
1614 #endif
1615       if (*p != *s++) goto fail;
1616       DATA_ENSURE(0);
1617       p++;
1618       STAT_OP_OUT;
1619       break;
1620 
1621     case OP_EXACT1_IC:  STAT_OP_IN(OP_EXACT1_IC);
1622       {
1623 	int len;
1624 	UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1625 
1626 	DATA_ENSURE(1);
1627         ss = s;
1628         sp = p;
1629 
1630 	len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1631 	DATA_ENSURE(0);
1632 	q = lowbuf;
1633 	while (len-- > 0) {
1634 	  if (*p != *q) {
1635             goto fail;
1636           }
1637 	  p++; q++;
1638 	}
1639       }
1640       STAT_OP_OUT;
1641       break;
1642 
1643     case OP_EXACT2:  STAT_OP_IN(OP_EXACT2);
1644       DATA_ENSURE(2);
1645       if (*p != *s) goto fail;
1646       p++; s++;
1647       if (*p != *s) goto fail;
1648       sprev = s;
1649       p++; s++;
1650       STAT_OP_OUT;
1651       continue;
1652       break;
1653 
1654     case OP_EXACT3:  STAT_OP_IN(OP_EXACT3);
1655       DATA_ENSURE(3);
1656       if (*p != *s) goto fail;
1657       p++; s++;
1658       if (*p != *s) goto fail;
1659       p++; s++;
1660       if (*p != *s) goto fail;
1661       sprev = s;
1662       p++; s++;
1663       STAT_OP_OUT;
1664       continue;
1665       break;
1666 
1667     case OP_EXACT4:  STAT_OP_IN(OP_EXACT4);
1668       DATA_ENSURE(4);
1669       if (*p != *s) goto fail;
1670       p++; s++;
1671       if (*p != *s) goto fail;
1672       p++; s++;
1673       if (*p != *s) goto fail;
1674       p++; s++;
1675       if (*p != *s) goto fail;
1676       sprev = s;
1677       p++; s++;
1678       STAT_OP_OUT;
1679       continue;
1680       break;
1681 
1682     case OP_EXACT5:  STAT_OP_IN(OP_EXACT5);
1683       DATA_ENSURE(5);
1684       if (*p != *s) goto fail;
1685       p++; s++;
1686       if (*p != *s) goto fail;
1687       p++; s++;
1688       if (*p != *s) goto fail;
1689       p++; s++;
1690       if (*p != *s) goto fail;
1691       p++; s++;
1692       if (*p != *s) goto fail;
1693       sprev = s;
1694       p++; s++;
1695       STAT_OP_OUT;
1696       continue;
1697       break;
1698 
1699     case OP_EXACTN:  STAT_OP_IN(OP_EXACTN);
1700       GET_LENGTH_INC(tlen, p);
1701       DATA_ENSURE(tlen);
1702       while (tlen-- > 0) {
1703 	if (*p++ != *s++) goto fail;
1704       }
1705       sprev = s - 1;
1706       STAT_OP_OUT;
1707       continue;
1708       break;
1709 
1710     case OP_EXACTN_IC:  STAT_OP_IN(OP_EXACTN_IC);
1711       {
1712 	int len;
1713 	UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1714 
1715 	GET_LENGTH_INC(tlen, p);
1716 	endp = p + tlen;
1717 
1718 	while (p < endp) {
1719 	  sprev = s;
1720 	  DATA_ENSURE(1);
1721           ss = s;
1722           sp = p;
1723 
1724 	  len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1725 	  DATA_ENSURE(0);
1726 	  q = lowbuf;
1727 	  while (len-- > 0) {
1728 	    if (*p != *q) {
1729               goto fail;
1730             }
1731 	    p++; q++;
1732 	  }
1733 	}
1734       }
1735 
1736       STAT_OP_OUT;
1737       continue;
1738       break;
1739 
1740     case OP_EXACTMB2N1:  STAT_OP_IN(OP_EXACTMB2N1);
1741       DATA_ENSURE(2);
1742       if (*p != *s) goto fail;
1743       p++; s++;
1744       if (*p != *s) goto fail;
1745       p++; s++;
1746       STAT_OP_OUT;
1747       break;
1748 
1749     case OP_EXACTMB2N2:  STAT_OP_IN(OP_EXACTMB2N2);
1750       DATA_ENSURE(4);
1751       if (*p != *s) goto fail;
1752       p++; s++;
1753       if (*p != *s) goto fail;
1754       p++; s++;
1755       sprev = s;
1756       if (*p != *s) goto fail;
1757       p++; s++;
1758       if (*p != *s) goto fail;
1759       p++; s++;
1760       STAT_OP_OUT;
1761       continue;
1762       break;
1763 
1764     case OP_EXACTMB2N3:  STAT_OP_IN(OP_EXACTMB2N3);
1765       DATA_ENSURE(6);
1766       if (*p != *s) goto fail;
1767       p++; s++;
1768       if (*p != *s) goto fail;
1769       p++; s++;
1770       if (*p != *s) goto fail;
1771       p++; s++;
1772       if (*p != *s) goto fail;
1773       p++; s++;
1774       sprev = s;
1775       if (*p != *s) goto fail;
1776       p++; s++;
1777       if (*p != *s) goto fail;
1778       p++; s++;
1779       STAT_OP_OUT;
1780       continue;
1781       break;
1782 
1783     case OP_EXACTMB2N:  STAT_OP_IN(OP_EXACTMB2N);
1784       GET_LENGTH_INC(tlen, p);
1785       DATA_ENSURE(tlen * 2);
1786       while (tlen-- > 0) {
1787 	if (*p != *s) goto fail;
1788 	p++; s++;
1789 	if (*p != *s) goto fail;
1790 	p++; s++;
1791       }
1792       sprev = s - 2;
1793       STAT_OP_OUT;
1794       continue;
1795       break;
1796 
1797     case OP_EXACTMB3N:  STAT_OP_IN(OP_EXACTMB3N);
1798       GET_LENGTH_INC(tlen, p);
1799       DATA_ENSURE(tlen * 3);
1800       while (tlen-- > 0) {
1801 	if (*p != *s) goto fail;
1802 	p++; s++;
1803 	if (*p != *s) goto fail;
1804 	p++; s++;
1805 	if (*p != *s) goto fail;
1806 	p++; s++;
1807       }
1808       sprev = s - 3;
1809       STAT_OP_OUT;
1810       continue;
1811       break;
1812 
1813     case OP_EXACTMBN:  STAT_OP_IN(OP_EXACTMBN);
1814       GET_LENGTH_INC(tlen,  p);  /* mb-len */
1815       GET_LENGTH_INC(tlen2, p);  /* string len */
1816       tlen2 *= tlen;
1817       DATA_ENSURE(tlen2);
1818       while (tlen2-- > 0) {
1819 	if (*p != *s) goto fail;
1820 	p++; s++;
1821       }
1822       sprev = s - tlen;
1823       STAT_OP_OUT;
1824       continue;
1825       break;
1826 
1827     case OP_CCLASS:  STAT_OP_IN(OP_CCLASS);
1828       DATA_ENSURE(1);
1829       if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1830       p += SIZE_BITSET;
1831       s += enc_len(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */
1832       STAT_OP_OUT;
1833       break;
1834 
1835     case OP_CCLASS_MB:  STAT_OP_IN(OP_CCLASS_MB);
1836       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1837 
1838     cclass_mb:
1839       GET_LENGTH_INC(tlen, p);
1840       {
1841 	OnigCodePoint code;
1842 	UChar *ss;
1843 	int mb_len;
1844 
1845 	DATA_ENSURE(1);
1846 	mb_len = enc_len(encode, s);
1847 	DATA_ENSURE(mb_len);
1848 	ss = s;
1849 	s += mb_len;
1850 	code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1851 
1852 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1853 	if (! onig_is_in_code_range(p, code)) goto fail;
1854 #else
1855 	q = p;
1856 	ALIGNMENT_RIGHT(q);
1857 	if (! onig_is_in_code_range(q, code)) goto fail;
1858 #endif
1859       }
1860       p += tlen;
1861       STAT_OP_OUT;
1862       break;
1863 
1864     case OP_CCLASS_MIX:  STAT_OP_IN(OP_CCLASS_MIX);
1865       DATA_ENSURE(1);
1866       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1867 	p += SIZE_BITSET;
1868 	goto cclass_mb;
1869       }
1870       else {
1871 	if (BITSET_AT(((BitSetRef )p), *s) == 0)
1872 	  goto fail;
1873 
1874 	p += SIZE_BITSET;
1875 	GET_LENGTH_INC(tlen, p);
1876 	p += tlen;
1877 	s++;
1878       }
1879       STAT_OP_OUT;
1880       break;
1881 
1882     case OP_CCLASS_NOT:  STAT_OP_IN(OP_CCLASS_NOT);
1883       DATA_ENSURE(1);
1884       if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1885       p += SIZE_BITSET;
1886       s += enc_len(encode, s);
1887       STAT_OP_OUT;
1888       break;
1889 
1890     case OP_CCLASS_MB_NOT:  STAT_OP_IN(OP_CCLASS_MB_NOT);
1891       DATA_ENSURE(1);
1892       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1893 	s++;
1894 	GET_LENGTH_INC(tlen, p);
1895 	p += tlen;
1896 	goto cc_mb_not_success;
1897       }
1898 
1899     cclass_mb_not:
1900       GET_LENGTH_INC(tlen, p);
1901       {
1902 	OnigCodePoint code;
1903 	UChar *ss;
1904 	int mb_len = enc_len(encode, s);
1905 
1906 	if (s + mb_len > end) {
1907           DATA_ENSURE(1);
1908 	  s = (UChar* )end;
1909 	  p += tlen;
1910 	  goto cc_mb_not_success;
1911 	}
1912 
1913 	ss = s;
1914 	s += mb_len;
1915 	code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1916 
1917 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1918 	if (onig_is_in_code_range(p, code)) goto fail;
1919 #else
1920 	q = p;
1921 	ALIGNMENT_RIGHT(q);
1922 	if (onig_is_in_code_range(q, code)) goto fail;
1923 #endif
1924       }
1925       p += tlen;
1926 
1927     cc_mb_not_success:
1928       STAT_OP_OUT;
1929       break;
1930 
1931     case OP_CCLASS_MIX_NOT:  STAT_OP_IN(OP_CCLASS_MIX_NOT);
1932       DATA_ENSURE(1);
1933       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1934 	p += SIZE_BITSET;
1935 	goto cclass_mb_not;
1936       }
1937       else {
1938 	if (BITSET_AT(((BitSetRef )p), *s) != 0)
1939 	  goto fail;
1940 
1941 	p += SIZE_BITSET;
1942 	GET_LENGTH_INC(tlen, p);
1943 	p += tlen;
1944 	s++;
1945       }
1946       STAT_OP_OUT;
1947       break;
1948 
1949     case OP_CCLASS_NODE:  STAT_OP_IN(OP_CCLASS_NODE);
1950       {
1951 	OnigCodePoint code;
1952         void *node;
1953         int mb_len;
1954         UChar *ss;
1955 
1956         DATA_ENSURE(1);
1957         GET_POINTER_INC(node, p);
1958 	mb_len = enc_len(encode, s);
1959 	ss = s;
1960 	s += mb_len;
1961 	DATA_ENSURE(0);
1962 	code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1963 	if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
1964       }
1965       STAT_OP_OUT;
1966       break;
1967 
1968     case OP_ANYCHAR:  STAT_OP_IN(OP_ANYCHAR);
1969       DATA_ENSURE(1);
1970       n = enc_len(encode, s);
1971       DATA_ENSURE(n);
1972       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1973       s += n;
1974       STAT_OP_OUT;
1975       break;
1976 
1977     case OP_ANYCHAR_ML:  STAT_OP_IN(OP_ANYCHAR_ML);
1978       DATA_ENSURE(1);
1979       n = enc_len(encode, s);
1980       DATA_ENSURE(n);
1981       s += n;
1982       STAT_OP_OUT;
1983       break;
1984 
1985     case OP_ANYCHAR_STAR:  STAT_OP_IN(OP_ANYCHAR_STAR);
1986       while (s < end) {
1987 	STACK_PUSH_ALT(p, s, sprev);
1988 	n = enc_len(encode, s);
1989         DATA_ENSURE(n);
1990         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
1991         sprev = s;
1992         s += n;
1993       }
1994       STAT_OP_OUT;
1995       break;
1996 
1997     case OP_ANYCHAR_ML_STAR:  STAT_OP_IN(OP_ANYCHAR_ML_STAR);
1998       while (s < end) {
1999 	STACK_PUSH_ALT(p, s, sprev);
2000 	n = enc_len(encode, s);
2001 	if (n > 1) {
2002 	  DATA_ENSURE(n);
2003 	  sprev = s;
2004 	  s += n;
2005 	}
2006 	else {
2007 	  sprev = s;
2008 	  s++;
2009 	}
2010       }
2011       STAT_OP_OUT;
2012       break;
2013 
2014     case OP_ANYCHAR_STAR_PEEK_NEXT:  STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2015       while (s < end) {
2016 	if (*p == *s) {
2017 	  STACK_PUSH_ALT(p + 1, s, sprev);
2018 	}
2019 	n = enc_len(encode, s);
2020         DATA_ENSURE(n);
2021         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
2022         sprev = s;
2023         s += n;
2024       }
2025       p++;
2026       STAT_OP_OUT;
2027       break;
2028 
2029     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2030       while (s < end) {
2031 	if (*p == *s) {
2032 	  STACK_PUSH_ALT(p + 1, s, sprev);
2033 	}
2034 	n = enc_len(encode, s);
2035 	if (n >1) {
2036 	  DATA_ENSURE(n);
2037 	  sprev = s;
2038 	  s += n;
2039 	}
2040 	else {
2041 	  sprev = s;
2042 	  s++;
2043 	}
2044       }
2045       p++;
2046       STAT_OP_OUT;
2047       break;
2048 
2049 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2050     case OP_STATE_CHECK_ANYCHAR_STAR:  STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2051       GET_STATE_CHECK_NUM_INC(mem, p);
2052       while (s < end) {
2053 	STATE_CHECK_VAL(scv, mem);
2054 	if (scv) goto fail;
2055 
2056 	STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2057 	n = enc_len(encode, s);
2058         DATA_ENSURE(n);
2059         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
2060         sprev = s;
2061         s += n;
2062       }
2063       STAT_OP_OUT;
2064       break;
2065 
2066     case OP_STATE_CHECK_ANYCHAR_ML_STAR:
2067       STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2068 
2069       GET_STATE_CHECK_NUM_INC(mem, p);
2070       while (s < end) {
2071 	STATE_CHECK_VAL(scv, mem);
2072 	if (scv) goto fail;
2073 
2074 	STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2075 	n = enc_len(encode, s);
2076 	if (n > 1) {
2077 	  DATA_ENSURE(n);
2078 	  sprev = s;
2079 	  s += n;
2080 	}
2081 	else {
2082 	  sprev = s;
2083 	  s++;
2084 	}
2085       }
2086       STAT_OP_OUT;
2087       break;
2088 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2089 
2090     case OP_WORD:  STAT_OP_IN(OP_WORD);
2091       DATA_ENSURE(1);
2092       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2093 	goto fail;
2094 
2095       s += enc_len(encode, s);
2096       STAT_OP_OUT;
2097       break;
2098 
2099     case OP_NOT_WORD:  STAT_OP_IN(OP_NOT_WORD);
2100       DATA_ENSURE(1);
2101       if (ONIGENC_IS_MBC_WORD(encode, s, end))
2102 	goto fail;
2103 
2104       s += enc_len(encode, s);
2105       STAT_OP_OUT;
2106       break;
2107 
2108     case OP_WORD_BOUND:  STAT_OP_IN(OP_WORD_BOUND);
2109       if (ON_STR_BEGIN(s)) {
2110 	DATA_ENSURE(1);
2111 	if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2112 	  goto fail;
2113       }
2114       else if (ON_STR_END(s)) {
2115 	if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2116 	  goto fail;
2117       }
2118       else {
2119 	if (ONIGENC_IS_MBC_WORD(encode, s, end)
2120 	    == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2121 	  goto fail;
2122       }
2123       STAT_OP_OUT;
2124       continue;
2125       break;
2126 
2127     case OP_NOT_WORD_BOUND:  STAT_OP_IN(OP_NOT_WORD_BOUND);
2128       if (ON_STR_BEGIN(s)) {
2129 	if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
2130 	  goto fail;
2131       }
2132       else if (ON_STR_END(s)) {
2133 	if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2134 	  goto fail;
2135       }
2136       else {
2137 	if (ONIGENC_IS_MBC_WORD(encode, s, end)
2138 	    != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2139 	  goto fail;
2140       }
2141       STAT_OP_OUT;
2142       continue;
2143       break;
2144 
2145 #ifdef USE_WORD_BEGIN_END
2146     case OP_WORD_BEGIN:  STAT_OP_IN(OP_WORD_BEGIN);
2147       if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2148 	if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2149 	  STAT_OP_OUT;
2150 	  continue;
2151 	}
2152       }
2153       goto fail;
2154       break;
2155 
2156     case OP_WORD_END:  STAT_OP_IN(OP_WORD_END);
2157       if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2158 	if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2159 	  STAT_OP_OUT;
2160 	  continue;
2161 	}
2162       }
2163       goto fail;
2164       break;
2165 #endif
2166 
2167     case OP_BEGIN_BUF:  STAT_OP_IN(OP_BEGIN_BUF);
2168       if (! ON_STR_BEGIN(s)) goto fail;
2169 
2170       STAT_OP_OUT;
2171       continue;
2172       break;
2173 
2174     case OP_END_BUF:  STAT_OP_IN(OP_END_BUF);
2175       if (! ON_STR_END(s)) goto fail;
2176 
2177       STAT_OP_OUT;
2178       continue;
2179       break;
2180 
2181     case OP_BEGIN_LINE:  STAT_OP_IN(OP_BEGIN_LINE);
2182       if (ON_STR_BEGIN(s)) {
2183 	if (IS_NOTBOL(msa->options)) goto fail;
2184 	STAT_OP_OUT;
2185 	continue;
2186       }
2187       else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2188 	STAT_OP_OUT;
2189 	continue;
2190       }
2191       goto fail;
2192       break;
2193 
2194     case OP_END_LINE:  STAT_OP_IN(OP_END_LINE);
2195       if (ON_STR_END(s)) {
2196 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2197 	if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2198 #endif
2199 	  if (IS_NOTEOL(msa->options)) goto fail;
2200 	  STAT_OP_OUT;
2201 	  continue;
2202 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2203 	}
2204 #endif
2205       }
2206       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2207 	STAT_OP_OUT;
2208 	continue;
2209       }
2210 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2211       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2212 	STAT_OP_OUT;
2213 	continue;
2214       }
2215 #endif
2216       goto fail;
2217       break;
2218 
2219     case OP_SEMI_END_BUF:  STAT_OP_IN(OP_SEMI_END_BUF);
2220       if (ON_STR_END(s)) {
2221 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2222 	if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2223 #endif
2224 	  if (IS_NOTEOL(msa->options)) goto fail;   /* Is it needed? */
2225 	  STAT_OP_OUT;
2226 	  continue;
2227 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2228 	}
2229 #endif
2230       }
2231       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2232 	       ON_STR_END(s + enc_len(encode, s))) {
2233 	STAT_OP_OUT;
2234 	continue;
2235       }
2236 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2237       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2238         UChar* ss = s + enc_len(encode, s);
2239         if (ON_STR_END(ss + enc_len(encode, ss))) {
2240           STAT_OP_OUT;
2241           continue;
2242         }
2243       }
2244 #endif
2245       goto fail;
2246       break;
2247 
2248     case OP_BEGIN_POSITION:  STAT_OP_IN(OP_BEGIN_POSITION);
2249       if (s != msa->start)
2250 	goto fail;
2251 
2252       STAT_OP_OUT;
2253       continue;
2254       break;
2255 
2256     case OP_MEMORY_START_PUSH:  STAT_OP_IN(OP_MEMORY_START_PUSH);
2257       GET_MEMNUM_INC(mem, p);
2258       STACK_PUSH_MEM_START(mem, s);
2259       STAT_OP_OUT;
2260       continue;
2261       break;
2262 
2263     case OP_MEMORY_START:  STAT_OP_IN(OP_MEMORY_START);
2264       GET_MEMNUM_INC(mem, p);
2265       mem_start_stk[mem] = (StackIndex )((void* )s);
2266       STAT_OP_OUT;
2267       continue;
2268       break;
2269 
2270     case OP_MEMORY_END_PUSH:  STAT_OP_IN(OP_MEMORY_END_PUSH);
2271       GET_MEMNUM_INC(mem, p);
2272       STACK_PUSH_MEM_END(mem, s);
2273       STAT_OP_OUT;
2274       continue;
2275       break;
2276 
2277     case OP_MEMORY_END:  STAT_OP_IN(OP_MEMORY_END);
2278       GET_MEMNUM_INC(mem, p);
2279       mem_end_stk[mem] = (StackIndex )((void* )s);
2280       STAT_OP_OUT;
2281       continue;
2282       break;
2283 
2284 #ifdef USE_SUBEXP_CALL
2285     case OP_MEMORY_END_PUSH_REC:  STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
2286       GET_MEMNUM_INC(mem, p);
2287       STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2288       STACK_PUSH_MEM_END(mem, s);
2289       mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2290       STAT_OP_OUT;
2291       continue;
2292       break;
2293 
2294     case OP_MEMORY_END_REC:  STAT_OP_IN(OP_MEMORY_END_REC);
2295       GET_MEMNUM_INC(mem, p);
2296       mem_end_stk[mem] = (StackIndex )((void* )s);
2297       STACK_GET_MEM_START(mem, stkp);
2298 
2299       if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2300 	mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2301       else
2302 	mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
2303 
2304       STACK_PUSH_MEM_END_MARK(mem);
2305       STAT_OP_OUT;
2306       continue;
2307       break;
2308 #endif
2309 
2310     case OP_BACKREF1:  STAT_OP_IN(OP_BACKREF1);
2311       mem = 1;
2312       goto backref;
2313       break;
2314 
2315     case OP_BACKREF2:  STAT_OP_IN(OP_BACKREF2);
2316       mem = 2;
2317       goto backref;
2318       break;
2319 
2320     case OP_BACKREFN:  STAT_OP_IN(OP_BACKREFN);
2321       GET_MEMNUM_INC(mem, p);
2322     backref:
2323       {
2324 	int len;
2325 	UChar *pstart, *pend;
2326 
2327 	/* if you want to remove following line,
2328 	   you should check in parse and compile time. */
2329 	if (mem > num_mem) goto fail;
2330 	if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
2331 	if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2332 
2333 	if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2334 	  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2335 	else
2336 	  pstart = (UChar* )((void* )mem_start_stk[mem]);
2337 
2338 	pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2339 		? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2340 		: (UChar* )((void* )mem_end_stk[mem]));
2341 	n = pend - pstart;
2342 	DATA_ENSURE(n);
2343 	sprev = s;
2344 	STRING_CMP(pstart, s, n);
2345 	while (sprev + (len = enc_len(encode, sprev)) < s)
2346 	  sprev += len;
2347 
2348 	STAT_OP_OUT;
2349 	continue;
2350       }
2351       break;
2352 
2353     case OP_BACKREFN_IC:  STAT_OP_IN(OP_BACKREFN_IC);
2354       GET_MEMNUM_INC(mem, p);
2355       {
2356 	int len;
2357 	UChar *pstart, *pend;
2358 
2359 	/* if you want to remove following line,
2360 	   you should check in parse and compile time. */
2361 	if (mem > num_mem) goto fail;
2362 	if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
2363 	if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2364 
2365 	if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2366 	  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2367 	else
2368 	  pstart = (UChar* )((void* )mem_start_stk[mem]);
2369 
2370 	pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2371 		? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2372 		: (UChar* )((void* )mem_end_stk[mem]));
2373 	n = pend - pstart;
2374 	DATA_ENSURE(n);
2375 	sprev = s;
2376 	STRING_CMP_IC(ambig_flag, pstart, &s, n);
2377 	while (sprev + (len = enc_len(encode, sprev)) < s)
2378 	  sprev += len;
2379 
2380 	STAT_OP_OUT;
2381 	continue;
2382       }
2383       break;
2384 
2385     case OP_BACKREF_MULTI:  STAT_OP_IN(OP_BACKREF_MULTI);
2386       {
2387 	int len, is_fail;
2388 	UChar *pstart, *pend, *swork;
2389 
2390 	GET_LENGTH_INC(tlen, p);
2391 	for (i = 0; i < tlen; i++) {
2392 	  GET_MEMNUM_INC(mem, p);
2393 
2394 	  if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
2395 	  if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2396 
2397 	  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2398 	    pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2399 	  else
2400 	    pstart = (UChar* )((void* )mem_start_stk[mem]);
2401 
2402 	  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2403 		  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2404 		  : (UChar* )((void* )mem_end_stk[mem]));
2405 	  n = pend - pstart;
2406 	  DATA_ENSURE(n);
2407 	  sprev = s;
2408 	  swork = s;
2409 	  STRING_CMP_VALUE(pstart, swork, n, is_fail);
2410 	  if (is_fail) continue;
2411 	  s = swork;
2412 	  while (sprev + (len = enc_len(encode, sprev)) < s)
2413 	    sprev += len;
2414 
2415 	  p += (SIZE_MEMNUM * (tlen - i - 1));
2416 	  break; /* success */
2417 	}
2418 	if (i == tlen) goto fail;
2419 	STAT_OP_OUT;
2420 	continue;
2421       }
2422       break;
2423 
2424     case OP_BACKREF_MULTI_IC:  STAT_OP_IN(OP_BACKREF_MULTI_IC);
2425       {
2426 	int len, is_fail;
2427 	UChar *pstart, *pend, *swork;
2428 
2429 	GET_LENGTH_INC(tlen, p);
2430 	for (i = 0; i < tlen; i++) {
2431 	  GET_MEMNUM_INC(mem, p);
2432 
2433 	  if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
2434 	  if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2435 
2436 	  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2437 	    pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2438 	  else
2439 	    pstart = (UChar* )((void* )mem_start_stk[mem]);
2440 
2441 	  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2442 		  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2443 		  : (UChar* )((void* )mem_end_stk[mem]));
2444 	  n = pend - pstart;
2445 	  DATA_ENSURE(n);
2446 	  sprev = s;
2447 	  swork = s;
2448 	  STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
2449 	  if (is_fail) continue;
2450 	  s = swork;
2451 	  while (sprev + (len = enc_len(encode, sprev)) < s)
2452 	    sprev += len;
2453 
2454 	  p += (SIZE_MEMNUM * (tlen - i - 1));
2455 	  break; /* success */
2456 	}
2457 	if (i == tlen) goto fail;
2458 	STAT_OP_OUT;
2459 	continue;
2460       }
2461       break;
2462 
2463 #ifdef USE_BACKREF_AT_LEVEL
2464     case OP_BACKREF_AT_LEVEL:
2465       {
2466 	int len;
2467 	OnigOptionType ic;
2468 	LengthType level;
2469 
2470 	GET_OPTION_INC(ic,    p);
2471 	GET_LENGTH_INC(level, p);
2472 	GET_LENGTH_INC(tlen,  p);
2473 
2474 	sprev = s;
2475 	if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
2476 				  , (int )level, (int )tlen, p, &s, end)) {
2477 	  while (sprev + (len = enc_len(encode, sprev)) < s)
2478 	    sprev += len;
2479 
2480 	  p += (SIZE_MEMNUM * tlen);
2481 	}
2482 	else
2483 	  goto fail;
2484 
2485 	STAT_OP_OUT;
2486 	continue;
2487       }
2488 
2489       break;
2490 #endif
2491 
2492     case OP_SET_OPTION_PUSH:  STAT_OP_IN(OP_SET_OPTION_PUSH);
2493       GET_OPTION_INC(option, p);
2494       STACK_PUSH_ALT(p, s, sprev);
2495       p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2496       STAT_OP_OUT;
2497       continue;
2498       break;
2499 
2500     case OP_SET_OPTION:  STAT_OP_IN(OP_SET_OPTION);
2501       GET_OPTION_INC(option, p);
2502       STAT_OP_OUT;
2503       continue;
2504       break;
2505 
2506     case OP_NULL_CHECK_START:  STAT_OP_IN(OP_NULL_CHECK_START);
2507       GET_MEMNUM_INC(mem, p);    /* mem: null check id */
2508       STACK_PUSH_NULL_CHECK_START(mem, s);
2509       STAT_OP_OUT;
2510       continue;
2511       break;
2512 
2513     case OP_NULL_CHECK_END:  STAT_OP_IN(OP_NULL_CHECK_END);
2514       {
2515 	int isnull;
2516 
2517 	GET_MEMNUM_INC(mem, p); /* mem: null check id */
2518 	STACK_NULL_CHECK(isnull, mem, s);
2519 	if (isnull) {
2520 #ifdef ONIG_DEBUG_MATCH
2521 	  fprintf(stderr, "NULL_CHECK_END: skip  id:%d, s:%d\n",
2522 		  (int )mem, (int )s);
2523 #endif
2524 	null_check_found:
2525 	  /* empty loop found, skip next instruction */
2526 	  switch (*p++) {
2527 	  case OP_JUMP:
2528 	  case OP_PUSH:
2529 	    p += SIZE_RELADDR;
2530 	    break;
2531 	  case OP_REPEAT_INC:
2532 	  case OP_REPEAT_INC_NG:
2533 	  case OP_REPEAT_INC_SG:
2534 	  case OP_REPEAT_INC_NG_SG:
2535 	    p += SIZE_MEMNUM;
2536 	    break;
2537 	  default:
2538 	    goto unexpected_bytecode_error;
2539 	    break;
2540 	  }
2541 	}
2542       }
2543       STAT_OP_OUT;
2544       continue;
2545       break;
2546 
2547 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2548     case OP_NULL_CHECK_END_MEMST:  STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
2549       {
2550 	int isnull;
2551 
2552 	GET_MEMNUM_INC(mem, p); /* mem: null check id */
2553 	STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2554 	if (isnull) {
2555 #ifdef ONIG_DEBUG_MATCH
2556 	  fprintf(stderr, "NULL_CHECK_END_MEMST: skip  id:%d, s:%d\n",
2557 		  (int )mem, (int )s);
2558 #endif
2559 	  if (isnull == -1) goto fail;
2560 	  goto 	null_check_found;
2561 	}
2562       }
2563       STAT_OP_OUT;
2564       continue;
2565       break;
2566 #endif
2567 
2568 #ifdef USE_SUBEXP_CALL
2569     case OP_NULL_CHECK_END_MEMST_PUSH:
2570       STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2571       {
2572 	int isnull;
2573 
2574 	GET_MEMNUM_INC(mem, p); /* mem: null check id */
2575 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2576 	STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2577 #else
2578 	STACK_NULL_CHECK_REC(isnull, mem, s);
2579 #endif
2580 	if (isnull) {
2581 #ifdef ONIG_DEBUG_MATCH
2582 	  fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip  id:%d, s:%d\n",
2583 		  (int )mem, (int )s);
2584 #endif
2585 	  if (isnull == -1) goto fail;
2586 	  goto 	null_check_found;
2587 	}
2588 	else {
2589 	  STACK_PUSH_NULL_CHECK_END(mem);
2590 	}
2591       }
2592       STAT_OP_OUT;
2593       continue;
2594       break;
2595 #endif
2596 
2597     case OP_JUMP:  STAT_OP_IN(OP_JUMP);
2598       GET_RELADDR_INC(addr, p);
2599       p += addr;
2600       STAT_OP_OUT;
2601       CHECK_INTERRUPT_IN_MATCH_AT;
2602       continue;
2603       break;
2604 
2605     case OP_PUSH:  STAT_OP_IN(OP_PUSH);
2606       GET_RELADDR_INC(addr, p);
2607       STACK_PUSH_ALT(p + addr, s, sprev);
2608       STAT_OP_OUT;
2609       continue;
2610       break;
2611 
2612 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2613     case OP_STATE_CHECK_PUSH:  STAT_OP_IN(OP_STATE_CHECK_PUSH);
2614       GET_STATE_CHECK_NUM_INC(mem, p);
2615       STATE_CHECK_VAL(scv, mem);
2616       if (scv) goto fail;
2617 
2618       GET_RELADDR_INC(addr, p);
2619       STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2620       STAT_OP_OUT;
2621       continue;
2622       break;
2623 
2624     case OP_STATE_CHECK_PUSH_OR_JUMP:  STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2625       GET_STATE_CHECK_NUM_INC(mem, p);
2626       GET_RELADDR_INC(addr, p);
2627       STATE_CHECK_VAL(scv, mem);
2628       if (scv) {
2629 	p += addr;
2630       }
2631       else {
2632 	STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2633       }
2634       STAT_OP_OUT;
2635       continue;
2636       break;
2637 
2638     case OP_STATE_CHECK:  STAT_OP_IN(OP_STATE_CHECK);
2639       GET_STATE_CHECK_NUM_INC(mem, p);
2640       STATE_CHECK_VAL(scv, mem);
2641       if (scv) goto fail;
2642 
2643       STACK_PUSH_STATE_CHECK(s, mem);
2644       STAT_OP_OUT;
2645       continue;
2646       break;
2647 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2648 
2649     case OP_POP:  STAT_OP_IN(OP_POP);
2650       STACK_POP_ONE;
2651       STAT_OP_OUT;
2652       continue;
2653       break;
2654 
2655     case OP_PUSH_OR_JUMP_EXACT1:  STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
2656       GET_RELADDR_INC(addr, p);
2657       if (*p == *s && DATA_ENSURE_CHECK(1)) {
2658 	p++;
2659 	STACK_PUSH_ALT(p + addr, s, sprev);
2660 	STAT_OP_OUT;
2661 	continue;
2662       }
2663       p += (addr + 1);
2664       STAT_OP_OUT;
2665       continue;
2666       break;
2667 
2668     case OP_PUSH_IF_PEEK_NEXT:  STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
2669       GET_RELADDR_INC(addr, p);
2670       if (*p == *s) {
2671 	p++;
2672 	STACK_PUSH_ALT(p + addr, s, sprev);
2673 	STAT_OP_OUT;
2674 	continue;
2675       }
2676       p++;
2677       STAT_OP_OUT;
2678       continue;
2679       break;
2680 
2681     case OP_REPEAT:  STAT_OP_IN(OP_REPEAT);
2682       {
2683 	GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
2684 	GET_RELADDR_INC(addr, p);
2685 
2686 	STACK_ENSURE(1);
2687 	repeat_stk[mem] = GET_STACK_INDEX(stk);
2688 	STACK_PUSH_REPEAT(mem, p);
2689 
2690 	if (reg->repeat_range[mem].lower == 0) {
2691 	  STACK_PUSH_ALT(p + addr, s, sprev);
2692 	}
2693       }
2694       STAT_OP_OUT;
2695       continue;
2696       break;
2697 
2698     case OP_REPEAT_NG:  STAT_OP_IN(OP_REPEAT_NG);
2699       {
2700 	GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
2701 	GET_RELADDR_INC(addr, p);
2702 
2703 	STACK_ENSURE(1);
2704 	repeat_stk[mem] = GET_STACK_INDEX(stk);
2705 	STACK_PUSH_REPEAT(mem, p);
2706 
2707 	if (reg->repeat_range[mem].lower == 0) {
2708 	  STACK_PUSH_ALT(p, s, sprev);
2709 	  p += addr;
2710 	}
2711       }
2712       STAT_OP_OUT;
2713       continue;
2714       break;
2715 
2716     case OP_REPEAT_INC:  STAT_OP_IN(OP_REPEAT_INC);
2717       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2718       si = repeat_stk[mem];
2719       stkp = STACK_AT(si);
2720 
2721     repeat_inc:
2722       stkp->u.repeat.count++;
2723       if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2724         /* end of repeat. Nothing to do. */
2725       }
2726       else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2727         STACK_PUSH_ALT(p, s, sprev);
2728         p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2729       }
2730       else {
2731         p = stkp->u.repeat.pcode;
2732       }
2733       STACK_PUSH_REPEAT_INC(si);
2734       STAT_OP_OUT;
2735       CHECK_INTERRUPT_IN_MATCH_AT;
2736       continue;
2737       break;
2738 
2739     case OP_REPEAT_INC_SG:  STAT_OP_IN(OP_REPEAT_INC_SG);
2740       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2741       STACK_GET_REPEAT(mem, stkp);
2742       si = GET_STACK_INDEX(stkp);
2743       goto repeat_inc;
2744       break;
2745 
2746     case OP_REPEAT_INC_NG:  STAT_OP_IN(OP_REPEAT_INC_NG);
2747       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2748       si = repeat_stk[mem];
2749       stkp = STACK_AT(si);
2750 
2751     repeat_inc_ng:
2752       stkp->u.repeat.count++;
2753       if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2754         if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2755           UChar* pcode = stkp->u.repeat.pcode;
2756 
2757           STACK_PUSH_REPEAT_INC(si);
2758           STACK_PUSH_ALT(pcode, s, sprev);
2759         }
2760         else {
2761           p = stkp->u.repeat.pcode;
2762           STACK_PUSH_REPEAT_INC(si);
2763         }
2764       }
2765       else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2766         STACK_PUSH_REPEAT_INC(si);
2767       }
2768       STAT_OP_OUT;
2769       CHECK_INTERRUPT_IN_MATCH_AT;
2770       continue;
2771       break;
2772 
2773     case OP_REPEAT_INC_NG_SG:  STAT_OP_IN(OP_REPEAT_INC_NG_SG);
2774       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2775       STACK_GET_REPEAT(mem, stkp);
2776       si = GET_STACK_INDEX(stkp);
2777       goto repeat_inc_ng;
2778       break;
2779 
2780     case OP_PUSH_POS:  STAT_OP_IN(OP_PUSH_POS);
2781       STACK_PUSH_POS(s, sprev);
2782       STAT_OP_OUT;
2783       continue;
2784       break;
2785 
2786     case OP_POP_POS:  STAT_OP_IN(OP_POP_POS);
2787       {
2788 	STACK_POS_END(stkp);
2789 	s     = stkp->u.state.pstr;
2790 	sprev = stkp->u.state.pstr_prev;
2791       }
2792       STAT_OP_OUT;
2793       continue;
2794       break;
2795 
2796     case OP_PUSH_POS_NOT:  STAT_OP_IN(OP_PUSH_POS_NOT);
2797       GET_RELADDR_INC(addr, p);
2798       STACK_PUSH_POS_NOT(p + addr, s, sprev);
2799       STAT_OP_OUT;
2800       continue;
2801       break;
2802 
2803     case OP_FAIL_POS:  STAT_OP_IN(OP_FAIL_POS);
2804       STACK_POP_TIL_POS_NOT;
2805       goto fail;
2806       break;
2807 
2808     case OP_PUSH_STOP_BT:  STAT_OP_IN(OP_PUSH_STOP_BT);
2809       STACK_PUSH_STOP_BT;
2810       STAT_OP_OUT;
2811       continue;
2812       break;
2813 
2814     case OP_POP_STOP_BT:  STAT_OP_IN(OP_POP_STOP_BT);
2815       STACK_STOP_BT_END;
2816       STAT_OP_OUT;
2817       continue;
2818       break;
2819 
2820     case OP_LOOK_BEHIND:  STAT_OP_IN(OP_LOOK_BEHIND);
2821       GET_LENGTH_INC(tlen, p);
2822       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2823       if (IS_NULL(s)) goto fail;
2824       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2825       STAT_OP_OUT;
2826       continue;
2827       break;
2828 
2829     case OP_PUSH_LOOK_BEHIND_NOT:  STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2830       GET_RELADDR_INC(addr, p);
2831       GET_LENGTH_INC(tlen, p);
2832       q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2833       if (IS_NULL(q)) {
2834 	/* too short case -> success. ex. /(?<!XXX)a/.match("a")
2835 	   If you want to change to fail, replace following line. */
2836 	p += addr;
2837 	/* goto fail; */
2838       }
2839       else {
2840 	STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2841 	s = q;
2842 	sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2843       }
2844       STAT_OP_OUT;
2845       continue;
2846       break;
2847 
2848     case OP_FAIL_LOOK_BEHIND_NOT:  STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2849       STACK_POP_TIL_LOOK_BEHIND_NOT;
2850       goto fail;
2851       break;
2852 
2853 #ifdef USE_SUBEXP_CALL
2854     case OP_CALL:  STAT_OP_IN(OP_CALL);
2855       GET_ABSADDR_INC(addr, p);
2856       STACK_PUSH_CALL_FRAME(p);
2857       p = reg->p + addr;
2858       STAT_OP_OUT;
2859       continue;
2860       break;
2861 
2862     case OP_RETURN:  STAT_OP_IN(OP_RETURN);
2863       STACK_RETURN(p);
2864       STACK_PUSH_RETURN;
2865       STAT_OP_OUT;
2866       continue;
2867       break;
2868 #endif
2869 
2870     case OP_FINISH:
2871       goto finish;
2872       break;
2873 
2874     fail:
2875       STAT_OP_OUT;
2876       /* fall */
2877     case OP_FAIL:  STAT_OP_IN(OP_FAIL);
2878       STACK_POP;
2879       p     = stk->u.state.pcode;
2880       s     = stk->u.state.pstr;
2881       sprev = stk->u.state.pstr_prev;
2882 
2883 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2884       if (stk->u.state.state_check != 0) {
2885         stk->type = STK_STATE_CHECK_MARK;
2886         stk++;
2887       }
2888 #endif
2889 
2890       STAT_OP_OUT;
2891       continue;
2892       break;
2893 
2894     default:
2895       goto bytecode_error;
2896 
2897     } /* end of switch */
2898     sprev = sbegin;
2899   } /* end of while(1) */
2900 
2901  finish:
2902   STACK_SAVE;
2903   return best_len;
2904 
2905 #ifdef ONIG_DEBUG
2906  stack_error:
2907   STACK_SAVE;
2908   return ONIGERR_STACK_BUG;
2909 #endif
2910 
2911  bytecode_error:
2912   STACK_SAVE;
2913   return ONIGERR_UNDEFINED_BYTECODE;
2914 
2915  unexpected_bytecode_error:
2916   STACK_SAVE;
2917   return ONIGERR_UNEXPECTED_BYTECODE;
2918 }
2919 
2920 
2921 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2922 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2923 	    const UChar* text, const UChar* text_end, UChar* text_range)
2924 {
2925   UChar *t, *p, *s, *end;
2926 
2927   end = (UChar* )text_end;
2928   end -= target_end - target - 1;
2929   if (end > text_range)
2930     end = text_range;
2931 
2932   s = (UChar* )text;
2933 
2934   while (s < end) {
2935     if (*s == *target) {
2936       p = s + 1;
2937       t = target + 1;
2938       while (t < target_end) {
2939 	if (*t != *p++)
2940 	  break;
2941 	t++;
2942       }
2943       if (t == target_end)
2944 	return s;
2945     }
2946     s += enc_len(enc, s);
2947   }
2948 
2949   return (UChar* )NULL;
2950 }
2951 
2952 static int
str_lower_case_match(OnigEncoding enc,int ambig_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2953 str_lower_case_match(OnigEncoding enc, int ambig_flag,
2954                      const UChar* t, const UChar* tend,
2955 		     const UChar* p, const UChar* end)
2956 {
2957   int lowlen;
2958   UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
2959   const UChar* tsave;
2960   const UChar* psave;
2961 
2962   tsave = t;
2963   psave = p;
2964 
2965   while (t < tend) {
2966     lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
2967     q = lowbuf;
2968     while (lowlen > 0) {
2969       if (*t++ != *q++) {
2970 	return 0;
2971       }
2972       lowlen--;
2973     }
2974   }
2975 
2976   return 1;
2977 }
2978 
2979 static UChar*
slow_search_ic(OnigEncoding enc,int ambig_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2980 slow_search_ic(OnigEncoding enc, int ambig_flag,
2981 	       UChar* target, UChar* target_end,
2982 	       const UChar* text, const UChar* text_end, UChar* text_range)
2983 {
2984   UChar *s, *end;
2985 
2986   end = (UChar* )text_end;
2987   end -= target_end - target - 1;
2988   if (end > text_range)
2989     end = text_range;
2990 
2991   s = (UChar* )text;
2992 
2993   while (s < end) {
2994     if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
2995       return s;
2996 
2997     s += enc_len(enc, s);
2998   }
2999 
3000   return (UChar* )NULL;
3001 }
3002 
3003 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3004 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3005 		     const UChar* text, const UChar* adjust_text,
3006 		     const UChar* text_end, const UChar* text_start)
3007 {
3008   UChar *t, *p, *s;
3009 
3010   s = (UChar* )text_end;
3011   s -= (target_end - target);
3012   if (s > text_start)
3013     s = (UChar* )text_start;
3014   else
3015     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3016 
3017   while (s >= text) {
3018     if (*s == *target) {
3019       p = s + 1;
3020       t = target + 1;
3021       while (t < target_end) {
3022 	if (*t != *p++)
3023 	  break;
3024 	t++;
3025       }
3026       if (t == target_end)
3027 	return s;
3028     }
3029     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3030   }
3031 
3032   return (UChar* )NULL;
3033 }
3034 
3035 static UChar*
slow_search_backward_ic(OnigEncoding enc,int ambig_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3036 slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
3037 			UChar* target, UChar* target_end,
3038 			const UChar* text, const UChar* adjust_text,
3039 			const UChar* text_end, const UChar* text_start)
3040 {
3041   UChar *s;
3042 
3043   s = (UChar* )text_end;
3044   s -= (target_end - target);
3045   if (s > text_start)
3046     s = (UChar* )text_start;
3047   else
3048     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3049 
3050   while (s >= text) {
3051     if (str_lower_case_match(enc, ambig_flag,
3052                              target, target_end, s, text_end))
3053       return s;
3054 
3055     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3056   }
3057 
3058   return (UChar* )NULL;
3059 }
3060 
3061 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)3062 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3063 		 const UChar* text, const UChar* text_end,
3064 		 const UChar* text_range)
3065 {
3066   const UChar *s, *se, *t, *p, *end;
3067   const UChar *tail;
3068   int skip, tlen1;
3069 
3070 #ifdef ONIG_DEBUG_SEARCH
3071   fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
3072 	  (int )text, (int )text_end, (int )text_range);
3073 #endif
3074 
3075   tail = target_end - 1;
3076   tlen1 = tail - target;
3077   end = text_range;
3078   if (end + tlen1 > text_end)
3079     end = text_end - tlen1;
3080 
3081   s = text;
3082 
3083   if (IS_NULL(reg->int_map)) {
3084     while (s < end) {
3085       p = se = s + tlen1;
3086       t = tail;
3087       while (t >= target && *p == *t) {
3088         p--; t--;
3089       }
3090       if (t < target) return (UChar* )s;
3091 
3092       skip = reg->map[*se];
3093       t = s;
3094       do {
3095         s += enc_len(reg->enc, s);
3096       } while ((s - t) < skip && s < end);
3097     }
3098   }
3099   else {
3100     while (s < end) {
3101       p = se = s + tlen1;
3102       t = tail;
3103       while (t >= target && *p == *t) {
3104         p--; t--;
3105       }
3106       if (t < target) return (UChar* )s;
3107 
3108       skip = reg->int_map[*se];
3109       t = s;
3110       do {
3111         s += enc_len(reg->enc, s);
3112       } while ((s - t) < skip && s < end);
3113     }
3114   }
3115 
3116   return (UChar* )NULL;
3117 }
3118 
3119 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)3120 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3121 	  const UChar* text, const UChar* text_end, const UChar* text_range)
3122 {
3123   const UChar *s, *t, *p, *end;
3124   const UChar *tail;
3125 
3126   end = text_range + (target_end - target) - 1;
3127   if (end > text_end)
3128     end = text_end;
3129 
3130   tail = target_end - 1;
3131   s = text + (target_end - target) - 1;
3132   if (IS_NULL(reg->int_map)) {
3133     while (s < end) {
3134       p = s;
3135       t = tail;
3136       while (t >= target && *p == *t) {
3137 	p--; t--;
3138       }
3139       if (t < target) return (UChar* )(p + 1);
3140       s += reg->map[*s];
3141     }
3142   }
3143   else { /* see int_map[] */
3144     while (s < end) {
3145       p = s;
3146       t = tail;
3147       while (t >= target && *p == *t) {
3148 	p--; t--;
3149       }
3150       if (t < target) return (UChar* )(p + 1);
3151       s += reg->int_map[*s];
3152     }
3153   }
3154   return (UChar* )NULL;
3155 }
3156 
3157 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc,int ** skip)3158 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
3159 
3160 {
3161   int i, len;
3162 
3163   if (IS_NULL(*skip)) {
3164     *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3165     if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3166   }
3167 
3168   len = end - s;
3169   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3170     (*skip)[i] = len;
3171 
3172   for (i = len - 1; i > 0; i--)
3173     (*skip)[s[i]] = i;
3174 
3175   return 0;
3176 }
3177 
3178 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3179 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3180 		   const UChar* text, const UChar* adjust_text,
3181 		   const UChar* text_end, const UChar* text_start)
3182 {
3183   const UChar *s, *t, *p;
3184 
3185   s = text_end - (target_end - target);
3186   if (text_start < s)
3187     s = text_start;
3188   else
3189     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3190 
3191   while (s >= text) {
3192     p = s;
3193     t = target;
3194     while (t < target_end && *p == *t) {
3195       p++; t++;
3196     }
3197     if (t == target_end)
3198       return (UChar* )s;
3199 
3200     s -= reg->int_map_backward[*s];
3201     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3202   }
3203 
3204   return (UChar* )NULL;
3205 }
3206 
3207 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3208 map_search(OnigEncoding enc, UChar map[],
3209 	   const UChar* text, const UChar* text_range)
3210 {
3211   const UChar *s = text;
3212 
3213   while (s < text_range) {
3214     if (map[*s]) return (UChar* )s;
3215 
3216     s += enc_len(enc, s);
3217   }
3218   return (UChar* )NULL;
3219 }
3220 
3221 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3222 map_search_backward(OnigEncoding enc, UChar map[],
3223 		    const UChar* text, const UChar* adjust_text,
3224 		    const UChar* text_start)
3225 {
3226   const UChar *s = text_start;
3227 
3228   while (s >= text) {
3229     if (map[*s]) return (UChar* )s;
3230 
3231     s = onigenc_get_prev_char_head(enc, adjust_text, s);
3232   }
3233   return (UChar* )NULL;
3234 }
3235 
3236 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3237 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3238 	    OnigOptionType option)
3239 {
3240   int r;
3241   UChar *prev;
3242   MatchArg msa;
3243 
3244 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3245  start:
3246   THREAD_ATOMIC_START;
3247   if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3248     ONIG_STATE_INC(reg);
3249     if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3250       onig_chain_reduce(reg);
3251       ONIG_STATE_INC(reg);
3252     }
3253   }
3254   else {
3255     int n;
3256 
3257     THREAD_ATOMIC_END;
3258     n = 0;
3259     while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3260       if (++n > THREAD_PASS_LIMIT_COUNT)
3261 	return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3262       THREAD_PASS;
3263     }
3264     goto start;
3265   }
3266   THREAD_ATOMIC_END;
3267 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3268 
3269   MATCH_ARG_INIT(msa, option, region, at);
3270 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3271   {
3272     int offset = at - str;
3273     STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3274   }
3275 #endif
3276 
3277   if (region
3278 #ifdef USE_POSIX_REGION_OPTION
3279       && !IS_POSIX_REGION(option)
3280 #endif
3281       ) {
3282     r = onig_region_resize_clear(region, reg->num_mem + 1);
3283   }
3284   else
3285     r = 0;
3286 
3287   if (r == 0) {
3288     prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3289     r = match_at(reg, str, end, at, prev, &msa);
3290   }
3291 
3292   MATCH_ARG_FREE(msa);
3293   ONIG_STATE_DEC_THREAD(reg);
3294   return r;
3295 }
3296 
3297 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3298 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3299 		     UChar* range, UChar** low, UChar** high, UChar** low_prev)
3300 {
3301   UChar *p, *pprev = (UChar* )NULL;
3302 
3303 #ifdef ONIG_DEBUG_SEARCH
3304   fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3305 	  (int )str, (int )end, (int )s, (int )range);
3306 #endif
3307 
3308   p = s;
3309   if (reg->dmin > 0) {
3310     if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3311       p += reg->dmin;
3312     }
3313     else {
3314       UChar *q = p + reg->dmin;
3315       while (p < q) p += enc_len(reg->enc, p);
3316     }
3317   }
3318 
3319  retry:
3320   switch (reg->optimize) {
3321   case ONIG_OPTIMIZE_EXACT:
3322     p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3323     break;
3324   case ONIG_OPTIMIZE_EXACT_IC:
3325     p = slow_search_ic(reg->enc, reg->ambig_flag,
3326                        reg->exact, reg->exact_end, p, end, range);
3327     break;
3328 
3329   case ONIG_OPTIMIZE_EXACT_BM:
3330     p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3331     break;
3332 
3333   case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3334     p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3335     break;
3336 
3337   case ONIG_OPTIMIZE_MAP:
3338     p = map_search(reg->enc, reg->map, p, range);
3339     break;
3340   }
3341 
3342   if (p && p < range) {
3343     if (p - reg->dmin < s) {
3344     retry_gate:
3345       pprev = p;
3346       p += enc_len(reg->enc, p);
3347       goto retry;
3348     }
3349 
3350     if (reg->sub_anchor) {
3351       UChar* prev;
3352 
3353       switch (reg->sub_anchor) {
3354       case ANCHOR_BEGIN_LINE:
3355 	if (!ON_STR_BEGIN(p)) {
3356 	  prev = onigenc_get_prev_char_head(reg->enc,
3357 					    (pprev ? pprev : str), p);
3358 	  if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3359 	    goto retry_gate;
3360 	}
3361 	break;
3362 
3363       case ANCHOR_END_LINE:
3364 	if (ON_STR_END(p)) {
3365 	  prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3366 					    (pprev ? pprev : str), p);
3367 	  if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3368 	    goto retry_gate;
3369 	}
3370 	else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3371 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3372               && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3373 #endif
3374                 )
3375 	  goto retry_gate;
3376 	break;
3377       }
3378     }
3379 
3380     if (reg->dmax == 0) {
3381       *low = p;
3382       if (low_prev) {
3383 	if (*low > s)
3384 	  *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3385 	else
3386 	  *low_prev = onigenc_get_prev_char_head(reg->enc,
3387 						 (pprev ? pprev : str), p);
3388       }
3389     }
3390     else {
3391       if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3392 	*low = p - reg->dmax;
3393 	if (*low > s) {
3394 	  *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3395 							      *low, (const UChar** )low_prev);
3396 	  if (low_prev && IS_NULL(*low_prev))
3397 	    *low_prev = onigenc_get_prev_char_head(reg->enc,
3398 						   (pprev ? pprev : s), *low);
3399 	}
3400 	else {
3401 	  if (low_prev)
3402 	    *low_prev = onigenc_get_prev_char_head(reg->enc,
3403 					       (pprev ? pprev : str), *low);
3404 	}
3405       }
3406     }
3407     /* no needs to adjust *high, *high is used as range check only */
3408     *high = p - reg->dmin;
3409 
3410 #ifdef ONIG_DEBUG_SEARCH
3411     fprintf(stderr,
3412     "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3413 	    (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3414 #endif
3415     return 1; /* success */
3416   }
3417 
3418   return 0; /* fail */
3419 }
3420 
/* Prototype of the builder for the skip table used by bm_search_backward(). */
3421 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3422 				    int** skip));
3423 
/* While reg->int_map_backward has not been built yet, backward_search_range()
   uses the plain exact search instead of building the table when the span
   to scan (s - range) is shorter than this many bytes. */
3424 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100
3425 
/*
 * Backward counterpart of forward_search_range(): starting at s and moving
 * toward range, look for the optimizer target (exact string or byte map)
 * stored in reg.
 *
 *   str, end  : the whole subject [str, end)
 *   s         : position the backward scan starts from
 *   range     : lower bound of the search (reg->dmin is added to it here)
 *   adjrange  : base pointer handed to the character-head adjusters
 *   low, high : out; written only when reg->dmax is not
 *               ONIG_INFINITE_DISTANCE, as (hit - dmax) and (hit - dmin),
 *               the latter right-adjusted to a character head
 *
 * Returns 1 on success, 0 on failure, or the non-zero error code of
 * set_bm_backward_skip() if building the skip table fails.
 */
3426 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3427 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3428 		      UChar* s, const UChar* range, UChar* adjrange,
3429 		      UChar** low, UChar** high)
3430 {
3431   int r;
3432   UChar *p;
3433 
  /* the hit is reported as (p - dmin) below, so shift the lower bound too */
3434   range += reg->dmin;
3435   p = s;
3436 
3437  retry:
3438   switch (reg->optimize) {
3439   case ONIG_OPTIMIZE_EXACT:
  /* also entered by goto from the BM cases for short scans */
3440   exact_method:
3441     p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3442 			     range, adjrange, end, p);
3443     break;
3444 
3445   case ONIG_OPTIMIZE_EXACT_IC:
3446     p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
3447                                 reg->exact, reg->exact_end,
3448                                 range, adjrange, end, p);
3449     break;
3450 
3451   case ONIG_OPTIMIZE_EXACT_BM:
3452   case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
    /* the backward skip table is built lazily, and only when the span is
       at least BM_BACKWARD_SEARCH_LENGTH_THRESHOLD bytes long */
3453     if (IS_NULL(reg->int_map_backward)) {
3454       if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3455 	goto exact_method;
3456 
3457       r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3458 			       &(reg->int_map_backward));
3459       if (r) return r;
3460     }
3461     p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3462 			   end, p);
3463     break;
3464 
3465   case ONIG_OPTIMIZE_MAP:
3466     p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3467     break;
3468   }
3469 
3470   if (p) {
    /* a hit that violates the line anchor is rejected: step back one
       character and search again */
3471     if (reg->sub_anchor) {
3472       UChar* prev;
3473 
3474       switch (reg->sub_anchor) {
3475       case ANCHOR_BEGIN_LINE:
3476 	if (!ON_STR_BEGIN(p)) {
3477 	  prev = onigenc_get_prev_char_head(reg->enc, str, p);
3478 	  if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3479 	    p = prev;
3480 	    goto retry;
3481 	  }
3482 	}
3483 	break;
3484 
3485       case ANCHOR_END_LINE:
3486 	if (ON_STR_END(p)) {
3487 	  prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3488 	  if (IS_NULL(prev)) goto fail;
3489 	  if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3490 	    p = prev;
3491 	    goto retry;
3492 	  }
3493 	}
3494 	else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3495 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3496               && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3497 #endif
3498                 ) {
3499 	  p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3500 	  if (IS_NULL(p)) goto fail;
3501 	  goto retry;
3502 	}
3503 	break;
3504       }
3505     }
3506 
3507     /* no need to adjust *low; only *high is moved to a character head */
3508     if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3509       *low  = p - reg->dmax;
3510       *high = p - reg->dmin;
3511       *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3512     }
3513 
3514 #ifdef ONIG_DEBUG_SEARCH
3515     fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3516 	    (int )(*low - str), (int )(*high - str));
3517 #endif
3518     return 1; /* success */
3519   }
3520 
3521  fail:
3522 #ifdef ONIG_DEBUG_SEARCH
3523   fprintf(stderr, "backward_search_range: fail.\n");
3524 #endif
3525   return 0; /* fail */
3526 }
3527 
3528 
/*
 * Search the subject [str, end) for a match of reg.
 *
 * Candidate positions are tried with match_at(), beginning at start and
 * moving toward range: forward when range > start, backward otherwise.
 *
 *   region : if non-NULL (and, with USE_POSIX_REGION_OPTION, not a POSIX
 *            region) it is resized/cleared to reg->num_mem + 1 entries
 *            before searching
 *   option : handed to MATCH_ARG_INIT and checked for the POSIX region flag
 *
 * Returns the byte offset (s - str) of the position at which match_at()
 * succeeded, ONIG_MISMATCH when no position matches, or an error code.
 */
3529 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3530 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3531 	    const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3532 {
3533   int r;
3534   UChar *s, *prev;
3535   MatchArg msa;
  /* start may be moved by the anchor optimization below; the match
     argument is still initialized with the caller's value */
3536   const UChar *orig_start = start;
3537 
3538 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
  /* Bump the regex state while it is in use.  If it is not in the NORMAL
     state yet, yield with THREAD_PASS (at most THREAD_PASS_LIMIT_COUNT
     times) and try again. */
3539  start:
3540   THREAD_ATOMIC_START;
3541   if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3542     ONIG_STATE_INC(reg);
3543     if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3544       onig_chain_reduce(reg);
3545       ONIG_STATE_INC(reg);
3546     }
3547   }
3548   else {
3549     int n;
3550 
3551     THREAD_ATOMIC_END;
3552     n = 0;
3553     while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3554       if (++n > THREAD_PASS_LIMIT_COUNT)
3555 	return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3556       THREAD_PASS;
3557     }
3558     goto start;
3559   }
3560   THREAD_ATOMIC_END;
3561 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3562 
3563 #ifdef ONIG_DEBUG_SEARCH
3564   fprintf(stderr,
3565      "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3566      (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3567 #endif
3568 
3569   if (region
3570 #ifdef USE_POSIX_REGION_OPTION
3571       && !IS_POSIX_REGION(option)
3572 #endif
3573       ) {
3574     r = onig_region_resize_clear(region, reg->num_mem + 1);
3575     if (r) goto finish_no_msa;
3576   }
3577 
  /* start must lie inside [str, end] */
3578   if (start > end || start < str) goto mismatch_no_msa;
3579 
/* MATCH_AND_RETURN_CHECK: run match_at() at s.  A result >= 0 jumps to
   `match' (unless the longest match is being collected over the whole
   range), any other result except ONIG_MISMATCH jumps to `finish' as an
   error, and ONIG_MISMATCH falls through to the next candidate. */
3580 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3581 #define MATCH_AND_RETURN_CHECK \
3582   r = match_at(reg, str, end, s, prev, &msa);\
3583   if (r != ONIG_MISMATCH) {\
3584     if (r >= 0) {\
3585       if (! IS_FIND_LONGEST(reg->options)) {\
3586         goto match;\
3587       }\
3588     }\
3589     else goto finish; /* error */ \
3590   }
3591 #else
3592 #define MATCH_AND_RETURN_CHECK \
3593   r = match_at(reg, str, end, s, prev, &msa);\
3594   if (r != ONIG_MISMATCH) {\
3595     if (r >= 0) {\
3596       goto match;\
3597     }\
3598     else goto finish; /* error */ \
3599   }
3600 #endif
3601 
3602   /* anchor optimize: resume search range */
3603   if (reg->anchor != 0 && str < end) {
3604     UChar *min_semi_end, *max_semi_end;
3605 
3606     if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3607       /* search start-position only */
3608     begin_position:
3609       if (range > start)
3610 	range = start + 1;
3611       else
3612 	range = start;
3613     }
3614     else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3615       /* search str-position only */
3616       if (range > start) {
3617 	if (start != str) goto mismatch_no_msa;
3618 	range = str + 1;
3619       }
3620       else {
3621 	if (range <= str) {
3622 	  start = str;
3623 	  range = str;
3624 	}
3625 	else
3626 	  goto mismatch_no_msa;
3627       }
3628     }
3629     else if (reg->anchor & ANCHOR_END_BUF) {
3630       min_semi_end = max_semi_end = (UChar* )end;
3631 
      /* Use the anchor's distance bounds (anchor_dmin / anchor_dmax,
         measured back from the end positions) to tighten start and range;
         also reached from the ANCHOR_SEMI_END_BUF case below. */
3632     end_buf:
3633       if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3634 	goto mismatch_no_msa;
3635 
3636       if (range > start) {
3637 	if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3638 	  start = min_semi_end - reg->anchor_dmax;
3639 	  if (start < end)
3640 	    start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3641 	  else { /* match with empty at end */
3642 	    start = onigenc_get_prev_char_head(reg->enc, str, end);
3643 	  }
3644 	}
3645 	if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3646 	  range = max_semi_end - reg->anchor_dmin + 1;
3647 	}
3648 
3649 	if (start >= range) goto mismatch_no_msa;
3650       }
3651       else {
3652 	if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3653 	  range = min_semi_end - reg->anchor_dmax;
3654 	}
3655 	if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3656 	  start = max_semi_end - reg->anchor_dmin;
3657 	  start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3658 	}
3659 	if (range > start) goto mismatch_no_msa;
3660       }
3661     }
3662     else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
      /* the end position may be `end' itself or the head of a trailing
         newline (or CR-NL pair when enabled) */
3663       UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3664 
3665       max_semi_end = (UChar* )end;
3666       if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3667 	min_semi_end = pre_end;
3668 
3669 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3670 	pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3671 	if (IS_NOT_NULL(pre_end) &&
3672 	    ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3673 	  min_semi_end = pre_end;
3674 	}
3675 #endif
3676 	if (min_semi_end > str && start <= min_semi_end) {
3677 	  goto end_buf;
3678 	}
3679       }
3680       else {
3681 	min_semi_end = (UChar* )end;
3682 	goto end_buf;
3683       }
3684     }
3685     else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3686       goto begin_position;
3687     }
3688   }
3689   else if (str == end) { /* empty string */
    /* str, end and start are all redirected to this static empty string
       before match_at() is called -- presumably so that it never receives
       a NULL subject pointer (TODO confirm) */
3690     static const UChar* address_for_empty_string = (UChar* )"";
3691 
3692 #ifdef ONIG_DEBUG_SEARCH
3693     fprintf(stderr, "onig_search: empty string.\n");
3694 #endif
3695 
3696     if (reg->threshold_len == 0) {
3697       start = end = str = address_for_empty_string;
3698       s = (UChar* )start;
3699       prev = (UChar* )NULL;
3700 
3701       MATCH_ARG_INIT(msa, option, region, start);
3702 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3703       msa.state_check_buff      = (void* )0;
3704       msa.state_check_buff_size = 0;
3705 #endif
3706       MATCH_AND_RETURN_CHECK;
3707       goto mismatch;
3708     }
3709     goto mismatch_no_msa;
3710   }
3711 
3712 #ifdef ONIG_DEBUG_SEARCH
3713   fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3714 	  (int )(end - str), (int )(start - str), (int )(range - str));
3715 #endif
3716 
  /* from here on msa is initialized, so exits go through `finish',
     `mismatch' or `match', which release it */
3717   MATCH_ARG_INIT(msa, option, region, orig_start);
3718 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3719   {
3720     int offset = (MIN(start, range) - str);
3721     STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3722   }
3723 #endif
3724 
3725   s = (UChar* )start;
3726   if (range > start) {   /* forward search */
3727     if (s > str)
3728       prev = onigenc_get_prev_char_head(reg->enc, str, s);
3729     else
3730       prev = (UChar* )NULL;
3731 
3732     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3733       UChar *sch_range, *low, *high, *low_prev;
3734 
      /* the optimizer hit may lie up to dmax bytes beyond the match start,
         so widen the hit range accordingly (capped at end) */
3735       sch_range = (UChar* )range;
3736       if (reg->dmax != 0) {
3737 	if (reg->dmax == ONIG_INFINITE_DISTANCE)
3738 	  sch_range = (UChar* )end;
3739 	else {
3740 	  sch_range += reg->dmax;
3741 	  if (sch_range > end) sch_range = (UChar* )end;
3742 	}
3743       }
3744 
      /* fewer than threshold_len bytes remain: give up early */
3745       if ((end - start) < reg->threshold_len)
3746         goto mismatch;
3747 
3748       if (reg->dmax != ONIG_INFINITE_DISTANCE) {
	/* bounded distance: only the positions in [low, high] reported by
	   forward_search_range() are handed to match_at() */
3749 	do {
3750 	  if (! forward_search_range(reg, str, end, s, sch_range,
3751 				     &low, &high, &low_prev)) goto mismatch;
3752 	  if (s < low) {
3753 	    s    = low;
3754 	    prev = low_prev;
3755 	  }
3756 	  while (s <= high) {
3757 	    MATCH_AND_RETURN_CHECK;
3758 	    prev = s;
3759 	    s += enc_len(reg->enc, s);
3760 	  }
3761 	} while (s < range);
3762 	goto mismatch;
3763       }
3764       else { /* check only. */
3765 	if (! forward_search_range(reg, str, end, s, sch_range,
3766 				   &low, &high, (UChar** )NULL)) goto mismatch;
3767 
3768         if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
          /* after a failed attempt, skip ahead until the previous
             character is a newline (or range is reached) before
             trying again */
3769           do {
3770             MATCH_AND_RETURN_CHECK;
3771             prev = s;
3772             s += enc_len(reg->enc, s);
3773 
3774             while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3775               prev = s;
3776               s += enc_len(reg->enc, s);
3777             }
3778           } while (s < range);
3779           goto mismatch;
3780         }
3781       }
3782     }
3783 
    /* plain scan: try every character position below range */
3784     do {
3785       MATCH_AND_RETURN_CHECK;
3786       prev = s;
3787       s += enc_len(reg->enc, s);
3788     } while (s < range);
3789 
3790     if (s == range) { /* because empty match with /$/. */
3791       MATCH_AND_RETURN_CHECK;
3792     }
3793   }
3794   else {  /* backward search */
3795     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3796       UChar *low, *high, *adjrange, *sch_start;
3797 
3798       if (range < end)
3799 	adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3800       else
3801 	adjrange = (UChar* )end;
3802 
3803       if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3804 	  (end - range) >= reg->threshold_len) {
	/* bounded distance: walk backward through [low, high] as reported
	   by backward_search_range(); an error from it (<0) is treated
	   like a failed search here */
3805 	do {
3806 	  sch_start = s + reg->dmax;
3807 	  if (sch_start > end) sch_start = (UChar* )end;
3808 	  if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3809 				    &low, &high) <= 0)
3810 	    goto mismatch;
3811 
3812 	  if (s > high)
3813 	    s = high;
3814 
3815 	  while (s >= low) {
3816 	    prev = onigenc_get_prev_char_head(reg->enc, str, s);
3817 	    MATCH_AND_RETURN_CHECK;
3818 	    s = prev;
3819 	  }
3820 	} while (s >= range);
3821 	goto mismatch;
3822       }
3823       else { /* check only. */
3824 	if ((end - range) < reg->threshold_len) goto mismatch;
3825 
3826 	sch_start = s;
3827 	if (reg->dmax != 0) {
3828 	  if (reg->dmax == ONIG_INFINITE_DISTANCE)
3829 	    sch_start = (UChar* )end;
3830 	  else {
3831 	    sch_start += reg->dmax;
3832 	    if (sch_start > end) sch_start = (UChar* )end;
3833 	    else
3834 	      sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3835 						    start, sch_start);
3836 	  }
3837 	}
3838 	if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3839 				  &low, &high) <= 0) goto mismatch;
3840       }
3841     }
3842 
    /* plain scan: try every character position from s down to range */
3843     do {
3844       prev = onigenc_get_prev_char_head(reg->enc, str, s);
3845       MATCH_AND_RETURN_CHECK;
3846       s = prev;
3847     } while (s >= range);
3848   }
3849 
3850  mismatch:
3851 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* in longest-match mode a recorded best match turns this into success */
3852   if (IS_FIND_LONGEST(reg->options)) {
3853     if (msa.best_len >= 0) {
3854       s = msa.best_s;
3855       goto match;
3856     }
3857   }
3858 #endif
3859   r = ONIG_MISMATCH;
3860 
3861  finish:
3862   MATCH_ARG_FREE(msa);
3863   ONIG_STATE_DEC_THREAD(reg);
3864 
3865   /* If result is mismatch and no FIND_NOT_EMPTY option,
3866      then the region is not set in match_at(). */
3867   if (IS_FIND_NOT_EMPTY(reg->options) && region
3868 #ifdef USE_POSIX_REGION_OPTION
3869       && !IS_POSIX_REGION(option)
3870 #endif
3871       ) {
3872     onig_region_clear(region);
3873   }
3874 
3875 #ifdef ONIG_DEBUG
3876   if (r != ONIG_MISMATCH)
3877     fprintf(stderr, "onig_search: error %d\n", r);
3878 #endif
3879   return r;
3880 
  /* exits taken before msa has been initialized: nothing to free */
3881  mismatch_no_msa:
3882   r = ONIG_MISMATCH;
3883  finish_no_msa:
3884   ONIG_STATE_DEC_THREAD(reg);
3885 #ifdef ONIG_DEBUG
3886   if (r != ONIG_MISMATCH)
3887     fprintf(stderr, "onig_search: error %d\n", r);
3888 #endif
3889   return r;
3890 
3891  match:
3892   ONIG_STATE_DEC_THREAD(reg);
3893   MATCH_ARG_FREE(msa);
3894   return s - str;
3895 }
3896 
3897 extern OnigEncoding
onig_get_encoding(regex_t * reg)3898 onig_get_encoding(regex_t* reg)
3899 {
3900   return reg->enc;
3901 }
3902 
3903 extern OnigOptionType
onig_get_options(regex_t * reg)3904 onig_get_options(regex_t* reg)
3905 {
3906   return reg->options;
3907 }
3908 
3909 extern  OnigAmbigType
onig_get_ambig_flag(regex_t * reg)3910 onig_get_ambig_flag(regex_t* reg)
3911 {
3912   return reg->ambig_flag;
3913 }
3914 
3915 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3916 onig_get_syntax(regex_t* reg)
3917 {
3918   return reg->syntax;
3919 }
3920 
3921 extern int
onig_number_of_captures(regex_t * reg)3922 onig_number_of_captures(regex_t* reg)
3923 {
3924   return reg->num_mem;
3925 }
3926 
3927 extern int
onig_number_of_capture_histories(regex_t * reg)3928 onig_number_of_capture_histories(regex_t* reg)
3929 {
3930 #ifdef USE_CAPTURE_HISTORY
3931   int i, n;
3932 
3933   n = 0;
3934   for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3935     if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3936       n++;
3937   }
3938   return n;
3939 #else
3940   return 0;
3941 #endif
3942 }
3943 
3944 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3945 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3946 {
3947   *to = *from;
3948 }
3949 
3950