xref: /PHP-5.4/ext/mbstring/oniguruma/regcomp.c (revision 7aab46a2)
1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
32 OnigAmbigType OnigDefaultAmbigFlag =
33   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
34    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
35 
36 extern OnigAmbigType
onig_get_default_ambig_flag(void)37 onig_get_default_ambig_flag(void)
38 {
39   return OnigDefaultAmbigFlag;
40 }
41 
42 extern int
onig_set_default_ambig_flag(OnigAmbigType ambig_flag)43 onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
44 {
45   OnigDefaultAmbigFlag = ambig_flag;
46   return 0;
47 }
48 
49 
50 static UChar*
k_strdup(UChar * s,UChar * end)51 k_strdup(UChar* s, UChar* end)
52 {
53   int len = end - s;
54 
55   if (len > 0) {
56     UChar* r = (UChar* )xmalloc(len + 1);
57     CHECK_NULL_RETURN(r);
58     xmemcpy(r, s, len);
59     r[len] = (UChar )0;
60     return r;
61   }
62   else return NULL;
63 }
64 
65 /*
66   Caution: node should not be a string node.
67            (s and end member address break)
68 */
69 static void
swap_node(Node * a,Node * b)70 swap_node(Node* a, Node* b)
71 {
72   Node c;
73   c = *a; *a = *b; *b = c;
74 }
75 
76 static OnigDistance
distance_add(OnigDistance d1,OnigDistance d2)77 distance_add(OnigDistance d1, OnigDistance d2)
78 {
79   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
80     return ONIG_INFINITE_DISTANCE;
81   else {
82     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
83     else return ONIG_INFINITE_DISTANCE;
84   }
85 }
86 
87 static OnigDistance
distance_multiply(OnigDistance d,int m)88 distance_multiply(OnigDistance d, int m)
89 {
90   if (m == 0) return 0;
91 
92   if (d < ONIG_INFINITE_DISTANCE / m)
93     return d * m;
94   else
95     return ONIG_INFINITE_DISTANCE;
96 }
97 
98 static int
bitset_is_empty(BitSetRef bs)99 bitset_is_empty(BitSetRef bs)
100 {
101   int i;
102   for (i = 0; i < BITSET_SIZE; i++) {
103     if (bs[i] != 0) return 0;
104   }
105   return 1;
106 }
107 
108 #ifdef ONIG_DEBUG
109 static int
bitset_on_num(BitSetRef bs)110 bitset_on_num(BitSetRef bs)
111 {
112   int i, n;
113 
114   n = 0;
115   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
116     if (BITSET_AT(bs, i)) n++;
117   }
118   return n;
119 }
120 #endif
121 
122 extern int
onig_bbuf_init(BBuf * buf,int size)123 onig_bbuf_init(BBuf* buf, int size)
124 {
125   buf->p = (UChar* )xmalloc(size);
126   if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
127 
128   buf->alloc = size;
129   buf->used  = 0;
130   return 0;
131 }
132 
133 
134 #ifdef USE_SUBEXP_CALL
135 
136 static int
unset_addr_list_init(UnsetAddrList * uslist,int size)137 unset_addr_list_init(UnsetAddrList* uslist, int size)
138 {
139   UnsetAddr* p;
140 
141   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
142   CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
143   uslist->num   = 0;
144   uslist->alloc = size;
145   uslist->us    = p;
146   return 0;
147 }
148 
149 static void
unset_addr_list_end(UnsetAddrList * uslist)150 unset_addr_list_end(UnsetAddrList* uslist)
151 {
152   if (IS_NOT_NULL(uslist->us))
153     xfree(uslist->us);
154 }
155 
156 static int
unset_addr_list_add(UnsetAddrList * uslist,int offset,struct _Node * node)157 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
158 {
159   UnsetAddr* p;
160   int size;
161 
162   if (uslist->num >= uslist->alloc) {
163     size = uslist->alloc * 2;
164     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
165     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
166     uslist->alloc = size;
167     uslist->us    = p;
168   }
169 
170   uslist->us[uslist->num].offset = offset;
171   uslist->us[uslist->num].target = node;
172   uslist->num++;
173   return 0;
174 }
175 #endif /* USE_SUBEXP_CALL */
176 
177 
178 static int
add_opcode(regex_t * reg,int opcode)179 add_opcode(regex_t* reg, int opcode)
180 {
181   BBUF_ADD1(reg, opcode);
182   return 0;
183 }
184 
185 #ifdef USE_COMBINATION_EXPLOSION_CHECK
186 static int
add_state_check_num(regex_t * reg,int num)187 add_state_check_num(regex_t* reg, int num)
188 {
189   StateCheckNumType n = (StateCheckNumType )num;
190 
191   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
192   return 0;
193 }
194 #endif
195 
196 static int
add_rel_addr(regex_t * reg,int addr)197 add_rel_addr(regex_t* reg, int addr)
198 {
199   RelAddrType ra = (RelAddrType )addr;
200 
201   BBUF_ADD(reg, &ra, SIZE_RELADDR);
202   return 0;
203 }
204 
205 static int
add_abs_addr(regex_t * reg,int addr)206 add_abs_addr(regex_t* reg, int addr)
207 {
208   AbsAddrType ra = (AbsAddrType )addr;
209 
210   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
211   return 0;
212 }
213 
214 static int
add_length(regex_t * reg,int len)215 add_length(regex_t* reg, int len)
216 {
217   LengthType l = (LengthType )len;
218 
219   BBUF_ADD(reg, &l, SIZE_LENGTH);
220   return 0;
221 }
222 
223 static int
add_mem_num(regex_t * reg,int num)224 add_mem_num(regex_t* reg, int num)
225 {
226   MemNumType n = (MemNumType )num;
227 
228   BBUF_ADD(reg, &n, SIZE_MEMNUM);
229   return 0;
230 }
231 
232 static int
add_pointer(regex_t * reg,void * addr)233 add_pointer(regex_t* reg, void* addr)
234 {
235   PointerType ptr = (PointerType )addr;
236 
237   BBUF_ADD(reg, &ptr, SIZE_POINTER);
238   return 0;
239 }
240 
241 static int
add_option(regex_t * reg,OnigOptionType option)242 add_option(regex_t* reg, OnigOptionType option)
243 {
244   BBUF_ADD(reg, &option, SIZE_OPTION);
245   return 0;
246 }
247 
248 static int
add_opcode_rel_addr(regex_t * reg,int opcode,int addr)249 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
250 {
251   int r;
252 
253   r = add_opcode(reg, opcode);
254   if (r) return r;
255   r = add_rel_addr(reg, addr);
256   return r;
257 }
258 
259 static int
add_bytes(regex_t * reg,UChar * bytes,int len)260 add_bytes(regex_t* reg, UChar* bytes, int len)
261 {
262   BBUF_ADD(reg, bytes, len);
263   return 0;
264 }
265 
266 static int
add_bitset(regex_t * reg,BitSetRef bs)267 add_bitset(regex_t* reg, BitSetRef bs)
268 {
269   BBUF_ADD(reg, bs, SIZE_BITSET);
270   return 0;
271 }
272 
273 static int
add_opcode_option(regex_t * reg,int opcode,OnigOptionType option)274 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
275 {
276   int r;
277 
278   r = add_opcode(reg, opcode);
279   if (r) return r;
280   r = add_option(reg, option);
281   return r;
282 }
283 
284 static int compile_length_tree(Node* node, regex_t* reg);
285 static int compile_tree(Node* node, regex_t* reg);
286 
287 
288 #define IS_NEED_STR_LEN_OP_EXACT(op) \
289    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
290     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
291 
292 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)293 select_str_opcode(int mb_len, int str_len, int ignore_case)
294 {
295   int op;
296 
297   if (ignore_case) {
298     switch (str_len) {
299     case 1:  op = OP_EXACT1_IC; break;
300     default: op = OP_EXACTN_IC; break;
301     }
302   }
303   else {
304     switch (mb_len) {
305     case 1:
306       switch (str_len) {
307       case 1:  op = OP_EXACT1; break;
308       case 2:  op = OP_EXACT2; break;
309       case 3:  op = OP_EXACT3; break;
310       case 4:  op = OP_EXACT4; break;
311       case 5:  op = OP_EXACT5; break;
312       default: op = OP_EXACTN; break;
313       }
314       break;
315 
316     case 2:
317       switch (str_len) {
318       case 1:  op = OP_EXACTMB2N1; break;
319       case 2:  op = OP_EXACTMB2N2; break;
320       case 3:  op = OP_EXACTMB2N3; break;
321       default: op = OP_EXACTMB2N;  break;
322       }
323       break;
324 
325     case 3:
326       op = OP_EXACTMB3N;
327       break;
328 
329     default:
330       op = OP_EXACTMBN;
331       break;
332     }
333   }
334   return op;
335 }
336 
337 static int
compile_tree_empty_check(Node * node,regex_t * reg,int empty_info)338 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
339 {
340   int r;
341   int saved_num_null_check = reg->num_null_check;
342 
343   if (empty_info != 0) {
344     r = add_opcode(reg, OP_NULL_CHECK_START);
345     if (r) return r;
346     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
347     if (r) return r;
348     reg->num_null_check++;
349   }
350 
351   r = compile_tree(node, reg);
352   if (r) return r;
353 
354   if (empty_info != 0) {
355     if (empty_info == NQ_TARGET_IS_EMPTY)
356       r = add_opcode(reg, OP_NULL_CHECK_END);
357     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
358       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
359     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
360       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
361 
362     if (r) return r;
363     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
364   }
365   return r;
366 }
367 
368 #ifdef USE_SUBEXP_CALL
369 static int
compile_call(CallNode * node,regex_t * reg)370 compile_call(CallNode* node, regex_t* reg)
371 {
372   int r;
373 
374   r = add_opcode(reg, OP_CALL);
375   if (r) return r;
376   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
377                           node->target);
378   if (r) return r;
379   r = add_abs_addr(reg, 0 /*dummy addr.*/);
380   return r;
381 }
382 #endif
383 
384 static int
compile_tree_n_times(Node * node,int n,regex_t * reg)385 compile_tree_n_times(Node* node, int n, regex_t* reg)
386 {
387   int i, r;
388 
389   for (i = 0; i < n; i++) {
390     r = compile_tree(node, reg);
391     if (r) return r;
392   }
393   return 0;
394 }
395 
396 static int
add_compile_string_length(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)397 add_compile_string_length(UChar* s, int mb_len, int str_len,
398                           regex_t* reg, int ignore_case)
399 {
400   int len;
401   int op = select_str_opcode(mb_len, str_len, ignore_case);
402 
403   len = SIZE_OPCODE;
404 
405   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
406   if (IS_NEED_STR_LEN_OP_EXACT(op))
407     len += SIZE_LENGTH;
408 
409   len += mb_len * str_len;
410   return len;
411 }
412 
413 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)414 add_compile_string(UChar* s, int mb_len, int str_len,
415                    regex_t* reg, int ignore_case)
416 {
417   int op = select_str_opcode(mb_len, str_len, ignore_case);
418   add_opcode(reg, op);
419 
420   if (op == OP_EXACTMBN)
421     add_length(reg, mb_len);
422 
423   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
424     if (op == OP_EXACTN_IC)
425       add_length(reg, mb_len * str_len);
426     else
427       add_length(reg, str_len);
428   }
429 
430   add_bytes(reg, s, mb_len * str_len);
431   return 0;
432 }
433 
434 
435 static int
compile_length_string_node(Node * node,regex_t * reg)436 compile_length_string_node(Node* node, regex_t* reg)
437 {
438   int rlen, r, len, prev_len, slen, ambig;
439   OnigEncoding enc = reg->enc;
440   UChar *p, *prev;
441   StrNode* sn;
442 
443   sn = &(NSTRING(node));
444   if (sn->end <= sn->s)
445     return 0;
446 
447   ambig = NSTRING_IS_AMBIG(node);
448 
449   p = prev = sn->s;
450   prev_len = enc_len(enc, p);
451   p += prev_len;
452   slen = 1;
453   rlen = 0;
454 
455   for (; p < sn->end; ) {
456     len = enc_len(enc, p);
457     if (len == prev_len) {
458       slen++;
459     }
460     else {
461       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
462       rlen += r;
463       prev = p;
464       slen = 1;
465       prev_len = len;
466     }
467     p += len;
468   }
469   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
470   rlen += r;
471   return rlen;
472 }
473 
474 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)475 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
476 {
477   if (sn->end <= sn->s)
478     return 0;
479 
480   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
481 }
482 
483 static int
compile_string_node(Node * node,regex_t * reg)484 compile_string_node(Node* node, regex_t* reg)
485 {
486   int r, len, prev_len, slen, ambig;
487   OnigEncoding enc = reg->enc;
488   UChar *p, *prev, *end;
489   StrNode* sn;
490 
491   sn = &(NSTRING(node));
492   if (sn->end <= sn->s)
493     return 0;
494 
495   end = sn->end;
496   ambig = NSTRING_IS_AMBIG(node);
497 
498   p = prev = sn->s;
499   prev_len = enc_len(enc, p);
500   p += prev_len;
501   slen = 1;
502 
503   for (; p < end; ) {
504     len = enc_len(enc, p);
505     if (len == prev_len) {
506       slen++;
507     }
508     else {
509       r = add_compile_string(prev, prev_len, slen, reg, ambig);
510       if (r) return r;
511 
512       prev  = p;
513       slen  = 1;
514       prev_len = len;
515     }
516 
517     p += len;
518   }
519   return add_compile_string(prev, prev_len, slen, reg, ambig);
520 }
521 
522 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)523 compile_string_raw_node(StrNode* sn, regex_t* reg)
524 {
525   if (sn->end <= sn->s)
526     return 0;
527 
528   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529 }
530 
531 static int
add_multi_byte_cclass(BBuf * mbuf,regex_t * reg)532 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
533 {
534 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
535   add_length(reg, mbuf->used);
536   return add_bytes(reg, mbuf->p, mbuf->used);
537 #else
538   static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
539 
540   int r, pad_size;
541   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
542 
543   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
544   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
545   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
546 
547   r = add_bytes(reg, mbuf->p, mbuf->used);
548 
549   /* padding for return value from compile_length_cclass_node() to be fix. */
550   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
551   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
552   return r;
553 #endif
554 }
555 
556 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)557 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
558 {
559   int len;
560 
561   if (IS_CCLASS_SHARE(cc)) {
562     len = SIZE_OPCODE + SIZE_POINTER;
563     return len;
564   }
565 
566   if (IS_NULL(cc->mbuf)) {
567     len = SIZE_OPCODE + SIZE_BITSET;
568   }
569   else {
570     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
571       len = SIZE_OPCODE;
572     }
573     else {
574       len = SIZE_OPCODE + SIZE_BITSET;
575     }
576 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
577     len += SIZE_LENGTH + cc->mbuf->used;
578 #else
579     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
580 #endif
581   }
582 
583   return len;
584 }
585 
586 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)587 compile_cclass_node(CClassNode* cc, regex_t* reg)
588 {
589   int r;
590 
591   if (IS_CCLASS_SHARE(cc)) {
592     add_opcode(reg, OP_CCLASS_NODE);
593     r = add_pointer(reg, cc);
594     return r;
595   }
596 
597   if (IS_NULL(cc->mbuf)) {
598     if (IS_CCLASS_NOT(cc))
599       add_opcode(reg, OP_CCLASS_NOT);
600     else
601       add_opcode(reg, OP_CCLASS);
602 
603     r = add_bitset(reg, cc->bs);
604   }
605   else {
606     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
607       if (IS_CCLASS_NOT(cc))
608         add_opcode(reg, OP_CCLASS_MB_NOT);
609       else
610         add_opcode(reg, OP_CCLASS_MB);
611 
612       r = add_multi_byte_cclass(cc->mbuf, reg);
613     }
614     else {
615       if (IS_CCLASS_NOT(cc))
616         add_opcode(reg, OP_CCLASS_MIX_NOT);
617       else
618         add_opcode(reg, OP_CCLASS_MIX);
619 
620       r = add_bitset(reg, cc->bs);
621       if (r) return r;
622       r = add_multi_byte_cclass(cc->mbuf, reg);
623     }
624   }
625 
626   return r;
627 }
628 
629 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)630 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
631 {
632 #define REPEAT_RANGE_ALLOC  4
633 
634   OnigRepeatRange* p;
635 
636   if (reg->repeat_range_alloc == 0) {
637     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
638     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
639     reg->repeat_range = p;
640     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
641   }
642   else if (reg->repeat_range_alloc <= id) {
643     int n;
644     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
645     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
646                                     sizeof(OnigRepeatRange) * n);
647     CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
648     reg->repeat_range = p;
649     reg->repeat_range_alloc = n;
650   }
651   else {
652     p = reg->repeat_range;
653   }
654 
655   p[id].lower = lower;
656   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
657   return 0;
658 }
659 
660 static int
compile_range_repeat_node(QuantifierNode * qn,int target_len,int empty_info,regex_t * reg)661 compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
662                           regex_t* reg)
663 {
664   int r;
665   int num_repeat = reg->num_repeat;
666 
667   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
668   if (r) return r;
669   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
670   reg->num_repeat++;
671   if (r) return r;
672   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
673   if (r) return r;
674 
675   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
676   if (r) return r;
677 
678   r = compile_tree_empty_check(qn->target, reg, empty_info);
679   if (r) return r;
680 
681   if (
682 #ifdef USE_SUBEXP_CALL
683       reg->num_call > 0 ||
684 #endif
685       IS_QUANTIFIER_IN_REPEAT(qn)) {
686     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
687   }
688   else {
689     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
690   }
691   if (r) return r;
692   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
693   return r;
694 }
695 
696 static int
is_anychar_star_quantifier(QuantifierNode * qn)697 is_anychar_star_quantifier(QuantifierNode* qn)
698 {
699   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
700       NTYPE(qn->target) == N_ANYCHAR)
701     return 1;
702   else
703     return 0;
704 }
705 
706 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
707 #define CKN_ON   (ckn > 0)
708 
709 #ifdef USE_COMBINATION_EXPLOSION_CHECK
710 
711 static int
compile_length_quantifier_node(QuantifierNode * qn,regex_t * reg)712 compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
713 {
714   int len, mod_tlen, cklen;
715   int ckn;
716   int infinite = IS_REPEAT_INFINITE(qn->upper);
717   int empty_info = qn->target_empty_info;
718   int tlen = compile_length_tree(qn->target, reg);
719 
720   if (tlen < 0) return tlen;
721 
722   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
723 
724   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
725 
726   /* anychar repeat */
727   if (NTYPE(qn->target) == N_ANYCHAR) {
728     if (qn->greedy && infinite) {
729       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
730         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
731       else
732         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
733     }
734   }
735 
736   if (empty_info != 0)
737     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
738   else
739     mod_tlen = tlen;
740 
741   if (infinite && qn->lower <= 1) {
742     if (qn->greedy) {
743       if (qn->lower == 1)
744 	len = SIZE_OP_JUMP;
745       else
746 	len = 0;
747 
748       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
749     }
750     else {
751       if (qn->lower == 0)
752 	len = SIZE_OP_JUMP;
753       else
754 	len = 0;
755 
756       len += mod_tlen + SIZE_OP_PUSH + cklen;
757     }
758   }
759   else if (qn->upper == 0) {
760     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
761       len = SIZE_OP_JUMP + tlen;
762     else
763       len = 0;
764   }
765   else if (qn->upper == 1 && qn->greedy) {
766     if (qn->lower == 0) {
767       if (CKN_ON) {
768 	len = SIZE_OP_STATE_CHECK_PUSH + tlen;
769       }
770       else {
771 	len = SIZE_OP_PUSH + tlen;
772       }
773     }
774     else {
775       len = tlen;
776     }
777   }
778   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
779     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
780   }
781   else {
782     len = SIZE_OP_REPEAT_INC
783         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
784     if (CKN_ON)
785       len += SIZE_OP_STATE_CHECK;
786   }
787 
788   return len;
789 }
790 
791 static int
compile_quantifier_node(QuantifierNode * qn,regex_t * reg)792 compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
793 {
794   int r, mod_tlen;
795   int ckn;
796   int infinite = IS_REPEAT_INFINITE(qn->upper);
797   int empty_info = qn->target_empty_info;
798   int tlen = compile_length_tree(qn->target, reg);
799 
800   if (tlen < 0) return tlen;
801 
802   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
803 
804   if (is_anychar_star_quantifier(qn)) {
805     r = compile_tree_n_times(qn->target, qn->lower, reg);
806     if (r) return r;
807     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
808       if (IS_MULTILINE(reg->options))
809 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
810       else
811 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
812       if (r) return r;
813       if (CKN_ON) {
814 	r = add_state_check_num(reg, ckn);
815 	if (r) return r;
816       }
817 
818       return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
819     }
820     else {
821       if (IS_MULTILINE(reg->options)) {
822 	r = add_opcode(reg, (CKN_ON ?
823 			       OP_STATE_CHECK_ANYCHAR_ML_STAR
824 			     : OP_ANYCHAR_ML_STAR));
825       }
826       else {
827 	r = add_opcode(reg, (CKN_ON ?
828 			       OP_STATE_CHECK_ANYCHAR_STAR
829 			     : OP_ANYCHAR_STAR));
830       }
831       if (r) return r;
832       if (CKN_ON)
833 	r = add_state_check_num(reg, ckn);
834 
835       return r;
836     }
837   }
838 
839   if (empty_info != 0)
840     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
841   else
842     mod_tlen = tlen;
843 
844   if (infinite && qn->lower <= 1) {
845     if (qn->greedy) {
846       if (qn->lower == 1) {
847 	r = add_opcode_rel_addr(reg, OP_JUMP,
848 			(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
849 	if (r) return r;
850       }
851 
852       if (CKN_ON) {
853 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
854 	if (r) return r;
855 	r = add_state_check_num(reg, ckn);
856 	if (r) return r;
857 	r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
858       }
859       else {
860 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
861       }
862       if (r) return r;
863       r = compile_tree_empty_check(qn->target, reg, empty_info);
864       if (r) return r;
865       r = add_opcode_rel_addr(reg, OP_JUMP,
866 	      -(mod_tlen + (int )SIZE_OP_JUMP
867 		+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
868     }
869     else {
870       if (qn->lower == 0) {
871 	r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
872 	if (r) return r;
873       }
874       r = compile_tree_empty_check(qn->target, reg, empty_info);
875       if (r) return r;
876       if (CKN_ON) {
877 	r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
878 	if (r) return r;
879 	r = add_state_check_num(reg, ckn);
880 	if (r) return r;
881 	r = add_rel_addr(reg,
882 		 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
883       }
884       else
885 	r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
886     }
887   }
888   else if (qn->upper == 0) {
889     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
890       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
891       if (r) return r;
892       r = compile_tree(qn->target, reg);
893     }
894     else
895       r = 0;
896   }
897   else if (qn->upper == 1 && qn->greedy) {
898     if (qn->lower == 0) {
899       if (CKN_ON) {
900 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
901 	if (r) return r;
902 	r = add_state_check_num(reg, ckn);
903 	if (r) return r;
904 	r = add_rel_addr(reg, tlen);
905       }
906       else {
907 	r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
908       }
909       if (r) return r;
910     }
911 
912     r = compile_tree(qn->target, reg);
913   }
914   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
915     if (CKN_ON) {
916       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
917       if (r) return r;
918       r = add_state_check_num(reg, ckn);
919       if (r) return r;
920       r = add_rel_addr(reg, SIZE_OP_JUMP);
921     }
922     else {
923       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
924     }
925 
926     if (r) return r;
927     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
928     if (r) return r;
929     r = compile_tree(qn->target, reg);
930   }
931   else {
932     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
933     if (CKN_ON) {
934       if (r) return r;
935       r = add_opcode(reg, OP_STATE_CHECK);
936       if (r) return r;
937       r = add_state_check_num(reg, ckn);
938     }
939   }
940   return r;
941 }
942 
943 #else /* USE_COMBINATION_EXPLOSION_CHECK */
944 
945 static int
compile_length_quantifier_node(QuantifierNode * qn,regex_t * reg)946 compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
947 {
948   int len, mod_tlen;
949   int infinite = IS_REPEAT_INFINITE(qn->upper);
950   int empty_info = qn->target_empty_info;
951   int tlen = compile_length_tree(qn->target, reg);
952 
953   if (tlen < 0) return tlen;
954 
955   /* anychar repeat */
956   if (NTYPE(qn->target) == N_ANYCHAR) {
957     if (qn->greedy && infinite) {
958       if (IS_NOT_NULL(qn->next_head_exact))
959         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
960       else
961         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
962     }
963   }
964 
965   if (empty_info != 0)
966     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
967   else
968     mod_tlen = tlen;
969 
970   if (infinite &&
971       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
972     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
973       len = SIZE_OP_JUMP;
974     }
975     else {
976       len = tlen * qn->lower;
977     }
978 
979     if (qn->greedy) {
980       if (IS_NOT_NULL(qn->head_exact))
981 	len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
982       else if (IS_NOT_NULL(qn->next_head_exact))
983 	len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
984       else
985 	len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
986     }
987     else
988       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
989   }
990   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
991     len = SIZE_OP_JUMP + tlen;
992   }
993   else if (!infinite && qn->greedy &&
994            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
995                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
996     len = tlen * qn->lower;
997     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
998   }
999   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1000     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1001   }
1002   else {
1003     len = SIZE_OP_REPEAT_INC
1004         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1005   }
1006 
1007   return len;
1008 }
1009 
1010 static int
compile_quantifier_node(QuantifierNode * qn,regex_t * reg)1011 compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
1012 {
1013   int i, r, mod_tlen;
1014   int infinite = IS_REPEAT_INFINITE(qn->upper);
1015   int empty_info = qn->target_empty_info;
1016   int tlen = compile_length_tree(qn->target, reg);
1017 
1018   if (tlen < 0) return tlen;
1019 
1020   if (is_anychar_star_quantifier(qn)) {
1021     r = compile_tree_n_times(qn->target, qn->lower, reg);
1022     if (r) return r;
1023     if (IS_NOT_NULL(qn->next_head_exact)) {
1024       if (IS_MULTILINE(reg->options))
1025 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1026       else
1027 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1028       if (r) return r;
1029       return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
1030     }
1031     else {
1032       if (IS_MULTILINE(reg->options))
1033 	return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1034       else
1035 	return add_opcode(reg, OP_ANYCHAR_STAR);
1036     }
1037   }
1038 
1039   if (empty_info != 0)
1040     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1041   else
1042     mod_tlen = tlen;
1043 
1044   if (infinite &&
1045       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1046     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1047       if (qn->greedy) {
1048 	if (IS_NOT_NULL(qn->head_exact))
1049 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1050 	else if (IS_NOT_NULL(qn->next_head_exact))
1051 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1052 	else
1053 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1054       }
1055       else {
1056 	r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1057       }
1058       if (r) return r;
1059     }
1060     else {
1061       r = compile_tree_n_times(qn->target, qn->lower, reg);
1062       if (r) return r;
1063     }
1064 
1065     if (qn->greedy) {
1066       if (IS_NOT_NULL(qn->head_exact)) {
1067 	r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1068 			     mod_tlen + SIZE_OP_JUMP);
1069 	if (r) return r;
1070 	add_bytes(reg, NSTRING(qn->head_exact).s, 1);
1071 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1072 	if (r) return r;
1073 	r = add_opcode_rel_addr(reg, OP_JUMP,
1074 	-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1075       }
1076       else if (IS_NOT_NULL(qn->next_head_exact)) {
1077 	r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1078 				mod_tlen + SIZE_OP_JUMP);
1079 	if (r) return r;
1080 	add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
1081 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1082 	if (r) return r;
1083 	r = add_opcode_rel_addr(reg, OP_JUMP,
1084           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1085       }
1086       else {
1087 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1088 	if (r) return r;
1089 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1090 	if (r) return r;
1091 	r = add_opcode_rel_addr(reg, OP_JUMP,
1092 		     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1093       }
1094     }
1095     else {
1096       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1097       if (r) return r;
1098       r = compile_tree_empty_check(qn->target, reg, empty_info);
1099       if (r) return r;
1100       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1101     }
1102   }
1103   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1104     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1105     if (r) return r;
1106     r = compile_tree(qn->target, reg);
1107   }
1108   else if (!infinite && qn->greedy &&
1109            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1110                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1111     int n = qn->upper - qn->lower;
1112 
1113     r = compile_tree_n_times(qn->target, qn->lower, reg);
1114     if (r) return r;
1115 
1116     for (i = 0; i < n; i++) {
1117       r = add_opcode_rel_addr(reg, OP_PUSH,
1118 			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1119       if (r) return r;
1120       r = compile_tree(qn->target, reg);
1121       if (r) return r;
1122     }
1123   }
1124   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1125     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1126     if (r) return r;
1127     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1128     if (r) return r;
1129     r = compile_tree(qn->target, reg);
1130   }
1131   else {
1132     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1133   }
1134   return r;
1135 }
1136 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1137 
1138 static int
compile_length_option_node(EffectNode * node,regex_t * reg)1139 compile_length_option_node(EffectNode* node, regex_t* reg)
1140 {
1141   int tlen;
1142   OnigOptionType prev = reg->options;
1143 
1144   reg->options = node->option;
1145   tlen = compile_length_tree(node->target, reg);
1146   reg->options = prev;
1147 
1148   if (tlen < 0) return tlen;
1149 
1150   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1151     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1152            + tlen + SIZE_OP_SET_OPTION;
1153   }
1154   else
1155     return tlen;
1156 }
1157 
1158 static int
compile_option_node(EffectNode * node,regex_t * reg)1159 compile_option_node(EffectNode* node, regex_t* reg)
1160 {
1161   int r;
1162   OnigOptionType prev = reg->options;
1163 
1164   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1165     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1166     if (r) return r;
1167     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1168     if (r) return r;
1169     r = add_opcode(reg, OP_FAIL);
1170     if (r) return r;
1171   }
1172 
1173   reg->options = node->option;
1174   r = compile_tree(node->target, reg);
1175   reg->options = prev;
1176 
1177   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178     if (r) return r;
1179     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1180   }
1181   return r;
1182 }
1183 
1184 static int
compile_length_effect_node(EffectNode * node,regex_t * reg)1185 compile_length_effect_node(EffectNode* node, regex_t* reg)
1186 {
1187   int len;
1188   int tlen;
1189 
1190   if (node->type == EFFECT_OPTION)
1191     return compile_length_option_node(node, reg);
1192 
1193   if (node->target) {
1194     tlen = compile_length_tree(node->target, reg);
1195     if (tlen < 0) return tlen;
1196   }
1197   else
1198     tlen = 0;
1199 
1200   switch (node->type) {
1201   case EFFECT_MEMORY:
1202 #ifdef USE_SUBEXP_CALL
1203     if (IS_EFFECT_CALLED(node)) {
1204       len = SIZE_OP_MEMORY_START_PUSH + tlen
1205 	  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1206       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1207 	len += (IS_EFFECT_RECURSION(node)
1208 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1209       else
1210 	len += (IS_EFFECT_RECURSION(node)
1211 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1212     }
1213     else
1214 #endif
1215     {
1216       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1217 	len = SIZE_OP_MEMORY_START_PUSH;
1218       else
1219 	len = SIZE_OP_MEMORY_START;
1220 
1221       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1222 		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1223     }
1224     break;
1225 
1226   case EFFECT_STOP_BACKTRACK:
1227     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
1228       QuantifierNode* qn = &NQUANTIFIER(node->target);
1229       tlen = compile_length_tree(qn->target, reg);
1230       if (tlen < 0) return tlen;
1231 
1232       len = tlen * qn->lower
1233 	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1234     }
1235     else {
1236       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1237     }
1238     break;
1239 
1240   default:
1241     return ONIGERR_TYPE_BUG;
1242     break;
1243   }
1244 
1245   return len;
1246 }
1247 
1248 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1249 
1250 static int
compile_effect_node(EffectNode * node,regex_t * reg)1251 compile_effect_node(EffectNode* node, regex_t* reg)
1252 {
1253   int r, len;
1254 
1255   if (node->type == EFFECT_OPTION)
1256     return compile_option_node(node, reg);
1257 
1258   switch (node->type) {
1259   case EFFECT_MEMORY:
1260 #ifdef USE_SUBEXP_CALL
1261     if (IS_EFFECT_CALLED(node)) {
1262       r = add_opcode(reg, OP_CALL);
1263       if (r) return r;
1264       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1265       node->state |= NST_ADDR_FIXED;
1266       r = add_abs_addr(reg, (int )node->call_addr);
1267       if (r) return r;
1268       len = compile_length_tree(node->target, reg);
1269       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1270       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1271 	len += (IS_EFFECT_RECURSION(node)
1272 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1273       else
1274 	len += (IS_EFFECT_RECURSION(node)
1275 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1276 
1277       r = add_opcode_rel_addr(reg, OP_JUMP, len);
1278       if (r) return r;
1279     }
1280 #endif
1281     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1282       r = add_opcode(reg, OP_MEMORY_START_PUSH);
1283     else
1284       r = add_opcode(reg, OP_MEMORY_START);
1285     if (r) return r;
1286     r = add_mem_num(reg, node->regnum);
1287     if (r) return r;
1288     r = compile_tree(node->target, reg);
1289     if (r) return r;
1290 #ifdef USE_SUBEXP_CALL
1291     if (IS_EFFECT_CALLED(node)) {
1292       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1293 	r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
1294 			     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1295       else
1296 	r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
1297 			     ? OP_MEMORY_END_REC : OP_MEMORY_END));
1298 
1299       if (r) return r;
1300       r = add_mem_num(reg, node->regnum);
1301       if (r) return r;
1302       r = add_opcode(reg, OP_RETURN);
1303     }
1304     else
1305 #endif
1306     {
1307       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1308 	r = add_opcode(reg, OP_MEMORY_END_PUSH);
1309       else
1310 	r = add_opcode(reg, OP_MEMORY_END);
1311       if (r) return r;
1312       r = add_mem_num(reg, node->regnum);
1313     }
1314     break;
1315 
1316   case EFFECT_STOP_BACKTRACK:
1317     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
1318       QuantifierNode* qn = &NQUANTIFIER(node->target);
1319       r = compile_tree_n_times(qn->target, qn->lower, reg);
1320       if (r) return r;
1321 
1322       len = compile_length_tree(qn->target, reg);
1323       if (len < 0) return len;
1324 
1325       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1326       if (r) return r;
1327       r = compile_tree(qn->target, reg);
1328       if (r) return r;
1329       r = add_opcode(reg, OP_POP);
1330       if (r) return r;
1331       r = add_opcode_rel_addr(reg, OP_JUMP,
1332 	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1333     }
1334     else {
1335       r = add_opcode(reg, OP_PUSH_STOP_BT);
1336       if (r) return r;
1337       r = compile_tree(node->target, reg);
1338       if (r) return r;
1339       r = add_opcode(reg, OP_POP_STOP_BT);
1340     }
1341     break;
1342 
1343   default:
1344     return ONIGERR_TYPE_BUG;
1345     break;
1346   }
1347 
1348   return r;
1349 }
1350 
1351 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1352 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1353 {
1354   int len;
1355   int tlen = 0;
1356 
1357   if (node->target) {
1358     tlen = compile_length_tree(node->target, reg);
1359     if (tlen < 0) return tlen;
1360   }
1361 
1362   switch (node->type) {
1363   case ANCHOR_PREC_READ:
1364     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1365     break;
1366   case ANCHOR_PREC_READ_NOT:
1367     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1368     break;
1369   case ANCHOR_LOOK_BEHIND:
1370     len = SIZE_OP_LOOK_BEHIND + tlen;
1371     break;
1372   case ANCHOR_LOOK_BEHIND_NOT:
1373     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1374     break;
1375 
1376   default:
1377     len = SIZE_OPCODE;
1378     break;
1379   }
1380 
1381   return len;
1382 }
1383 
1384 static int
compile_anchor_node(AnchorNode * node,regex_t * reg)1385 compile_anchor_node(AnchorNode* node, regex_t* reg)
1386 {
1387   int r, len;
1388 
1389   switch (node->type) {
1390   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1391   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1392   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1393   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1394   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1395   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1396 
1397   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
1398   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1399 #ifdef USE_WORD_BEGIN_END
1400   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
1401   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
1402 #endif
1403 
1404   case ANCHOR_PREC_READ:
1405     r = add_opcode(reg, OP_PUSH_POS);
1406     if (r) return r;
1407     r = compile_tree(node->target, reg);
1408     if (r) return r;
1409     r = add_opcode(reg, OP_POP_POS);
1410     break;
1411 
1412   case ANCHOR_PREC_READ_NOT:
1413     len = compile_length_tree(node->target, reg);
1414     if (len < 0) return len;
1415     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1416     if (r) return r;
1417     r = compile_tree(node->target, reg);
1418     if (r) return r;
1419     r = add_opcode(reg, OP_FAIL_POS);
1420     break;
1421 
1422   case ANCHOR_LOOK_BEHIND:
1423     {
1424       int n;
1425       r = add_opcode(reg, OP_LOOK_BEHIND);
1426       if (r) return r;
1427       if (node->char_len < 0) {
1428 	r = get_char_length_tree(node->target, reg, &n);
1429 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1430       }
1431       else
1432 	n = node->char_len;
1433       r = add_length(reg, n);
1434       if (r) return r;
1435       r = compile_tree(node->target, reg);
1436     }
1437     break;
1438 
1439   case ANCHOR_LOOK_BEHIND_NOT:
1440     {
1441       int n;
1442       len = compile_length_tree(node->target, reg);
1443       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1444 			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1445       if (r) return r;
1446       if (node->char_len < 0) {
1447 	r = get_char_length_tree(node->target, reg, &n);
1448 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1449       }
1450       else
1451 	n = node->char_len;
1452       r = add_length(reg, n);
1453       if (r) return r;
1454       r = compile_tree(node->target, reg);
1455       if (r) return r;
1456       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1457     }
1458     break;
1459 
1460   default:
1461     return ONIGERR_TYPE_BUG;
1462     break;
1463   }
1464 
1465   return r;
1466 }
1467 
1468 static int
compile_length_tree(Node * node,regex_t * reg)1469 compile_length_tree(Node* node, regex_t* reg)
1470 {
1471   int len, type, r;
1472 
1473   type = NTYPE(node);
1474   switch (type) {
1475   case N_LIST:
1476     len = 0;
1477     do {
1478       r = compile_length_tree(NCONS(node).left, reg);
1479       if (r < 0) return r;
1480       len += r;
1481     } while (IS_NOT_NULL(node = NCONS(node).right));
1482     r = len;
1483     break;
1484 
1485   case N_ALT:
1486     {
1487       int n;
1488 
1489       n = r = 0;
1490       do {
1491 	r += compile_length_tree(NCONS(node).left, reg);
1492 	n++;
1493       } while (IS_NOT_NULL(node = NCONS(node).right));
1494       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1495     }
1496     break;
1497 
1498   case N_STRING:
1499     if (NSTRING_IS_RAW(node))
1500       r = compile_length_string_raw_node(&(NSTRING(node)), reg);
1501     else
1502       r = compile_length_string_node(node, reg);
1503     break;
1504 
1505   case N_CCLASS:
1506     r = compile_length_cclass_node(&(NCCLASS(node)), reg);
1507     break;
1508 
1509   case N_CTYPE:
1510   case N_ANYCHAR:
1511     r = SIZE_OPCODE;
1512     break;
1513 
1514   case N_BACKREF:
1515     {
1516       BackrefNode* br = &(NBACKREF(node));
1517 
1518 #ifdef USE_BACKREF_AT_LEVEL
1519       if (IS_BACKREF_NEST_LEVEL(br)) {
1520         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1521             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1522       }
1523       else
1524 #endif
1525       if (br->back_num == 1) {
1526 	r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1527 	     ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1528       }
1529       else {
1530 	r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1531       }
1532     }
1533     break;
1534 
1535 #ifdef USE_SUBEXP_CALL
1536   case N_CALL:
1537     r = SIZE_OP_CALL;
1538     break;
1539 #endif
1540 
1541   case N_QUANTIFIER:
1542     r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
1543     break;
1544 
1545   case N_EFFECT:
1546     r = compile_length_effect_node(&NEFFECT(node), reg);
1547     break;
1548 
1549   case N_ANCHOR:
1550     r = compile_length_anchor_node(&(NANCHOR(node)), reg);
1551     break;
1552 
1553   default:
1554     return ONIGERR_TYPE_BUG;
1555     break;
1556   }
1557 
1558   return r;
1559 }
1560 
1561 static int
compile_tree(Node * node,regex_t * reg)1562 compile_tree(Node* node, regex_t* reg)
1563 {
1564   int n, type, len, pos, r = 0;
1565 
1566   type = NTYPE(node);
1567   switch (type) {
1568   case N_LIST:
1569     do {
1570       r = compile_tree(NCONS(node).left, reg);
1571     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
1572     break;
1573 
1574   case N_ALT:
1575     {
1576       Node* x = node;
1577       len = 0;
1578       do {
1579 	len += compile_length_tree(NCONS(x).left, reg);
1580 	if (NCONS(x).right != NULL) {
1581 	  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1582 	}
1583       } while (IS_NOT_NULL(x = NCONS(x).right));
1584       pos = reg->used + len;  /* goal position */
1585 
1586       do {
1587 	len = compile_length_tree(NCONS(node).left, reg);
1588 	if (IS_NOT_NULL(NCONS(node).right)) {
1589 	  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1590 	  if (r) break;
1591 	}
1592 	r = compile_tree(NCONS(node).left, reg);
1593 	if (r) break;
1594 	if (IS_NOT_NULL(NCONS(node).right)) {
1595 	  len = pos - (reg->used + SIZE_OP_JUMP);
1596 	  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1597 	  if (r) break;
1598 	}
1599       } while (IS_NOT_NULL(node = NCONS(node).right));
1600     }
1601     break;
1602 
1603   case N_STRING:
1604     if (NSTRING_IS_RAW(node))
1605       r = compile_string_raw_node(&(NSTRING(node)), reg);
1606     else
1607       r = compile_string_node(node, reg);
1608     break;
1609 
1610   case N_CCLASS:
1611     r = compile_cclass_node(&(NCCLASS(node)), reg);
1612     break;
1613 
1614   case N_CTYPE:
1615     {
1616       int op;
1617 
1618       switch (NCTYPE(node).type) {
1619       case CTYPE_WORD:            op = OP_WORD;           break;
1620       case CTYPE_NOT_WORD:        op = OP_NOT_WORD;       break;
1621       default:
1622 	return ONIGERR_TYPE_BUG;
1623 	break;
1624       }
1625       r = add_opcode(reg, op);
1626     }
1627     break;
1628 
1629   case N_ANYCHAR:
1630     if (IS_MULTILINE(reg->options))
1631       r = add_opcode(reg, OP_ANYCHAR_ML);
1632     else
1633       r = add_opcode(reg, OP_ANYCHAR);
1634     break;
1635 
1636   case N_BACKREF:
1637     {
1638       BackrefNode* br = &(NBACKREF(node));
1639 
1640 #ifdef USE_BACKREF_AT_LEVEL
1641       if (IS_BACKREF_NEST_LEVEL(br)) {
1642 	r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
1643 	if (r) return r;
1644 	r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1645 	if (r) return r;
1646 	r = add_length(reg, br->nest_level);
1647 	if (r) return r;
1648 
1649 	goto add_bacref_mems;
1650       }
1651       else
1652 #endif
1653       if (br->back_num == 1) {
1654 	n = br->back_static[0];
1655 	if (IS_IGNORECASE(reg->options)) {
1656 	  r = add_opcode(reg, OP_BACKREFN_IC);
1657 	  if (r) return r;
1658 	  r = add_mem_num(reg, n);
1659 	}
1660 	else {
1661 	  switch (n) {
1662 	  case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1663 	  case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1664 	  default:
1665 	    r = add_opcode(reg, OP_BACKREFN);
1666 	    if (r) return r;
1667 	    r = add_mem_num(reg, n);
1668 	    break;
1669 	  }
1670 	}
1671       }
1672       else {
1673 	int i;
1674 	int* p;
1675 
1676         if (IS_IGNORECASE(reg->options)) {
1677           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1678         }
1679         else {
1680           r = add_opcode(reg, OP_BACKREF_MULTI);
1681         }
1682 	if (r) return r;
1683 
1684 #ifdef USE_BACKREF_AT_LEVEL
1685       add_bacref_mems:
1686 #endif
1687 	r = add_length(reg, br->back_num);
1688 	if (r) return r;
1689 	p = BACKREFS_P(br);
1690 	for (i = br->back_num - 1; i >= 0; i--) {
1691 	  r = add_mem_num(reg, p[i]);
1692 	  if (r) return r;
1693 	}
1694       }
1695     }
1696     break;
1697 
1698 #ifdef USE_SUBEXP_CALL
1699   case N_CALL:
1700     r = compile_call(&(NCALL(node)), reg);
1701     break;
1702 #endif
1703 
1704   case N_QUANTIFIER:
1705     r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
1706     break;
1707 
1708   case N_EFFECT:
1709     r = compile_effect_node(&NEFFECT(node), reg);
1710     break;
1711 
1712   case N_ANCHOR:
1713     r = compile_anchor_node(&(NANCHOR(node)), reg);
1714     break;
1715 
1716   default:
1717 #ifdef ONIG_DEBUG
1718     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1719 #endif
1720     break;
1721   }
1722 
1723   return r;
1724 }
1725 
1726 #ifdef USE_NAMED_GROUP
1727 
1728 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)1729 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1730 {
1731   int r = 0;
1732   Node* node = *plink;
1733 
1734   switch (NTYPE(node)) {
1735   case N_LIST:
1736   case N_ALT:
1737     do {
1738       r = noname_disable_map(&(NCONS(node).left), map, counter);
1739     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
1740     break;
1741 
1742   case N_QUANTIFIER:
1743     {
1744       Node** ptarget = &(NQUANTIFIER(node).target);
1745       Node*  old = *ptarget;
1746       r = noname_disable_map(ptarget, map, counter);
1747       if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
1748 	onig_reduce_nested_quantifier(node, *ptarget);
1749       }
1750     }
1751     break;
1752 
1753   case N_EFFECT:
1754     {
1755       EffectNode* en = &(NEFFECT(node));
1756       if (en->type == EFFECT_MEMORY) {
1757 	if (IS_EFFECT_NAMED_GROUP(en)) {
1758 	  (*counter)++;
1759 	  map[en->regnum].new_val = *counter;
1760 	  en->regnum = *counter;
1761 	  r = noname_disable_map(&(en->target), map, counter);
1762 	}
1763 	else {
1764 	  *plink = en->target;
1765 	  en->target = NULL_NODE;
1766 	  onig_node_free(node);
1767 	  r = noname_disable_map(plink, map, counter);
1768 	}
1769       }
1770       else
1771 	r = noname_disable_map(&(en->target), map, counter);
1772     }
1773     break;
1774 
1775   default:
1776     break;
1777   }
1778 
1779   return r;
1780 }
1781 
1782 static int
renumber_node_backref(Node * node,GroupNumRemap * map)1783 renumber_node_backref(Node* node, GroupNumRemap* map)
1784 {
1785   int i, pos, n, old_num;
1786   int *backs;
1787   BackrefNode* bn = &(NBACKREF(node));
1788 
1789   if (! IS_BACKREF_NAME_REF(bn))
1790     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1791 
1792   old_num = bn->back_num;
1793   if (IS_NULL(bn->back_dynamic))
1794     backs = bn->back_static;
1795   else
1796     backs = bn->back_dynamic;
1797 
1798   for (i = 0, pos = 0; i < old_num; i++) {
1799     n = map[backs[i]].new_val;
1800     if (n > 0) {
1801       backs[pos] = n;
1802       pos++;
1803     }
1804   }
1805 
1806   bn->back_num = pos;
1807   return 0;
1808 }
1809 
1810 static int
renumber_by_map(Node * node,GroupNumRemap * map)1811 renumber_by_map(Node* node, GroupNumRemap* map)
1812 {
1813   int r = 0;
1814 
1815   switch (NTYPE(node)) {
1816   case N_LIST:
1817   case N_ALT:
1818     do {
1819       r = renumber_by_map(NCONS(node).left, map);
1820     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
1821     break;
1822   case N_QUANTIFIER:
1823     r = renumber_by_map(NQUANTIFIER(node).target, map);
1824     break;
1825   case N_EFFECT:
1826     r = renumber_by_map(NEFFECT(node).target, map);
1827     break;
1828 
1829   case N_BACKREF:
1830     r = renumber_node_backref(node, map);
1831     break;
1832 
1833   default:
1834     break;
1835   }
1836 
1837   return r;
1838 }
1839 
1840 static int
numbered_ref_check(Node * node)1841 numbered_ref_check(Node* node)
1842 {
1843   int r = 0;
1844 
1845   switch (NTYPE(node)) {
1846   case N_LIST:
1847   case N_ALT:
1848     do {
1849       r = numbered_ref_check(NCONS(node).left);
1850     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
1851     break;
1852   case N_QUANTIFIER:
1853     r = numbered_ref_check(NQUANTIFIER(node).target);
1854     break;
1855   case N_EFFECT:
1856     r = numbered_ref_check(NEFFECT(node).target);
1857     break;
1858 
1859   case N_BACKREF:
1860     if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
1861       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1862     break;
1863 
1864   default:
1865     break;
1866   }
1867 
1868   return r;
1869 }
1870 
1871 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)1872 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1873 {
1874   int r, i, pos, counter;
1875   BitStatusType loc;
1876   GroupNumRemap* map;
1877 
1878   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
1879   CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
1880   for (i = 1; i <= env->num_mem; i++) {
1881     map[i].new_val = 0;
1882   }
1883   counter = 0;
1884   r = noname_disable_map(root, map, &counter);
1885   if (r != 0) return r;
1886 
1887   r = renumber_by_map(*root, map);
1888   if (r != 0) return r;
1889 
1890   for (i = 1, pos = 1; i <= env->num_mem; i++) {
1891     if (map[i].new_val > 0) {
1892       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1893       pos++;
1894     }
1895   }
1896 
1897   loc = env->capture_history;
1898   BIT_STATUS_CLEAR(env->capture_history);
1899   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1900     if (BIT_STATUS_AT(loc, i)) {
1901       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1902     }
1903   }
1904 
1905   env->num_mem = env->num_named;
1906   reg->num_mem = env->num_named;
1907 
1908   return onig_renumber_name_table(reg, map);
1909 }
1910 #endif /* USE_NAMED_GROUP */
1911 
1912 #ifdef USE_SUBEXP_CALL
1913 static int
unset_addr_list_fix(UnsetAddrList * uslist,regex_t * reg)1914 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1915 {
1916   int i, offset;
1917   EffectNode* en;
1918   AbsAddrType addr;
1919 
1920   for (i = 0; i < uslist->num; i++) {
1921     en = &(NEFFECT(uslist->us[i].target));
1922     if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1923     addr = en->call_addr;
1924     offset = uslist->us[i].offset;
1925 
1926     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1927   }
1928   return 0;
1929 }
1930 #endif
1931 
1932 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
1933 static int
quantifiers_memory_node_info(Node * node)1934 quantifiers_memory_node_info(Node* node)
1935 {
1936   int r = 0;
1937 
1938   switch (NTYPE(node)) {
1939   case N_LIST:
1940   case N_ALT:
1941     {
1942       int v;
1943       do {
1944 	v = quantifiers_memory_node_info(NCONS(node).left);
1945 	if (v > r) r = v;
1946       } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
1947     }
1948     break;
1949 
1950 #ifdef USE_SUBEXP_CALL
1951   case N_CALL:
1952     if (IS_CALL_RECURSION(&NCALL(node))) {
1953       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1954     }
1955     else
1956       r = quantifiers_memory_node_info(NCALL(node).target);
1957     break;
1958 #endif
1959 
1960   case N_QUANTIFIER:
1961     {
1962       QuantifierNode* qn = &(NQUANTIFIER(node));
1963       if (qn->upper != 0) {
1964 	r = quantifiers_memory_node_info(qn->target);
1965       }
1966     }
1967     break;
1968 
1969   case N_EFFECT:
1970     {
1971       EffectNode* en = &(NEFFECT(node));
1972       switch (en->type) {
1973       case EFFECT_MEMORY:
1974 	return NQ_TARGET_IS_EMPTY_MEM;
1975 	break;
1976 
1977       case EFFECT_OPTION:
1978       case EFFECT_STOP_BACKTRACK:
1979 	r = quantifiers_memory_node_info(en->target);
1980 	break;
1981       default:
1982 	break;
1983       }
1984     }
1985     break;
1986 
1987   case N_BACKREF:
1988   case N_STRING:
1989   case N_CTYPE:
1990   case N_CCLASS:
1991   case N_ANYCHAR:
1992   case N_ANCHOR:
1993   default:
1994     break;
1995   }
1996 
1997   return r;
1998 }
1999 #endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
2000 
2001 static int
get_min_match_length(Node * node,OnigDistance * min,ScanEnv * env)2002 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2003 {
2004   OnigDistance tmin;
2005   int r = 0;
2006 
2007   *min = 0;
2008   switch (NTYPE(node)) {
2009   case N_BACKREF:
2010     {
2011       int i;
2012       int* backs;
2013       Node** nodes = SCANENV_MEM_NODES(env);
2014       BackrefNode* br = &(NBACKREF(node));
2015       if (br->state & NST_RECURSION) break;
2016 
2017       backs = BACKREFS_P(br);
2018       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2019       r = get_min_match_length(nodes[backs[0]], min, env);
2020       if (r != 0) break;
2021       for (i = 1; i < br->back_num; i++) {
2022 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2023 	r = get_min_match_length(nodes[backs[i]], &tmin, env);
2024 	if (r != 0) break;
2025 	if (*min > tmin) *min = tmin;
2026       }
2027     }
2028     break;
2029 
2030 #ifdef USE_SUBEXP_CALL
2031   case N_CALL:
2032     if (IS_CALL_RECURSION(&NCALL(node))) {
2033       EffectNode* en = &(NEFFECT(NCALL(node).target));
2034       if (IS_EFFECT_MIN_FIXED(en))
2035 	*min = en->min_len;
2036     }
2037     else
2038       r = get_min_match_length(NCALL(node).target, min, env);
2039     break;
2040 #endif
2041 
2042   case N_LIST:
2043     do {
2044       r = get_min_match_length(NCONS(node).left, &tmin, env);
2045       if (r == 0) *min += tmin;
2046     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2047     break;
2048 
2049   case N_ALT:
2050     {
2051       Node *x, *y;
2052       y = node;
2053       do {
2054 	x = NCONS(y).left;
2055 	r = get_min_match_length(x, &tmin, env);
2056 	if (r != 0) break;
2057 	if (y == node) *min = tmin;
2058 	else if (*min > tmin) *min = tmin;
2059       } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
2060     }
2061     break;
2062 
2063   case N_STRING:
2064     {
2065       StrNode* sn = &(NSTRING(node));
2066       *min = sn->end - sn->s;
2067     }
2068     break;
2069 
2070   case N_CTYPE:
2071     switch (NCTYPE(node).type) {
2072     case CTYPE_WORD:     *min = 1; break;
2073     case CTYPE_NOT_WORD: *min = 1; break;
2074     default:
2075       break;
2076     }
2077     break;
2078 
2079   case N_CCLASS:
2080   case N_ANYCHAR:
2081     *min = 1;
2082     break;
2083 
2084   case N_QUANTIFIER:
2085     {
2086       QuantifierNode* qn = &(NQUANTIFIER(node));
2087 
2088       if (qn->lower > 0) {
2089 	r = get_min_match_length(qn->target, min, env);
2090 	if (r == 0)
2091 	  *min = distance_multiply(*min, qn->lower);
2092       }
2093     }
2094     break;
2095 
2096   case N_EFFECT:
2097     {
2098       EffectNode* en = &(NEFFECT(node));
2099       switch (en->type) {
2100       case EFFECT_MEMORY:
2101 #ifdef USE_SUBEXP_CALL
2102 	if (IS_EFFECT_MIN_FIXED(en))
2103 	  *min = en->min_len;
2104 	else {
2105 	  r = get_min_match_length(en->target, min, env);
2106 	  if (r == 0) {
2107 	    en->min_len = *min;
2108 	    SET_EFFECT_STATUS(node, NST_MIN_FIXED);
2109 	  }
2110 	}
2111 	break;
2112 #endif
2113       case EFFECT_OPTION:
2114       case EFFECT_STOP_BACKTRACK:
2115 	r = get_min_match_length(en->target, min, env);
2116 	break;
2117       }
2118     }
2119     break;
2120 
2121   case N_ANCHOR:
2122   default:
2123     break;
2124   }
2125 
2126   return r;
2127 }
2128 
2129 static int
get_max_match_length(Node * node,OnigDistance * max,ScanEnv * env)2130 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2131 {
2132   OnigDistance tmax;
2133   int r = 0;
2134 
2135   *max = 0;
2136   switch (NTYPE(node)) {
2137   case N_LIST:
2138     do {
2139       r = get_max_match_length(NCONS(node).left, &tmax, env);
2140       if (r == 0)
2141 	*max = distance_add(*max, tmax);
2142     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2143     break;
2144 
2145   case N_ALT:
2146     do {
2147       r = get_max_match_length(NCONS(node).left, &tmax, env);
2148       if (r == 0 && *max < tmax) *max = tmax;
2149     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2150     break;
2151 
2152   case N_STRING:
2153     {
2154       StrNode* sn = &(NSTRING(node));
2155       *max = sn->end - sn->s;
2156     }
2157     break;
2158 
2159   case N_CTYPE:
2160     switch (NCTYPE(node).type) {
2161     case CTYPE_WORD:
2162     case CTYPE_NOT_WORD:
2163       *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2164       break;
2165 
2166     default:
2167       break;
2168     }
2169     break;
2170 
2171   case N_CCLASS:
2172   case N_ANYCHAR:
2173     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2174     break;
2175 
2176   case N_BACKREF:
2177     {
2178       int i;
2179       int* backs;
2180       Node** nodes = SCANENV_MEM_NODES(env);
2181       BackrefNode* br = &(NBACKREF(node));
2182       if (br->state & NST_RECURSION) {
2183 	*max = ONIG_INFINITE_DISTANCE;
2184 	break;
2185       }
2186       backs = BACKREFS_P(br);
2187       for (i = 0; i < br->back_num; i++) {
2188 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2189 	r = get_max_match_length(nodes[backs[i]], &tmax, env);
2190 	if (r != 0) break;
2191 	if (*max < tmax) *max = tmax;
2192       }
2193     }
2194     break;
2195 
2196 #ifdef USE_SUBEXP_CALL
2197   case N_CALL:
2198     if (! IS_CALL_RECURSION(&(NCALL(node))))
2199       r = get_max_match_length(NCALL(node).target, max, env);
2200     else
2201       *max = ONIG_INFINITE_DISTANCE;
2202     break;
2203 #endif
2204 
2205   case N_QUANTIFIER:
2206     {
2207       QuantifierNode* qn = &(NQUANTIFIER(node));
2208 
2209       if (qn->upper != 0) {
2210 	r = get_max_match_length(qn->target, max, env);
2211 	if (r == 0 && *max != 0) {
2212 	  if (! IS_REPEAT_INFINITE(qn->upper))
2213 	    *max = distance_multiply(*max, qn->upper);
2214 	  else
2215 	    *max = ONIG_INFINITE_DISTANCE;
2216 	}
2217       }
2218     }
2219     break;
2220 
2221   case N_EFFECT:
2222     {
2223       EffectNode* en = &(NEFFECT(node));
2224       switch (en->type) {
2225       case EFFECT_MEMORY:
2226 #ifdef USE_SUBEXP_CALL
2227 	if (IS_EFFECT_MAX_FIXED(en))
2228 	  *max = en->max_len;
2229 	else {
2230 	  r = get_max_match_length(en->target, max, env);
2231 	  if (r == 0) {
2232 	    en->max_len = *max;
2233 	    SET_EFFECT_STATUS(node, NST_MAX_FIXED);
2234 	  }
2235 	}
2236 	break;
2237 #endif
2238       case EFFECT_OPTION:
2239       case EFFECT_STOP_BACKTRACK:
2240 	r = get_max_match_length(en->target, max, env);
2241 	break;
2242       }
2243     }
2244     break;
2245 
2246   case N_ANCHOR:
2247   default:
2248     break;
2249   }
2250 
2251   return r;
2252 }
2253 
2254 #define GET_CHAR_LEN_VARLEN           -1
2255 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2256 
2257 /* fixed size pattern node only */
2258 static int
get_char_length_tree1(Node * node,regex_t * reg,int * len,int level)2259 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2260 {
2261   int tlen;
2262   int r = 0;
2263 
2264   level++;
2265   *len = 0;
2266   switch (NTYPE(node)) {
2267   case N_LIST:
2268     do {
2269       r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
2270       if (r == 0)
2271 	*len = distance_add(*len, tlen);
2272     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2273     break;
2274 
2275   case N_ALT:
2276     {
2277       int tlen2;
2278       int varlen = 0;
2279 
2280       r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
2281       while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
2282 	r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
2283 	if (r == 0) {
2284 	  if (tlen != tlen2)
2285 	    varlen = 1;
2286 	}
2287       }
2288       if (r == 0) {
2289 	if (varlen != 0) {
2290 	  if (level == 1)
2291 	    r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2292 	  else
2293 	    r = GET_CHAR_LEN_VARLEN;
2294 	}
2295 	else
2296 	  *len = tlen;
2297       }
2298     }
2299     break;
2300 
2301   case N_STRING:
2302     {
2303       StrNode* sn = &(NSTRING(node));
2304       UChar *s = sn->s;
2305       while (s < sn->end) {
2306 	s += enc_len(reg->enc, s);
2307 	(*len)++;
2308       }
2309     }
2310     break;
2311 
2312   case N_QUANTIFIER:
2313     {
2314       QuantifierNode* qn = &(NQUANTIFIER(node));
2315       if (qn->lower == qn->upper) {
2316 	r = get_char_length_tree1(qn->target, reg, &tlen, level);
2317 	if (r == 0)
2318 	  *len = distance_multiply(tlen, qn->lower);
2319       }
2320       else
2321 	r = GET_CHAR_LEN_VARLEN;
2322     }
2323     break;
2324 
2325 #ifdef USE_SUBEXP_CALL
2326   case N_CALL:
2327     if (! IS_CALL_RECURSION(&(NCALL(node))))
2328       r = get_char_length_tree1(NCALL(node).target, reg, len, level);
2329     else
2330       r = GET_CHAR_LEN_VARLEN;
2331     break;
2332 #endif
2333 
2334   case N_CTYPE:
2335     switch (NCTYPE(node).type) {
2336     case CTYPE_WORD:
2337     case CTYPE_NOT_WORD:
2338       *len = 1;
2339       break;
2340     }
2341     break;
2342 
2343   case N_CCLASS:
2344   case N_ANYCHAR:
2345     *len = 1;
2346     break;
2347 
2348   case N_EFFECT:
2349     {
2350       EffectNode* en = &(NEFFECT(node));
2351       switch (en->type) {
2352       case EFFECT_MEMORY:
2353 #ifdef USE_SUBEXP_CALL
2354 	if (IS_EFFECT_CLEN_FIXED(en))
2355 	  *len = en->char_len;
2356 	else {
2357 	  r = get_char_length_tree1(en->target, reg, len, level);
2358 	  if (r == 0) {
2359 	    en->char_len = *len;
2360 	    SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
2361 	  }
2362 	}
2363 	break;
2364 #endif
2365       case EFFECT_OPTION:
2366       case EFFECT_STOP_BACKTRACK:
2367 	r = get_char_length_tree1(en->target, reg, len, level);
2368 	break;
2369       default:
2370 	break;
2371       }
2372     }
2373     break;
2374 
2375   case N_ANCHOR:
2376     break;
2377 
2378   default:
2379     r = GET_CHAR_LEN_VARLEN;
2380     break;
2381   }
2382 
2383   return r;
2384 }
2385 
2386 static int
get_char_length_tree(Node * node,regex_t * reg,int * len)2387 get_char_length_tree(Node* node, regex_t* reg, int* len)
2388 {
2389   return get_char_length_tree1(node, reg, len, 0);
2390 }
2391 
2392 /* x is not included y ==>  1 : 0 */
2393 static int
is_not_included(Node * x,Node * y,regex_t * reg)2394 is_not_included(Node* x, Node* y, regex_t* reg)
2395 {
2396   int i, len;
2397   OnigCodePoint code;
2398   UChar *p, c;
2399   int ytype;
2400 
2401  retry:
2402   ytype = NTYPE(y);
2403   switch (NTYPE(x)) {
2404   case N_CTYPE:
2405     {
2406       switch (ytype) {
2407       case N_CTYPE:
2408 	switch (NCTYPE(x).type) {
2409 	case CTYPE_WORD:
2410 	  if (NCTYPE(y).type == CTYPE_NOT_WORD)
2411 	    return 1;
2412 	  else
2413 	    return 0;
2414 	  break;
2415 	case CTYPE_NOT_WORD:
2416 	  if (NCTYPE(y).type == CTYPE_WORD)
2417 	    return 1;
2418 	  else
2419 	    return 0;
2420 	  break;
2421 	default:
2422 	  break;
2423 	}
2424 	break;
2425 
2426       case N_CCLASS:
2427       swap:
2428 	{
2429 	  Node* tmp;
2430 	  tmp = x; x = y; y = tmp;
2431 	  goto retry;
2432 	}
2433 	break;
2434 
2435       case N_STRING:
2436 	goto swap;
2437 	break;
2438 
2439       default:
2440 	break;
2441       }
2442     }
2443     break;
2444 
2445   case N_CCLASS:
2446     {
2447       CClassNode* xc = &(NCCLASS(x));
2448       switch (ytype) {
2449       case N_CTYPE:
2450 	switch (NCTYPE(y).type) {
2451 	case CTYPE_WORD:
2452 	  if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
2453 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2454 	      if (BITSET_AT(xc->bs, i)) {
2455 		if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
2456 	      }
2457 	    }
2458 	    return 1;
2459 	  }
2460 	  return 0;
2461 	  break;
2462 	case CTYPE_NOT_WORD:
2463 	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2464 	    if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
2465 	      if (!IS_CCLASS_NOT(xc)) {
2466 		if (BITSET_AT(xc->bs, i))
2467 		  return 0;
2468 	      }
2469 	      else {
2470 		if (! BITSET_AT(xc->bs, i))
2471 		  return 0;
2472 	      }
2473 	    }
2474 	  }
2475 	  return 1;
2476 	  break;
2477 
2478 	default:
2479 	  break;
2480 	}
2481 	break;
2482 
2483       case N_CCLASS:
2484 	{
2485 	  int v;
2486 	  CClassNode* yc = &(NCCLASS(y));
2487 
2488 	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2489 	    v = BITSET_AT(xc->bs, i);
2490 	    if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
2491                 (v == 0 && IS_CCLASS_NOT(xc))) {
2492 	      v = BITSET_AT(yc->bs, i);
2493 	      if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
2494                   (v == 0 && IS_CCLASS_NOT(yc)))
2495 		return 0;
2496 	    }
2497 	  }
2498 	  if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
2499 	      (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
2500 	    return 1;
2501 	  return 0;
2502 	}
2503 	break;
2504 
2505       case N_STRING:
2506 	goto swap;
2507 	break;
2508 
2509       default:
2510 	break;
2511       }
2512     }
2513     break;
2514 
2515   case N_STRING:
2516     {
2517       StrNode* xs = &(NSTRING(x));
2518       if (NSTRING_LEN(x) == 0)
2519 	break;
2520 
2521       c = *(xs->s);
2522       switch (ytype) {
2523       case N_CTYPE:
2524 	switch (NCTYPE(y).type) {
2525 	case CTYPE_WORD:
2526 	  return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
2527 	  break;
2528 	case CTYPE_NOT_WORD:
2529 	  return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
2530 	  break;
2531 	default:
2532 	  break;
2533 	}
2534 	break;
2535 
2536       case N_CCLASS:
2537 	{
2538 	  CClassNode* cc = &(NCCLASS(y));
2539 
2540 	  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2541 				     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2542 	  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2543 	}
2544 	break;
2545 
2546       case N_STRING:
2547 	{
2548 	  UChar *q;
2549 	  StrNode* ys = &(NSTRING(y));
2550 	  len = NSTRING_LEN(x);
2551 	  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2552 	  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2553             /* tiny version */
2554             return 0;
2555 	  }
2556 	  else {
2557 	    for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2558 	      if (*p != *q) return 1;
2559 	    }
2560 	  }
2561 	}
2562 	break;
2563 
2564       default:
2565 	break;
2566       }
2567     }
2568     break;
2569 
2570   default:
2571     break;
2572   }
2573 
2574   return 0;
2575 }
2576 
2577 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2578 get_head_value_node(Node* node, int exact, regex_t* reg)
2579 {
2580   Node* n = NULL_NODE;
2581 
2582   switch (NTYPE(node)) {
2583   case N_BACKREF:
2584   case N_ALT:
2585   case N_ANYCHAR:
2586 #ifdef USE_SUBEXP_CALL
2587   case N_CALL:
2588 #endif
2589     break;
2590 
2591   case N_CTYPE:
2592   case N_CCLASS:
2593     if (exact == 0) {
2594       n = node;
2595     }
2596     break;
2597 
2598   case N_LIST:
2599     n = get_head_value_node(NCONS(node).left, exact, reg);
2600     break;
2601 
2602   case N_STRING:
2603     {
2604       StrNode* sn = &(NSTRING(node));
2605 
2606       if (sn->end <= sn->s)
2607 	break;
2608 
2609       if (exact != 0 &&
2610 	  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2611 #if 0
2612         UChar* tmp = sn->s;
2613 	if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
2614                                        &tmp, sn->end))
2615 	  n = node;
2616 #endif
2617       }
2618       else {
2619 	n = node;
2620       }
2621     }
2622     break;
2623 
2624   case N_QUANTIFIER:
2625     {
2626       QuantifierNode* qn = &(NQUANTIFIER(node));
2627       if (qn->lower > 0) {
2628 	if (IS_NOT_NULL(qn->head_exact))
2629 	  n = qn->head_exact;
2630 	else
2631 	  n = get_head_value_node(qn->target, exact, reg);
2632       }
2633     }
2634     break;
2635 
2636   case N_EFFECT:
2637     {
2638       EffectNode* en = &(NEFFECT(node));
2639       switch (en->type) {
2640       case EFFECT_OPTION:
2641 	{
2642 	  OnigOptionType options = reg->options;
2643 
2644 	  reg->options = NEFFECT(node).option;
2645 	  n = get_head_value_node(NEFFECT(node).target, exact, reg);
2646 	  reg->options = options;
2647 	}
2648 	break;
2649 
2650       case EFFECT_MEMORY:
2651       case EFFECT_STOP_BACKTRACK:
2652 	n = get_head_value_node(en->target, exact, reg);
2653 	break;
2654       }
2655     }
2656     break;
2657 
2658   case N_ANCHOR:
2659     if (NANCHOR(node).type == ANCHOR_PREC_READ)
2660       n = get_head_value_node(NANCHOR(node).target, exact, reg);
2661     break;
2662 
2663   default:
2664     break;
2665   }
2666 
2667   return n;
2668 }
2669 
2670 static int
check_type_tree(Node * node,int type_mask,int effect_mask,int anchor_mask)2671 check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
2672 {
2673   int type, r = 0;
2674 
2675   type = NTYPE(node);
2676   if ((type & type_mask) == 0)
2677     return 1;
2678 
2679   switch (type) {
2680   case N_LIST:
2681   case N_ALT:
2682     do {
2683       r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask);
2684     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2685     break;
2686 
2687   case N_QUANTIFIER:
2688     r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
2689 			anchor_mask);
2690     break;
2691 
2692   case N_EFFECT:
2693     {
2694       EffectNode* en = &(NEFFECT(node));
2695       if ((en->type & effect_mask) == 0)
2696 	return 1;
2697 
2698       r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask);
2699     }
2700     break;
2701 
2702   case N_ANCHOR:
2703     type = NANCHOR(node).type;
2704     if ((type & anchor_mask) == 0)
2705       return 1;
2706 
2707     if (NANCHOR(node).target)
2708       r = check_type_tree(NANCHOR(node).target,
2709 			  type_mask, effect_mask, anchor_mask);
2710     break;
2711 
2712   default:
2713     break;
2714   }
2715   return r;
2716 }
2717 
2718 #ifdef USE_SUBEXP_CALL
2719 
2720 #define RECURSION_EXIST       1
2721 #define RECURSION_INFINITE    2
2722 
2723 static int
subexp_inf_recursive_check(Node * node,ScanEnv * env,int head)2724 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2725 {
2726   int type;
2727   int r = 0;
2728 
2729   type = NTYPE(node);
2730   switch (type) {
2731   case N_LIST:
2732     {
2733       Node *x;
2734       OnigDistance min;
2735       int ret;
2736 
2737       x = node;
2738       do {
2739 	ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
2740 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2741 	r |= ret;
2742 	if (head) {
2743 	  ret = get_min_match_length(NCONS(x).left, &min, env);
2744 	  if (ret != 0) return ret;
2745 	  if (min != 0) head = 0;
2746 	}
2747       } while (IS_NOT_NULL(x = NCONS(x).right));
2748     }
2749     break;
2750 
2751   case N_ALT:
2752     {
2753       int ret;
2754       r = RECURSION_EXIST;
2755       do {
2756 	ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
2757 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2758 	r &= ret;
2759       } while (IS_NOT_NULL(node = NCONS(node).right));
2760     }
2761     break;
2762 
2763   case N_QUANTIFIER:
2764     r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
2765     if (r == RECURSION_EXIST) {
2766       if (NQUANTIFIER(node).lower == 0) r = 0;
2767     }
2768     break;
2769 
2770   case N_ANCHOR:
2771     {
2772       AnchorNode* an = &(NANCHOR(node));
2773       switch (an->type) {
2774       case ANCHOR_PREC_READ:
2775       case ANCHOR_PREC_READ_NOT:
2776       case ANCHOR_LOOK_BEHIND:
2777       case ANCHOR_LOOK_BEHIND_NOT:
2778 	r = subexp_inf_recursive_check(an->target, env, head);
2779 	break;
2780       }
2781     }
2782     break;
2783 
2784   case N_CALL:
2785     r = subexp_inf_recursive_check(NCALL(node).target, env, head);
2786     break;
2787 
2788   case N_EFFECT:
2789     if (IS_EFFECT_MARK2(&(NEFFECT(node))))
2790       return 0;
2791     else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
2792       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2793     else {
2794       SET_EFFECT_STATUS(node, NST_MARK2);
2795       r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
2796       CLEAR_EFFECT_STATUS(node, NST_MARK2);
2797     }
2798     break;
2799 
2800   default:
2801     break;
2802   }
2803 
2804   return r;
2805 }
2806 
2807 static int
subexp_inf_recursive_check_trav(Node * node,ScanEnv * env)2808 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2809 {
2810   int type;
2811   int r = 0;
2812 
2813   type = NTYPE(node);
2814   switch (type) {
2815   case N_LIST:
2816   case N_ALT:
2817     do {
2818       r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
2819     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
2820     break;
2821 
2822   case N_QUANTIFIER:
2823     r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
2824     break;
2825 
2826   case N_ANCHOR:
2827     {
2828       AnchorNode* an = &(NANCHOR(node));
2829       switch (an->type) {
2830       case ANCHOR_PREC_READ:
2831       case ANCHOR_PREC_READ_NOT:
2832       case ANCHOR_LOOK_BEHIND:
2833       case ANCHOR_LOOK_BEHIND_NOT:
2834 	r = subexp_inf_recursive_check_trav(an->target, env);
2835 	break;
2836       }
2837     }
2838     break;
2839 
2840   case N_EFFECT:
2841     {
2842       EffectNode* en = &(NEFFECT(node));
2843 
2844       if (IS_EFFECT_RECURSION(en)) {
2845 	SET_EFFECT_STATUS(node, NST_MARK1);
2846 	r = subexp_inf_recursive_check(en->target, env, 1);
2847 	if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2848 	CLEAR_EFFECT_STATUS(node, NST_MARK1);
2849       }
2850       r = subexp_inf_recursive_check_trav(en->target, env);
2851     }
2852 
2853     break;
2854 
2855   default:
2856     break;
2857   }
2858 
2859   return r;
2860 }
2861 
2862 static int
subexp_recursive_check(Node * node)2863 subexp_recursive_check(Node* node)
2864 {
2865   int type;
2866   int r = 0;
2867 
2868   type = NTYPE(node);
2869   switch (type) {
2870   case N_LIST:
2871   case N_ALT:
2872     do {
2873       r |= subexp_recursive_check(NCONS(node).left);
2874     } while (IS_NOT_NULL(node = NCONS(node).right));
2875     break;
2876 
2877   case N_QUANTIFIER:
2878     r = subexp_recursive_check(NQUANTIFIER(node).target);
2879     break;
2880 
2881   case N_ANCHOR:
2882     {
2883       AnchorNode* an = &(NANCHOR(node));
2884       switch (an->type) {
2885       case ANCHOR_PREC_READ:
2886       case ANCHOR_PREC_READ_NOT:
2887       case ANCHOR_LOOK_BEHIND:
2888       case ANCHOR_LOOK_BEHIND_NOT:
2889 	r = subexp_recursive_check(an->target);
2890 	break;
2891       }
2892     }
2893     break;
2894 
2895   case N_CALL:
2896     r = subexp_recursive_check(NCALL(node).target);
2897     if (r != 0) SET_CALL_RECURSION(node);
2898     break;
2899 
2900   case N_EFFECT:
2901     if (IS_EFFECT_MARK2(&(NEFFECT(node))))
2902       return 0;
2903     else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
2904       return 1; /* recursion */
2905     else {
2906       SET_EFFECT_STATUS(node, NST_MARK2);
2907       r = subexp_recursive_check(NEFFECT(node).target);
2908       CLEAR_EFFECT_STATUS(node, NST_MARK2);
2909     }
2910     break;
2911 
2912   default:
2913     break;
2914   }
2915 
2916   return r;
2917 }
2918 
2919 
2920 static int
subexp_recursive_check_trav(Node * node,ScanEnv * env)2921 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2922 {
2923 #define FOUND_CALLED_NODE    1
2924 
2925   int type;
2926   int r = 0;
2927 
2928   type = NTYPE(node);
2929   switch (type) {
2930   case N_LIST:
2931   case N_ALT:
2932     {
2933       int ret;
2934       do {
2935 	ret = subexp_recursive_check_trav(NCONS(node).left, env);
2936 	if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2937 	else if (ret < 0) return ret;
2938       } while (IS_NOT_NULL(node = NCONS(node).right));
2939     }
2940     break;
2941 
2942   case N_QUANTIFIER:
2943     r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
2944     if (NQUANTIFIER(node).upper == 0) {
2945       if (r == FOUND_CALLED_NODE)
2946 	NQUANTIFIER(node).is_refered = 1;
2947     }
2948     break;
2949 
2950   case N_ANCHOR:
2951     {
2952       AnchorNode* an = &(NANCHOR(node));
2953       switch (an->type) {
2954       case ANCHOR_PREC_READ:
2955       case ANCHOR_PREC_READ_NOT:
2956       case ANCHOR_LOOK_BEHIND:
2957       case ANCHOR_LOOK_BEHIND_NOT:
2958 	r = subexp_recursive_check_trav(an->target, env);
2959 	break;
2960       }
2961     }
2962     break;
2963 
2964   case N_EFFECT:
2965     {
2966       EffectNode* en = &(NEFFECT(node));
2967 
2968       if (! IS_EFFECT_RECURSION(en)) {
2969 	if (IS_EFFECT_CALLED(en)) {
2970 	  SET_EFFECT_STATUS(node, NST_MARK1);
2971 	  r = subexp_recursive_check(en->target);
2972 	  if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
2973 	  CLEAR_EFFECT_STATUS(node, NST_MARK1);
2974 	}
2975       }
2976       r = subexp_recursive_check_trav(en->target, env);
2977       if (IS_EFFECT_CALLED(en))
2978 	r |= FOUND_CALLED_NODE;
2979     }
2980     break;
2981 
2982   default:
2983     break;
2984   }
2985 
2986   return r;
2987 }
2988 
2989 static int
setup_subexp_call(Node * node,ScanEnv * env)2990 setup_subexp_call(Node* node, ScanEnv* env)
2991 {
2992   int type;
2993   int r = 0;
2994 
2995   type = NTYPE(node);
2996   switch (type) {
2997   case N_LIST:
2998     do {
2999       r = setup_subexp_call(NCONS(node).left, env);
3000     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
3001     break;
3002 
3003   case N_ALT:
3004     do {
3005       r = setup_subexp_call(NCONS(node).left, env);
3006     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
3007     break;
3008 
3009   case N_QUANTIFIER:
3010     r = setup_subexp_call(NQUANTIFIER(node).target, env);
3011     break;
3012   case N_EFFECT:
3013     r = setup_subexp_call(NEFFECT(node).target, env);
3014     break;
3015 
3016   case N_CALL:
3017     {
3018       int n, num, *refs;
3019       UChar *p;
3020       CallNode* cn = &(NCALL(node));
3021       Node** nodes = SCANENV_MEM_NODES(env);
3022 
3023 #ifdef USE_NAMED_GROUP
3024       n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
3025 #else
3026       n = -1;
3027 #endif
3028       if (n <= 0) {
3029 	/* name not found, check group number. (?*ddd) */
3030 	p = cn->name;
3031 	num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
3032 	if (num <= 0 || p != cn->name_end) {
3033 	  onig_scan_env_set_error_string(env,
3034 		  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3035 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3036 	}
3037 #ifdef USE_NAMED_GROUP
3038 	if (env->num_named > 0 &&
3039 	    IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3040 	    !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3041 	  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3042 	}
3043 #endif
3044 	if (num > env->num_mem) {
3045 	  onig_scan_env_set_error_string(env,
3046 		 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3047 	  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3048 	}
3049 	cn->ref_num = num;
3050 	goto set_call_attr;
3051       }
3052       else if (n > 1) {
3053 	onig_scan_env_set_error_string(env,
3054 	       ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3055 	return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3056       }
3057       else {
3058 	cn->ref_num = refs[0];
3059       set_call_attr:
3060 	cn->target = nodes[cn->ref_num];
3061 	if (IS_NULL(cn->target)) {
3062 	  onig_scan_env_set_error_string(env,
3063 		  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3064 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3065 	}
3066 	SET_EFFECT_STATUS(cn->target, NST_CALLED);
3067 	BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
3068 	cn->unset_addr_list = env->unset_addr_list;
3069       }
3070     }
3071     break;
3072 
3073   case N_ANCHOR:
3074     {
3075       AnchorNode* an = &(NANCHOR(node));
3076 
3077       switch (an->type) {
3078       case ANCHOR_PREC_READ:
3079       case ANCHOR_PREC_READ_NOT:
3080       case ANCHOR_LOOK_BEHIND:
3081       case ANCHOR_LOOK_BEHIND_NOT:
3082 	r = setup_subexp_call(an->target, env);
3083 	break;
3084       }
3085     }
3086     break;
3087 
3088   default:
3089     break;
3090   }
3091 
3092   return r;
3093 }
3094 #endif
3095 
3096 /* divide different length alternatives in look-behind.
3097   (?<=A|B) ==> (?<=A)|(?<=B)
3098   (?<!A|B) ==> (?<!A)(?<!B)
3099 */
3100 static int
divide_look_behind_alternatives(Node * node)3101 divide_look_behind_alternatives(Node* node)
3102 {
3103   Node tmp_node;
3104   Node *head, *np, *insert_node;
3105   AnchorNode* an = &(NANCHOR(node));
3106   int anc_type = an->type;
3107 
3108   head = an->target;
3109   np = NCONS(head).left;
3110   tmp_node = *node; *node = *head; *head = tmp_node;
3111   NCONS(node).left = head;
3112   NANCHOR(head).target = np;
3113 
3114   np = node;
3115   while ((np = NCONS(np).right) != NULL_NODE) {
3116     insert_node = onig_node_new_anchor(anc_type);
3117     CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
3118     NANCHOR(insert_node).target = NCONS(np).left;
3119     NCONS(np).left = insert_node;
3120   }
3121 
3122   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3123     np = node;
3124     do {
3125       np->type = N_LIST;  /* alt -> list */
3126     } while ((np = NCONS(np).right) != NULL_NODE);
3127   }
3128   return 0;
3129 }
3130 
3131 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3132 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3133 {
3134   int r, len;
3135   AnchorNode* an = &(NANCHOR(node));
3136 
3137   r = get_char_length_tree(an->target, reg, &len);
3138   if (r == 0)
3139     an->char_len = len;
3140   else if (r == GET_CHAR_LEN_VARLEN)
3141     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3142   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3143     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3144       r = divide_look_behind_alternatives(node);
3145     else
3146       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3147   }
3148 
3149   return r;
3150 }
3151 
3152 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3153 next_setup(Node* node, Node* next_node, regex_t* reg)
3154 {
3155   int type;
3156 
3157  retry:
3158   type = NTYPE(node);
3159   if (type == N_QUANTIFIER) {
3160     QuantifierNode* qn = &(NQUANTIFIER(node));
3161     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3162 #ifdef USE_QUANTIFIER_PEEK_NEXT
3163       qn->next_head_exact = get_head_value_node(next_node, 1, reg);
3164 #endif
3165       /* automatic posseivation a*b ==> (?>a*)b */
3166       if (qn->lower <= 1) {
3167 	int ttype = NTYPE(qn->target);
3168 	if (IS_NODE_TYPE_SIMPLE(ttype)) {
3169 	  Node *x, *y;
3170 	  x = get_head_value_node(qn->target, 0, reg);
3171 	  if (IS_NOT_NULL(x)) {
3172 	    y = get_head_value_node(next_node,  0, reg);
3173 	    if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3174 	      Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
3175 	      CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
3176 	      SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3177 	      swap_node(node, en);
3178 	      NEFFECT(node).target = en;
3179 	    }
3180 	  }
3181 	}
3182       }
3183     }
3184   }
3185   else if (type == N_EFFECT) {
3186     EffectNode* en = &(NEFFECT(node));
3187     if (en->type == EFFECT_MEMORY) {
3188       node = en->target;
3189       goto retry;
3190     }
3191   }
3192   return 0;
3193 }
3194 
3195 
3196 static int
divide_ambig_string_node_sub(regex_t * reg,int prev_ambig,UChar * prev_start,UChar * prev,UChar * end,Node *** tailp,Node ** root)3197 divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
3198                              UChar* prev_start, UChar* prev,
3199                              UChar* end, Node*** tailp, Node** root)
3200 {
3201   UChar *tmp, *wp;
3202   Node* snode;
3203 
3204   if (prev_ambig != 0) {
3205     tmp = prev_start;
3206     wp  = prev_start;
3207     while (tmp < prev) {
3208       wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
3209                                      &tmp, end, wp);
3210     }
3211     snode = onig_node_new_str(prev_start, wp);
3212     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
3213     NSTRING_SET_AMBIG(snode);
3214     if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
3215   }
3216   else {
3217     snode = onig_node_new_str(prev_start, prev);
3218     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
3219   }
3220 
3221   if (*tailp == (Node** )0) {
3222     *root = onig_node_new_list(snode, NULL);
3223     CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
3224     *tailp = &(NCONS(*root).right);
3225   }
3226   else {
3227     **tailp = onig_node_new_list(snode, NULL);
3228     CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
3229     *tailp = &(NCONS(**tailp).right);
3230   }
3231 
3232   return 0;
3233 }
3234 
3235 static int
divide_ambig_string_node(Node * node,regex_t * reg)3236 divide_ambig_string_node(Node* node, regex_t* reg)
3237 {
3238   StrNode* sn = &NSTRING(node);
3239   int ambig, prev_ambig;
3240   UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
3241   Node *root = NULL_NODE;
3242   Node **tailp = (Node** )0;
3243   int r;
3244 
3245   start = prev_start = p = sn->s;
3246   end  = sn->end;
3247   if (p >= end) return 0;
3248 
3249   prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
3250 
3251   while (p < end) {
3252     prev = p;
3253     if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
3254                                               reg->ambig_flag, &p, end))) {
3255 
3256       r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
3257                                        end, &tailp, &root);
3258       if (r != 0) return r;
3259 
3260       prev_ambig = ambig;
3261       prev_start = prev;
3262     }
3263   }
3264 
3265   if (prev_start == start) {
3266     if (prev_ambig != 0) {
3267       NSTRING_SET_AMBIG(node);
3268       tmp = start;
3269       wp  = start;
3270       while (tmp < end) {
3271         wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
3272                                        &tmp, end, wp);
3273       }
3274       if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
3275       sn->end = wp;
3276     }
3277   }
3278   else {
3279     r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
3280                                      end, &tailp, &root);
3281     if (r != 0) return r;
3282 
3283     swap_node(node, root);
3284     onig_node_str_clear(root); /* should be after swap! */
3285     onig_node_free(root);      /* free original string node */
3286   }
3287 
3288   return 0;
3289 }
3290 
3291 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3292 
3293 #define CEC_THRES_NUM_BIG_REPEAT         512
3294 #define CEC_INFINITE_NUM          0x7fffffff
3295 
3296 #define CEC_IN_INFINITE_REPEAT    (1<<0)
3297 #define CEC_IN_FINITE_REPEAT      (1<<1)
3298 #define CEC_CONT_BIG_REPEAT       (1<<2)
3299 
3300 static int
setup_comb_exp_check(Node * node,int state,ScanEnv * env)3301 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3302 {
3303   int type;
3304   int r = state;
3305 
3306   type = NTYPE(node);
3307   switch (type) {
3308   case N_LIST:
3309     {
3310       Node* prev = NULL_NODE;
3311       do {
3312 	r = setup_comb_exp_check(NCONS(node).left, r, env);
3313 	prev = NCONS(node).left;
3314       } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
3315     }
3316     break;
3317 
3318   case N_ALT:
3319     {
3320       int ret;
3321       do {
3322 	ret = setup_comb_exp_check(NCONS(node).left, state, env);
3323 	r |= ret;
3324       } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
3325     }
3326     break;
3327 
3328   case N_QUANTIFIER:
3329     {
3330       int child_state = state;
3331       int add_state = 0;
3332       QuantifierNode* qn = &(NQUANTIFIER(node));
3333       Node* target = qn->target;
3334       int var_num;
3335 
3336       if (! IS_REPEAT_INFINITE(qn->upper)) {
3337 	if (qn->upper > 1) {
3338 	  /* {0,1}, {1,1} are allowed */
3339 	  child_state |= CEC_IN_FINITE_REPEAT;
3340 
3341 	  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3342 	  if (env->backrefed_mem == 0) {
3343 	    if (NTYPE(qn->target) == N_EFFECT) {
3344 	      EffectNode* en = &(NEFFECT(qn->target));
3345 	      if (en->type == EFFECT_MEMORY) {
3346 		if (NTYPE(en->target) == N_QUANTIFIER) {
3347 		  QuantifierNode* q = &(NQUANTIFIER(en->target));
3348 		  if (IS_REPEAT_INFINITE(q->upper)
3349 		      && q->greedy == qn->greedy) {
3350 		    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3351 		    if (qn->upper == 1)
3352 		      child_state = state;
3353 		  }
3354 		}
3355 	      }
3356 	    }
3357 	  }
3358 	}
3359       }
3360 
3361       if (state & CEC_IN_FINITE_REPEAT) {
3362 	qn->comb_exp_check_num = -1;
3363       }
3364       else {
3365 	if (IS_REPEAT_INFINITE(qn->upper)) {
3366 	  var_num = CEC_INFINITE_NUM;
3367 	  child_state |= CEC_IN_INFINITE_REPEAT;
3368 	}
3369 	else {
3370 	  var_num = qn->upper - qn->lower;
3371 	}
3372 
3373 	if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3374 	  add_state |= CEC_CONT_BIG_REPEAT;
3375 
3376 	if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3377 	    ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3378 	     var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3379 	  if (qn->comb_exp_check_num == 0) {
3380 	    env->num_comb_exp_check++;
3381 	    qn->comb_exp_check_num = env->num_comb_exp_check;
3382 	    if (env->curr_max_regnum > env->comb_exp_max_regnum)
3383 	      env->comb_exp_max_regnum = env->curr_max_regnum;
3384 	  }
3385 	}
3386       }
3387 
3388       r = setup_comb_exp_check(target, child_state, env);
3389       r |= add_state;
3390     }
3391     break;
3392 
3393   case N_EFFECT:
3394     {
3395       EffectNode* en = &(NEFFECT(node));
3396 
3397       switch (en->type) {
3398       case EFFECT_MEMORY:
3399 	{
3400 	  if (env->curr_max_regnum < en->regnum)
3401 	    env->curr_max_regnum = en->regnum;
3402 
3403 	  r = setup_comb_exp_check(en->target, state, env);
3404 	}
3405 	break;
3406 
3407       default:
3408 	r = setup_comb_exp_check(en->target, state, env);
3409 	break;
3410       }
3411     }
3412     break;
3413 
3414 #ifdef USE_SUBEXP_CALL
3415   case N_CALL:
3416     if (IS_CALL_RECURSION(&(NCALL(node))))
3417       env->has_recursion = 1;
3418     else
3419       r = setup_comb_exp_check(NCALL(node).target, state, env);
3420     break;
3421 #endif
3422 
3423   default:
3424     break;
3425   }
3426 
3427   return r;
3428 }
3429 #endif
3430 
3431 #define IN_ALT        (1<<0)
3432 #define IN_NOT        (1<<1)
3433 #define IN_REPEAT     (1<<2)
3434 #define IN_VAR_REPEAT (1<<3)
3435 
3436 /* setup_tree does the following work.
3437  1. check empty loop. (set qn->target_empty_info)
3438  2. expand ignore-case in char class.
3439  3. set memory status bit flags. (reg->mem_stats)
3440  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3441  5. find invalid patterns in look-behind.
3442  6. expand repeated string.
3443  */
3444 static int
setup_tree(Node * node,regex_t * reg,int state,ScanEnv * env)3445 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3446 {
3447   int type;
3448   int r = 0;
3449 
3450   type = NTYPE(node);
3451   switch (type) {
3452   case N_LIST:
3453     {
3454       Node* prev = NULL_NODE;
3455       do {
3456 	r = setup_tree(NCONS(node).left, reg, state, env);
3457 	if (IS_NOT_NULL(prev) && r == 0) {
3458 	  r = next_setup(prev, NCONS(node).left, reg);
3459 	}
3460 	prev = NCONS(node).left;
3461       } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
3462     }
3463     break;
3464 
3465   case N_ALT:
3466     do {
3467       r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
3468     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
3469     break;
3470 
3471   case N_CCLASS:
3472     break;
3473 
3474   case N_STRING:
3475     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3476       r = divide_ambig_string_node(node, reg);
3477     }
3478     break;
3479 
3480   case N_CTYPE:
3481   case N_ANYCHAR:
3482     break;
3483 
3484 #ifdef USE_SUBEXP_CALL
3485   case N_CALL:
3486     break;
3487 #endif
3488 
3489   case N_BACKREF:
3490     {
3491       int i;
3492       int* p;
3493       Node** nodes = SCANENV_MEM_NODES(env);
3494       BackrefNode* br = &(NBACKREF(node));
3495       p = BACKREFS_P(br);
3496       for (i = 0; i < br->back_num; i++) {
3497 	if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3498 	BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3499 	BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3500 #ifdef USE_BACKREF_AT_LEVEL
3501 	if (IS_BACKREF_NEST_LEVEL(br)) {
3502 	  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3503 	}
3504 #endif
3505 	SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3506       }
3507     }
3508     break;
3509 
3510   case N_QUANTIFIER:
3511     {
3512       OnigDistance d;
3513       QuantifierNode* qn = &(NQUANTIFIER(node));
3514       Node* target = qn->target;
3515 
3516       if ((state & IN_REPEAT) != 0) {
3517         qn->state |= NST_IN_REPEAT;
3518       }
3519 
3520       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3521 	r = get_min_match_length(target, &d, env);
3522 	if (r) break;
3523 	if (d == 0) {
3524 	  qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3525 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
3526 	  r = quantifiers_memory_node_info(target);
3527 	  if (r < 0) break;
3528 	  if (r > 0) {
3529 	    qn->target_empty_info = r;
3530 	  }
3531 #endif
3532 #if 0
3533 	  r = get_max_match_length(target, &d, env);
3534 	  if (r == 0 && d == 0) {
3535 	    /*  ()* ==> ()?, ()+ ==> ()  */
3536 	    qn->upper = 1;
3537 	    if (qn->lower > 1) qn->lower = 1;
3538 	    if (NTYPE(target) == N_STRING) {
3539 	      qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3540 	    }
3541 	  }
3542 #endif
3543 	}
3544       }
3545 
3546       state |= IN_REPEAT;
3547       if (qn->lower != qn->upper)
3548 	state |= IN_VAR_REPEAT;
3549       r = setup_tree(target, reg, state, env);
3550       if (r) break;
3551 
3552       /* expand string */
3553 #define EXPAND_STRING_MAX_LENGTH  100
3554       if (NTYPE(target) == N_STRING) {
3555 	if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3556 	    qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3557 	  int len = NSTRING_LEN(target);
3558 	  StrNode* sn = &(NSTRING(target));
3559 
3560 	  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3561 	    int i, n = qn->lower;
3562 	    onig_node_conv_to_str_node(node, NSTRING(target).flag);
3563 	    for (i = 0; i < n; i++) {
3564 	      r = onig_node_str_cat(node, sn->s, sn->end);
3565 	      if (r) break;
3566 	    }
3567 	    onig_node_free(target);
3568 	    break; /* break case N_QUANTIFIER: */
3569 	  }
3570 	}
3571       }
3572 
3573 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3574       if (qn->greedy && (qn->target_empty_info != 0)) {
3575 	if (NTYPE(target) == N_QUANTIFIER) {
3576 	  QuantifierNode* tqn = &(NQUANTIFIER(target));
3577 	  if (IS_NOT_NULL(tqn->head_exact)) {
3578 	    qn->head_exact  = tqn->head_exact;
3579 	    tqn->head_exact = NULL;
3580 	  }
3581 	}
3582 	else {
3583 	  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3584 	}
3585       }
3586 #endif
3587     }
3588     break;
3589 
3590   case N_EFFECT:
3591     {
3592       EffectNode* en = &(NEFFECT(node));
3593 
3594       switch (en->type) {
3595       case EFFECT_OPTION:
3596 	{
3597 	  OnigOptionType options = reg->options;
3598 	  reg->options = NEFFECT(node).option;
3599 	  r = setup_tree(NEFFECT(node).target, reg, state, env);
3600 	  reg->options = options;
3601 	}
3602 	break;
3603 
3604       case EFFECT_MEMORY:
3605 	if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3606 	  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3607 	  /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
3608 	}
3609         r = setup_tree(en->target, reg, state, env);
3610         break;
3611 
3612       case EFFECT_STOP_BACKTRACK:
3613 	{
3614 	  Node* target = en->target;
3615 	  r = setup_tree(target, reg, state, env);
3616 	  if (NTYPE(target) == N_QUANTIFIER) {
3617 	    QuantifierNode* tqn = &(NQUANTIFIER(target));
3618 	    if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3619 		tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
3620 	      int qtype = NTYPE(tqn->target);
3621 	      if (IS_NODE_TYPE_SIMPLE(qtype))
3622 		SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3623 	    }
3624 	  }
3625 	}
3626 	break;
3627       }
3628     }
3629     break;
3630 
3631   case N_ANCHOR:
3632     {
3633       AnchorNode* an = &(NANCHOR(node));
3634 
3635       switch (an->type) {
3636       case ANCHOR_PREC_READ:
3637 	r = setup_tree(an->target, reg, state, env);
3638 	break;
3639       case ANCHOR_PREC_READ_NOT:
3640 	r = setup_tree(an->target, reg, (state | IN_NOT), env);
3641 	break;
3642 
3643 /* allowed node types in look-behind */
3644 #define ALLOWED_TYPE_IN_LB  \
3645   ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
3646     N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
3647 
3648 #define ALLOWED_EFFECT_IN_LB       ( EFFECT_MEMORY )
3649 #define ALLOWED_EFFECT_IN_LB_NOT   0
3650 
3651 #define ALLOWED_ANCHOR_IN_LB \
3652 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3653 #define ALLOWED_ANCHOR_IN_LB_NOT \
3654 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3655 
3656       case ANCHOR_LOOK_BEHIND:
3657 	{
3658 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3659 			      ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
3660 	  if (r < 0) return r;
3661 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3662 	  r = setup_look_behind(node, reg, env);
3663 	  if (r != 0) return r;
3664 	  r = setup_tree(an->target, reg, state, env);
3665 	}
3666 	break;
3667 
3668       case ANCHOR_LOOK_BEHIND_NOT:
3669 	{
3670 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3671 		      ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3672 	  if (r < 0) return r;
3673 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3674 	  r = setup_look_behind(node, reg, env);
3675 	  if (r != 0) return r;
3676 	  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3677 	}
3678 	break;
3679       }
3680     }
3681     break;
3682 
3683   default:
3684     break;
3685   }
3686 
3687   return r;
3688 }
3689 
3690 /* set skip map for Boyer-Moor search */
3691 static int
set_bm_skip(UChar * s,UChar * end,OnigEncoding enc,UChar skip[],int ** int_skip)3692 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
3693 	    UChar skip[], int** int_skip)
3694 {
3695   int i, len;
3696 
3697   len = end - s;
3698   if (len < ONIG_CHAR_TABLE_SIZE) {
3699     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
3700 
3701     for (i = 0; i < len - 1; i++)
3702       skip[s[i]] = len - 1 - i;
3703   }
3704   else {
3705     if (IS_NULL(*int_skip)) {
3706       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3707       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3708     }
3709     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3710 
3711     for (i = 0; i < len - 1; i++)
3712       (*int_skip)[s[i]] = len - 1 - i;
3713   }
3714   return 0;
3715 }
3716 
3717 #define OPT_EXACT_MAXLEN   24
3718 
3719 typedef struct {
3720   OnigDistance min;  /* min byte length */
3721   OnigDistance max;  /* max byte length */
3722 } MinMaxLen;
3723 
3724 typedef struct {
3725   MinMaxLen       mmd;
3726   OnigEncoding    enc;
3727   OnigOptionType  options;
3728   OnigAmbigType   ambig_flag;
3729   ScanEnv*        scan_env;
3730 } OptEnv;
3731 
3732 typedef struct {
3733   int left_anchor;
3734   int right_anchor;
3735 } OptAncInfo;
3736 
3737 typedef struct {
3738   MinMaxLen  mmd; /* info position */
3739   OptAncInfo anc;
3740 
3741   int   reach_end;
3742   int   ignore_case;
3743   int   len;
3744   UChar s[OPT_EXACT_MAXLEN];
3745 } OptExactInfo;
3746 
3747 typedef struct {
3748   MinMaxLen mmd; /* info position */
3749   OptAncInfo anc;
3750 
3751   int   value;      /* weighted value */
3752   UChar map[ONIG_CHAR_TABLE_SIZE];
3753 } OptMapInfo;
3754 
3755 typedef struct {
3756   MinMaxLen    len;
3757 
3758   OptAncInfo   anc;
3759   OptExactInfo exb;    /* boundary */
3760   OptExactInfo exm;    /* middle */
3761   OptExactInfo expr;   /* prec read (?=...) */
3762 
3763   OptMapInfo   map;   /* boundary */
3764 } NodeOptInfo;
3765 
3766 
3767 static int
map_position_value(OnigEncoding enc,int i)3768 map_position_value(OnigEncoding enc, int i)
3769 {
3770   static const short int ByteValTable[] = {
3771      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
3772      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
3773     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
3774      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
3775      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
3776      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
3777      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
3778      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
3779   };
3780 
3781   if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
3782     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
3783       return 20;
3784     else
3785       return (int )ByteValTable[i];
3786   }
3787   else
3788     return 4;   /* Take it easy. */
3789 }
3790 
3791 static int
distance_value(MinMaxLen * mm)3792 distance_value(MinMaxLen* mm)
3793 {
3794   /* 1000 / (min-max-dist + 1) */
3795   static const short int dist_vals[] = {
3796     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
3797       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
3798       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
3799       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
3800       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
3801       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
3802       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
3803       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
3804       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
3805       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
3806   };
3807 
3808   int d;
3809 
3810   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
3811 
3812   d = mm->max - mm->min;
3813   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
3814     /* return dist_vals[d] * 16 / (mm->min + 12); */
3815     return (int )dist_vals[d];
3816   else
3817     return 1;
3818 }
3819 
3820 static int
comp_distance_value(MinMaxLen * d1,MinMaxLen * d2,int v1,int v2)3821 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
3822 {
3823   if (v2 <= 0) return -1;
3824   if (v1 <= 0) return  1;
3825 
3826   v1 *= distance_value(d1);
3827   v2 *= distance_value(d2);
3828 
3829   if (v2 > v1) return  1;
3830   if (v2 < v1) return -1;
3831 
3832   if (d2->min < d1->min) return  1;
3833   if (d2->min > d1->min) return -1;
3834   return 0;
3835 }
3836 
3837 static int
is_equal_mml(MinMaxLen * a,MinMaxLen * b)3838 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
3839 {
3840   return (a->min == b->min && a->max == b->max) ? 1 : 0;
3841 }
3842 
3843 
3844 static void
set_mml(MinMaxLen * mml,OnigDistance min,OnigDistance max)3845 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
3846 {
3847   mml->min = min;
3848   mml->max = max;
3849 }
3850 
3851 static void
clear_mml(MinMaxLen * mml)3852 clear_mml(MinMaxLen* mml)
3853 {
3854   mml->min = mml->max = 0;
3855 }
3856 
3857 static void
copy_mml(MinMaxLen * to,MinMaxLen * from)3858 copy_mml(MinMaxLen* to, MinMaxLen* from)
3859 {
3860   to->min = from->min;
3861   to->max = from->max;
3862 }
3863 
3864 static void
add_mml(MinMaxLen * to,MinMaxLen * from)3865 add_mml(MinMaxLen* to, MinMaxLen* from)
3866 {
3867   to->min = distance_add(to->min, from->min);
3868   to->max = distance_add(to->max, from->max);
3869 }
3870 
3871 #if 0
3872 static void
3873 add_len_mml(MinMaxLen* to, OnigDistance len)
3874 {
3875   to->min = distance_add(to->min, len);
3876   to->max = distance_add(to->max, len);
3877 }
3878 #endif
3879 
3880 static void
alt_merge_mml(MinMaxLen * to,MinMaxLen * from)3881 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
3882 {
3883   if (to->min > from->min) to->min = from->min;
3884   if (to->max < from->max) to->max = from->max;
3885 }
3886 
3887 static void
copy_opt_env(OptEnv * to,OptEnv * from)3888 copy_opt_env(OptEnv* to, OptEnv* from)
3889 {
3890   *to = *from;
3891 }
3892 
3893 static void
clear_opt_anc_info(OptAncInfo * anc)3894 clear_opt_anc_info(OptAncInfo* anc)
3895 {
3896   anc->left_anchor  = 0;
3897   anc->right_anchor = 0;
3898 }
3899 
3900 static void
copy_opt_anc_info(OptAncInfo * to,OptAncInfo * from)3901 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
3902 {
3903   *to = *from;
3904 }
3905 
3906 static void
concat_opt_anc_info(OptAncInfo * to,OptAncInfo * left,OptAncInfo * right,OnigDistance left_len,OnigDistance right_len)3907 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
3908 		    OnigDistance left_len, OnigDistance right_len)
3909 {
3910   clear_opt_anc_info(to);
3911 
3912   to->left_anchor = left->left_anchor;
3913   if (left_len == 0) {
3914     to->left_anchor |= right->left_anchor;
3915   }
3916 
3917   to->right_anchor = right->right_anchor;
3918   if (right_len == 0) {
3919     to->right_anchor |= left->right_anchor;
3920   }
3921 }
3922 
3923 static int
is_left_anchor(int anc)3924 is_left_anchor(int anc)
3925 {
3926   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
3927       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
3928       anc == ANCHOR_PREC_READ_NOT)
3929     return 0;
3930 
3931   return 1;
3932 }
3933 
3934 static int
is_set_opt_anc_info(OptAncInfo * to,int anc)3935 is_set_opt_anc_info(OptAncInfo* to, int anc)
3936 {
3937   if ((to->left_anchor & anc) != 0) return 1;
3938 
3939   return ((to->right_anchor & anc) != 0 ? 1 : 0);
3940 }
3941 
3942 static void
add_opt_anc_info(OptAncInfo * to,int anc)3943 add_opt_anc_info(OptAncInfo* to, int anc)
3944 {
3945   if (is_left_anchor(anc))
3946     to->left_anchor |= anc;
3947   else
3948     to->right_anchor |= anc;
3949 }
3950 
3951 static void
remove_opt_anc_info(OptAncInfo * to,int anc)3952 remove_opt_anc_info(OptAncInfo* to, int anc)
3953 {
3954   if (is_left_anchor(anc))
3955     to->left_anchor &= ~anc;
3956   else
3957     to->right_anchor &= ~anc;
3958 }
3959 
3960 static void
alt_merge_opt_anc_info(OptAncInfo * to,OptAncInfo * add)3961 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
3962 {
3963   to->left_anchor  &= add->left_anchor;
3964   to->right_anchor &= add->right_anchor;
3965 }
3966 
3967 static int
is_full_opt_exact_info(OptExactInfo * ex)3968 is_full_opt_exact_info(OptExactInfo* ex)
3969 {
3970   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
3971 }
3972 
3973 static void
clear_opt_exact_info(OptExactInfo * ex)3974 clear_opt_exact_info(OptExactInfo* ex)
3975 {
3976   clear_mml(&ex->mmd);
3977   clear_opt_anc_info(&ex->anc);
3978   ex->reach_end   = 0;
3979   ex->ignore_case = 0;
3980   ex->len         = 0;
3981   ex->s[0]        = '\0';
3982 }
3983 
3984 static void
copy_opt_exact_info(OptExactInfo * to,OptExactInfo * from)3985 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
3986 {
3987   *to = *from;
3988 }
3989 
3990 static void
concat_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OnigEncoding enc)3991 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
3992 {
3993   int i, j, len;
3994   UChar *p, *end;
3995   OptAncInfo tanc;
3996 
3997   if (! to->ignore_case && add->ignore_case) {
3998     if (to->len >= add->len) return ;  /* avoid */
3999 
4000     to->ignore_case = 1;
4001   }
4002 
4003   p = add->s;
4004   end = p + add->len;
4005   for (i = to->len; p < end; ) {
4006     len = enc_len(enc, p);
4007     if (i + len > OPT_EXACT_MAXLEN) break;
4008     for (j = 0; j < len && p < end; j++)
4009       to->s[i++] = *p++;
4010   }
4011 
4012   to->len = i;
4013   to->reach_end = (p == end ? add->reach_end : 0);
4014 
4015   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4016   if (! to->reach_end) tanc.right_anchor = 0;
4017   copy_opt_anc_info(&to->anc, &tanc);
4018 }
4019 
4020 static void
concat_opt_exact_info_str(OptExactInfo * to,UChar * s,UChar * end,int raw,OnigEncoding enc)4021 concat_opt_exact_info_str(OptExactInfo* to,
4022 			  UChar* s, UChar* end, int raw, OnigEncoding enc)
4023 {
4024   int i, j, len;
4025   UChar *p;
4026 
4027   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4028     len = enc_len(enc, p);
4029     if (i + len > OPT_EXACT_MAXLEN) break;
4030     for (j = 0; j < len && p < end; j++)
4031       to->s[i++] = *p++;
4032   }
4033 
4034   to->len = i;
4035 }
4036 
4037 static void
alt_merge_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OptEnv * env)4038 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4039 {
4040   int i, j, len;
4041 
4042   if (add->len == 0 || to->len == 0) {
4043     clear_opt_exact_info(to);
4044     return ;
4045   }
4046 
4047   if (! is_equal_mml(&to->mmd, &add->mmd)) {
4048     clear_opt_exact_info(to);
4049     return ;
4050   }
4051 
4052   for (i = 0; i < to->len && i < add->len; ) {
4053     if (to->s[i] != add->s[i]) break;
4054     len = enc_len(env->enc, to->s + i);
4055 
4056     for (j = 1; j < len; j++) {
4057       if (to->s[i+j] != add->s[i+j]) break;
4058     }
4059     if (j < len) break;
4060     i += len;
4061   }
4062 
4063   if (! add->reach_end || i < add->len || i < to->len) {
4064     to->reach_end = 0;
4065   }
4066   to->len = i;
4067   to->ignore_case |= add->ignore_case;
4068 
4069   alt_merge_opt_anc_info(&to->anc, &add->anc);
4070   if (! to->reach_end) to->anc.right_anchor = 0;
4071 }
4072 
4073 static void
select_opt_exact_info(OnigEncoding enc,OptExactInfo * now,OptExactInfo * alt)4074 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4075 {
4076   int v1, v2;
4077 
4078   v1 = now->len;
4079   v2 = alt->len;
4080 
4081   if (v2 == 0) {
4082     return ;
4083   }
4084   else if (v1 == 0) {
4085     copy_opt_exact_info(now, alt);
4086     return ;
4087   }
4088   else if (v1 <= 2 && v2 <= 2) {
4089     /* ByteValTable[x] is big value --> low price */
4090     v2 = map_position_value(enc, now->s[0]);
4091     v1 = map_position_value(enc, alt->s[0]);
4092 
4093     if (now->len > 1) v1 += 5;
4094     if (alt->len > 1) v2 += 5;
4095   }
4096 
4097   if (now->ignore_case == 0) v1 *= 2;
4098   if (alt->ignore_case == 0) v2 *= 2;
4099 
4100   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4101     copy_opt_exact_info(now, alt);
4102 }
4103 
4104 static void
clear_opt_map_info(OptMapInfo * map)4105 clear_opt_map_info(OptMapInfo* map)
4106 {
4107   static const OptMapInfo clean_info = {
4108     {0, 0}, {0, 0}, 0,
4109     {
4110       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4111       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4112       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4113       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4114       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4115       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4116       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4117       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4118       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4119       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4120       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4121       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4122       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4123       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4124       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4125       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4126     }
4127   };
4128 
4129   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4130 }
4131 
4132 static void
copy_opt_map_info(OptMapInfo * to,OptMapInfo * from)4133 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4134 {
4135   *to = *from;
4136 }
4137 
4138 static void
add_char_opt_map_info(OptMapInfo * map,UChar c,OnigEncoding enc)4139 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4140 {
4141   if (map->map[c] == 0) {
4142     map->map[c] = 1;
4143     map->value += map_position_value(enc, c);
4144   }
4145 }
4146 
4147 static int
add_char_amb_opt_map_info(OptMapInfo * map,UChar * p,UChar * end,OnigEncoding enc,OnigAmbigType ambig_flag)4148 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4149                           OnigEncoding enc, OnigAmbigType ambig_flag)
4150 {
4151   int i, n, len;
4152   UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
4153   OnigCodePoint code;
4154   const OnigPairAmbigCodes* pccs;
4155   OnigAmbigType amb;
4156 
4157   add_char_opt_map_info(map, p[0], enc);
4158   code = ONIGENC_MBC_TO_CODE(enc, p, end);
4159 
4160   for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
4161     if ((amb & ambig_flag) == 0)  continue;
4162 
4163     n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
4164     for (i = 0; i < n; i++) {
4165       if (pccs[i].from == code) {
4166         len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
4167         if (len < 0) return len;
4168         add_char_opt_map_info(map, buf[0], enc);
4169       }
4170     }
4171   }
4172   return 0;
4173 }
4174 
4175 static void
select_opt_map_info(OptMapInfo * now,OptMapInfo * alt)4176 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4177 {
4178   static int z = 1<<15; /* 32768: something big value */
4179 
4180   int v1, v2;
4181 
4182   if (alt->value == 0) return ;
4183   if (now->value == 0) {
4184     copy_opt_map_info(now, alt);
4185     return ;
4186   }
4187 
4188   v1 = z / now->value;
4189   v2 = z / alt->value;
4190   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4191     copy_opt_map_info(now, alt);
4192 }
4193 
4194 static int
comp_opt_exact_or_map_info(OptExactInfo * e,OptMapInfo * m)4195 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4196 {
4197 #define COMP_EM_BASE  20
4198   int ve, vm;
4199 
4200   if (m->value <= 0) return -1;
4201 
4202   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4203   vm = COMP_EM_BASE * 5 * 2 / m->value;
4204   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4205 }
4206 
4207 static void
alt_merge_opt_map_info(OnigEncoding enc,OptMapInfo * to,OptMapInfo * add)4208 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4209 {
4210   int i, val;
4211 
4212   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4213   if (to->value == 0) return ;
4214   if (add->value == 0 || to->mmd.max < add->mmd.min) {
4215     clear_opt_map_info(to);
4216     return ;
4217   }
4218 
4219   alt_merge_mml(&to->mmd, &add->mmd);
4220 
4221   val = 0;
4222   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4223     if (add->map[i])
4224       to->map[i] = 1;
4225 
4226     if (to->map[i])
4227       val += map_position_value(enc, i);
4228   }
4229   to->value = val;
4230 
4231   alt_merge_opt_anc_info(&to->anc, &add->anc);
4232 }
4233 
4234 static void
set_bound_node_opt_info(NodeOptInfo * opt,MinMaxLen * mmd)4235 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4236 {
4237   copy_mml(&(opt->exb.mmd),  mmd);
4238   copy_mml(&(opt->expr.mmd), mmd);
4239   copy_mml(&(opt->map.mmd),  mmd);
4240 }
4241 
4242 static void
clear_node_opt_info(NodeOptInfo * opt)4243 clear_node_opt_info(NodeOptInfo* opt)
4244 {
4245   clear_mml(&opt->len);
4246   clear_opt_anc_info(&opt->anc);
4247   clear_opt_exact_info(&opt->exb);
4248   clear_opt_exact_info(&opt->exm);
4249   clear_opt_exact_info(&opt->expr);
4250   clear_opt_map_info(&opt->map);
4251 }
4252 
4253 static void
copy_node_opt_info(NodeOptInfo * to,NodeOptInfo * from)4254 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4255 {
4256   *to = *from;
4257 }
4258 
4259 static void
concat_left_node_opt_info(OnigEncoding enc,NodeOptInfo * to,NodeOptInfo * add)4260 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4261 {
4262   int exb_reach, exm_reach;
4263   OptAncInfo tanc;
4264 
4265   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4266   copy_opt_anc_info(&to->anc, &tanc);
4267 
4268   if (add->exb.len > 0 && to->len.max == 0) {
4269     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4270 			to->len.max, add->len.max);
4271     copy_opt_anc_info(&add->exb.anc, &tanc);
4272   }
4273 
4274   if (add->map.value > 0 && to->len.max == 0) {
4275     if (add->map.mmd.max == 0)
4276       add->map.anc.left_anchor |= to->anc.left_anchor;
4277   }
4278 
4279   exb_reach = to->exb.reach_end;
4280   exm_reach = to->exm.reach_end;
4281 
4282   if (add->len.max != 0)
4283     to->exb.reach_end = to->exm.reach_end = 0;
4284 
4285   if (add->exb.len > 0) {
4286     if (exb_reach) {
4287       concat_opt_exact_info(&to->exb, &add->exb, enc);
4288       clear_opt_exact_info(&add->exb);
4289     }
4290     else if (exm_reach) {
4291       concat_opt_exact_info(&to->exm, &add->exb, enc);
4292       clear_opt_exact_info(&add->exb);
4293     }
4294   }
4295   select_opt_exact_info(enc, &to->exm, &add->exb);
4296   select_opt_exact_info(enc, &to->exm, &add->exm);
4297 
4298   if (to->expr.len > 0) {
4299     if (add->len.max > 0) {
4300       if (to->expr.len > (int )add->len.max)
4301 	to->expr.len = add->len.max;
4302 
4303       if (to->expr.mmd.max == 0)
4304 	select_opt_exact_info(enc, &to->exb, &to->expr);
4305       else
4306 	select_opt_exact_info(enc, &to->exm, &to->expr);
4307     }
4308   }
4309   else if (add->expr.len > 0) {
4310     copy_opt_exact_info(&to->expr, &add->expr);
4311   }
4312 
4313   select_opt_map_info(&to->map, &add->map);
4314 
4315   add_mml(&to->len, &add->len);
4316 }
4317 
4318 static void
alt_merge_node_opt_info(NodeOptInfo * to,NodeOptInfo * add,OptEnv * env)4319 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4320 {
4321   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4322   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4323   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4324   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4325   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4326 
4327   alt_merge_mml(&to->len, &add->len);
4328 }
4329 
4330 
4331 #define MAX_NODE_OPT_INFO_REF_COUNT    5
4332 
4333 static int
optimize_node_left(Node * node,NodeOptInfo * opt,OptEnv * env)4334 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4335 {
4336   int type;
4337   int r = 0;
4338 
4339   clear_node_opt_info(opt);
4340   set_bound_node_opt_info(opt, &env->mmd);
4341 
4342   type = NTYPE(node);
4343   switch (type) {
4344   case N_LIST:
4345     {
4346       OptEnv nenv;
4347       NodeOptInfo nopt;
4348       Node* nd = node;
4349 
4350       copy_opt_env(&nenv, env);
4351       do {
4352 	r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
4353 	if (r == 0) {
4354 	  add_mml(&nenv.mmd, &nopt.len);
4355 	  concat_left_node_opt_info(env->enc, opt, &nopt);
4356 	}
4357       } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
4358     }
4359     break;
4360 
4361   case N_ALT:
4362     {
4363       NodeOptInfo nopt;
4364       Node* nd = node;
4365 
4366       do {
4367 	r = optimize_node_left(NCONS(nd).left, &nopt, env);
4368 	if (r == 0) {
4369 	  if (nd == node) copy_node_opt_info(opt, &nopt);
4370 	  else            alt_merge_node_opt_info(opt, &nopt, env);
4371 	}
4372       } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
4373     }
4374     break;
4375 
4376   case N_STRING:
4377     {
4378       StrNode* sn = &(NSTRING(node));
4379       int slen = sn->end - sn->s;
4380       int is_raw = NSTRING_IS_RAW(node);
4381 
4382       if (! NSTRING_IS_AMBIG(node)) {
4383 	concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4384 				  NSTRING_IS_RAW(node), env->enc);
4385 	if (slen > 0) {
4386 	  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4387 	}
4388         set_mml(&opt->len, slen, slen);
4389       }
4390       else {
4391         int n, max;
4392 
4393         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4394                                   is_raw, env->enc);
4395         opt->exb.ignore_case = 1;
4396 
4397 	if (slen > 0) {
4398           r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4399                                         env->enc, env->ambig_flag);
4400           if (r != 0) break;
4401 	}
4402 
4403         if (NSTRING_IS_AMBIG_REDUCE(node)) {
4404           n = onigenc_strlen(env->enc, sn->s, sn->end);
4405           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4406         }
4407         else {
4408           max = slen;
4409         }
4410         set_mml(&opt->len, slen, max);
4411       }
4412 
4413       if (opt->exb.len == slen)
4414 	opt->exb.reach_end = 1;
4415     }
4416     break;
4417 
4418   case N_CCLASS:
4419     {
4420       int i, z;
4421       CClassNode* cc = &(NCCLASS(node));
4422 
4423       /* no need to check ignore case. (setted in setup_tree()) */
4424 
4425       if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
4426         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4427 	OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4428 
4429 	set_mml(&opt->len, min, max);
4430       }
4431       else {
4432         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4433           z = BITSET_AT(cc->bs, i);
4434           if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
4435             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4436           }
4437         }
4438 	set_mml(&opt->len, 1, 1);
4439       }
4440     }
4441     break;
4442 
4443   case N_CTYPE:
4444     {
4445       int i, min, max;
4446 
4447       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4448 
4449       if (max == 1) {
4450         min = 1;
4451 
4452 	switch (NCTYPE(node).type) {
4453 	case CTYPE_NOT_WORD:
4454           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4455             if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4456               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4457             }
4458           }
4459           break;
4460 
4461 	case CTYPE_WORD:
4462           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4463             if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4464               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4465             }
4466           }
4467 	  break;
4468 	}
4469       }
4470       else {
4471         min = ONIGENC_MBC_MINLEN(env->enc);
4472       }
4473       set_mml(&opt->len, min, max);
4474     }
4475     break;
4476 
4477   case N_ANYCHAR:
4478     {
4479       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4480       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4481       set_mml(&opt->len, min, max);
4482     }
4483     break;
4484 
4485   case N_ANCHOR:
4486     switch (NANCHOR(node).type) {
4487     case ANCHOR_BEGIN_BUF:
4488     case ANCHOR_BEGIN_POSITION:
4489     case ANCHOR_BEGIN_LINE:
4490     case ANCHOR_END_BUF:
4491     case ANCHOR_SEMI_END_BUF:
4492     case ANCHOR_END_LINE:
4493       add_opt_anc_info(&opt->anc, NANCHOR(node).type);
4494       break;
4495 
4496     case ANCHOR_PREC_READ:
4497       {
4498 	NodeOptInfo nopt;
4499 
4500 	r = optimize_node_left(NANCHOR(node).target, &nopt, env);
4501 	if (r == 0) {
4502 	  if (nopt.exb.len > 0)
4503 	    copy_opt_exact_info(&opt->expr, &nopt.exb);
4504 	  else if (nopt.exm.len > 0)
4505 	    copy_opt_exact_info(&opt->expr, &nopt.exm);
4506 
4507 	  opt->expr.reach_end = 0;
4508 
4509 	  if (nopt.map.value > 0)
4510 	    copy_opt_map_info(&opt->map, &nopt.map);
4511 	}
4512       }
4513       break;
4514 
4515     case ANCHOR_PREC_READ_NOT:
4516     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4517     case ANCHOR_LOOK_BEHIND_NOT:
4518       break;
4519     }
4520     break;
4521 
4522   case N_BACKREF:
4523     {
4524       int i;
4525       int* backs;
4526       OnigDistance min, max, tmin, tmax;
4527       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4528       BackrefNode* br = &(NBACKREF(node));
4529 
4530       if (br->state & NST_RECURSION) {
4531 	set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4532 	break;
4533       }
4534       backs = BACKREFS_P(br);
4535       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4536       if (r != 0) break;
4537       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4538       if (r != 0) break;
4539       for (i = 1; i < br->back_num; i++) {
4540 	r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4541 	if (r != 0) break;
4542 	r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4543 	if (r != 0) break;
4544 	if (min > tmin) min = tmin;
4545 	if (max < tmax) max = tmax;
4546       }
4547       if (r == 0) set_mml(&opt->len, min, max);
4548     }
4549     break;
4550 
4551 #ifdef USE_SUBEXP_CALL
4552   case N_CALL:
4553     if (IS_CALL_RECURSION(&(NCALL(node))))
4554       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4555     else {
4556       OnigOptionType save = env->options;
4557       env->options = NEFFECT(NCALL(node).target).option;
4558       r = optimize_node_left(NCALL(node).target, opt, env);
4559       env->options = save;
4560     }
4561     break;
4562 #endif
4563 
4564   case N_QUANTIFIER:
4565     {
4566       int i;
4567       OnigDistance min, max;
4568       NodeOptInfo nopt;
4569       QuantifierNode* qn = &(NQUANTIFIER(node));
4570 
4571       r = optimize_node_left(qn->target, &nopt, env);
4572       if (r) break;
4573 
4574       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4575 	if (env->mmd.max == 0 &&
4576 	    NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
4577 	  if (IS_MULTILINE(env->options))
4578 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4579 	  else
4580 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4581 	}
4582       }
4583       else {
4584 	if (qn->lower > 0) {
4585 	  copy_node_opt_info(opt, &nopt);
4586 	  if (nopt.exb.len > 0) {
4587 	    if (nopt.exb.reach_end) {
4588 	      for (i = 2; i < qn->lower &&
4589 		          ! is_full_opt_exact_info(&opt->exb); i++) {
4590 		concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4591 	      }
4592 	      if (i < qn->lower) {
4593 		opt->exb.reach_end = 0;
4594 	      }
4595 	    }
4596 	  }
4597 
4598 	  if (qn->lower != qn->upper) {
4599 	    opt->exb.reach_end = 0;
4600 	    opt->exm.reach_end = 0;
4601 	  }
4602 	  if (qn->lower > 1)
4603 	    opt->exm.reach_end = 0;
4604 	}
4605       }
4606 
4607       min = distance_multiply(nopt.len.min, qn->lower);
4608       if (IS_REPEAT_INFINITE(qn->upper))
4609 	max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4610       else
4611 	max = distance_multiply(nopt.len.max, qn->upper);
4612 
4613       set_mml(&opt->len, min, max);
4614     }
4615     break;
4616 
4617   case N_EFFECT:
4618     {
4619       EffectNode* en = &(NEFFECT(node));
4620 
4621       switch (en->type) {
4622       case EFFECT_OPTION:
4623 	{
4624 	  OnigOptionType save = env->options;
4625 
4626 	  env->options = en->option;
4627 	  r = optimize_node_left(en->target, opt, env);
4628 	  env->options = save;
4629 	}
4630 	break;
4631 
4632       case EFFECT_MEMORY:
4633 #ifdef USE_SUBEXP_CALL
4634 	en->opt_count++;
4635 	if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4636 	  OnigDistance min, max;
4637 
4638 	  min = 0;
4639 	  max = ONIG_INFINITE_DISTANCE;
4640 	  if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
4641 	  if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
4642 	  set_mml(&opt->len, min, max);
4643 	}
4644 	else
4645 #endif
4646 	{
4647 	  r = optimize_node_left(en->target, opt, env);
4648 
4649 	  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4650 	    if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4651 	      remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4652 	  }
4653 	}
4654 	break;
4655 
4656       case EFFECT_STOP_BACKTRACK:
4657 	r = optimize_node_left(en->target, opt, env);
4658 	break;
4659       }
4660     }
4661     break;
4662 
4663   default:
4664 #ifdef ONIG_DEBUG
4665     fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4666 	    NTYPE(node));
4667 #endif
4668     r = ONIGERR_TYPE_BUG;
4669     break;
4670   }
4671 
4672   return r;
4673 }
4674 
4675 static int
set_optimize_exact_info(regex_t * reg,OptExactInfo * e)4676 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4677 {
4678   int r;
4679 
4680   if (e->len == 0) return 0;
4681 
4682   if (e->ignore_case) {
4683     reg->exact = (UChar* )xmalloc(e->len);
4684     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
4685     xmemcpy(reg->exact, e->s, e->len);
4686     reg->exact_end = reg->exact + e->len;
4687     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4688   }
4689   else {
4690     int allow_reverse;
4691 
4692     reg->exact = k_strdup(e->s, e->s + e->len);
4693     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
4694     reg->exact_end = reg->exact + e->len;
4695 
4696     allow_reverse =
4697 	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4698 
4699     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4700       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4701 	              reg->map, &(reg->int_map));
4702       if (r) return r;
4703 
4704       reg->optimize = (allow_reverse != 0
4705 		       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4706     }
4707     else {
4708       reg->optimize = ONIG_OPTIMIZE_EXACT;
4709     }
4710   }
4711 
4712   reg->dmin = e->mmd.min;
4713   reg->dmax = e->mmd.max;
4714 
4715   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4716     reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
4717   }
4718 
4719   return 0;
4720 }
4721 
4722 static void
set_optimize_map_info(regex_t * reg,OptMapInfo * m)4723 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4724 {
4725   int i;
4726 
4727   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4728     reg->map[i] = m->map[i];
4729 
4730   reg->optimize   = ONIG_OPTIMIZE_MAP;
4731   reg->dmin       = m->mmd.min;
4732   reg->dmax       = m->mmd.max;
4733 
4734   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4735     reg->threshold_len = reg->dmin + 1;
4736   }
4737 }
4738 
4739 static void
set_sub_anchor(regex_t * reg,OptAncInfo * anc)4740 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4741 {
4742   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
4743   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4744 }
4745 
4746 #ifdef ONIG_DEBUG
4747 static void print_optimize_info(FILE* f, regex_t* reg);
4748 #endif
4749 
4750 static int
set_optimize_info_from_tree(Node * node,regex_t * reg,ScanEnv * scan_env)4751 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4752 {
4753 
4754   int r;
4755   NodeOptInfo opt;
4756   OptEnv env;
4757 
4758   env.enc        = reg->enc;
4759   env.options    = reg->options;
4760   env.ambig_flag = reg->ambig_flag;
4761   env.scan_env   = scan_env;
4762   clear_mml(&env.mmd);
4763 
4764   r = optimize_node_left(node, &opt, &env);
4765   if (r) return r;
4766 
4767   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
4768         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
4769 
4770   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
4771 
4772   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
4773     reg->anchor_dmin = opt.len.min;
4774     reg->anchor_dmax = opt.len.max;
4775   }
4776 
4777   if (opt.exb.len > 0 || opt.exm.len > 0) {
4778     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
4779     if (opt.map.value > 0 &&
4780 	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
4781       goto set_map;
4782     }
4783     else {
4784       r = set_optimize_exact_info(reg, &opt.exb);
4785       set_sub_anchor(reg, &opt.exb.anc);
4786     }
4787   }
4788   else if (opt.map.value > 0) {
4789   set_map:
4790     set_optimize_map_info(reg, &opt.map);
4791     set_sub_anchor(reg, &opt.map.anc);
4792   }
4793   else {
4794     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
4795     if (opt.len.max == 0)
4796       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
4797   }
4798 
4799 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
4800   print_optimize_info(stderr, reg);
4801 #endif
4802   return r;
4803 }
4804 
4805 static void
clear_optimize_info(regex_t * reg)4806 clear_optimize_info(regex_t* reg)
4807 {
4808   reg->optimize      = ONIG_OPTIMIZE_NONE;
4809   reg->anchor        = 0;
4810   reg->anchor_dmin   = 0;
4811   reg->anchor_dmax   = 0;
4812   reg->sub_anchor    = 0;
4813   reg->exact_end     = (UChar* )NULL;
4814   reg->threshold_len = 0;
4815   if (IS_NOT_NULL(reg->exact)) {
4816     xfree(reg->exact);
4817     reg->exact = (UChar* )NULL;
4818   }
4819 }
4820 
4821 #ifdef ONIG_DEBUG
4822 
print_enc_string(FILE * fp,OnigEncoding enc,const UChar * s,const UChar * end)4823 static void print_enc_string(FILE* fp, OnigEncoding enc,
4824 			     const UChar *s, const UChar *end)
4825 {
4826   fprintf(fp, "\nPATTERN: /");
4827 
4828   if (ONIGENC_MBC_MINLEN(enc) > 1) {
4829     const UChar *p;
4830     OnigCodePoint code;
4831 
4832     p = s;
4833     while (p < end) {
4834       code = ONIGENC_MBC_TO_CODE(enc, p, end);
4835       if (code >= 0x80) {
4836 	fprintf(fp, " 0x%04x ", (int )code);
4837       }
4838       else {
4839 	fputc((int )code, fp);
4840       }
4841 
4842       p += enc_len(enc, p);
4843     }
4844   }
4845   else {
4846     while (s < end) {
4847       fputc((int )*s, fp);
4848       s++;
4849     }
4850   }
4851 
4852   fprintf(fp, "/\n");
4853 }
4854 
4855 static void
print_distance_range(FILE * f,OnigDistance a,OnigDistance b)4856 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
4857 {
4858   if (a == ONIG_INFINITE_DISTANCE)
4859     fputs("inf", f);
4860   else
4861     fprintf(f, "(%u)", a);
4862 
4863   fputs("-", f);
4864 
4865   if (b == ONIG_INFINITE_DISTANCE)
4866     fputs("inf", f);
4867   else
4868     fprintf(f, "(%u)", b);
4869 }
4870 
4871 static void
print_anchor(FILE * f,int anchor)4872 print_anchor(FILE* f, int anchor)
4873 {
4874   int q = 0;
4875 
4876   fprintf(f, "[");
4877 
4878   if (anchor & ANCHOR_BEGIN_BUF) {
4879     fprintf(f, "begin-buf");
4880     q = 1;
4881   }
4882   if (anchor & ANCHOR_BEGIN_LINE) {
4883     if (q) fprintf(f, ", ");
4884     q = 1;
4885     fprintf(f, "begin-line");
4886   }
4887   if (anchor & ANCHOR_BEGIN_POSITION) {
4888     if (q) fprintf(f, ", ");
4889     q = 1;
4890     fprintf(f, "begin-pos");
4891   }
4892   if (anchor & ANCHOR_END_BUF) {
4893     if (q) fprintf(f, ", ");
4894     q = 1;
4895     fprintf(f, "end-buf");
4896   }
4897   if (anchor & ANCHOR_SEMI_END_BUF) {
4898     if (q) fprintf(f, ", ");
4899     q = 1;
4900     fprintf(f, "semi-end-buf");
4901   }
4902   if (anchor & ANCHOR_END_LINE) {
4903     if (q) fprintf(f, ", ");
4904     q = 1;
4905     fprintf(f, "end-line");
4906   }
4907   if (anchor & ANCHOR_ANYCHAR_STAR) {
4908     if (q) fprintf(f, ", ");
4909     q = 1;
4910     fprintf(f, "anychar-star");
4911   }
4912   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
4913     if (q) fprintf(f, ", ");
4914     fprintf(f, "anychar-star-pl");
4915   }
4916 
4917   fprintf(f, "]");
4918 }
4919 
4920 static void
print_optimize_info(FILE * f,regex_t * reg)4921 print_optimize_info(FILE* f, regex_t* reg)
4922 {
4923   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
4924                               "EXACT_IC", "MAP" };
4925 
4926   fprintf(f, "optimize: %s\n", on[reg->optimize]);
4927   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
4928   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
4929     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
4930   fprintf(f, "\n");
4931 
4932   if (reg->optimize) {
4933     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
4934     fprintf(f, "\n");
4935   }
4936   fprintf(f, "\n");
4937 
4938   if (reg->exact) {
4939     UChar *p;
4940     fprintf(f, "exact: [");
4941     for (p = reg->exact; p < reg->exact_end; p++) {
4942       fputc(*p, f);
4943     }
4944     fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
4945   }
4946   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
4947     int c, i, n = 0;
4948 
4949     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4950       if (reg->map[i]) n++;
4951 
4952     fprintf(f, "map: n=%d\n", n);
4953     if (n > 0) {
4954       c = 0;
4955       fputc('[', f);
4956       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4957 	if (reg->map[i] != 0) {
4958           if (c > 0)  fputs(", ", f);
4959           c++;
4960           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
4961               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
4962             fputc(i, f);
4963           else
4964             fprintf(f, "%d", i);
4965         }
4966       }
4967       fprintf(f, "]\n");
4968     }
4969   }
4970 }
4971 #endif /* ONIG_DEBUG */
4972 
4973 
4974 static void
onig_free_body(regex_t * reg)4975 onig_free_body(regex_t* reg)
4976 {
4977   if (IS_NOT_NULL(reg->p))                xfree(reg->p);
4978   if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
4979   if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
4980   if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
4981   if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
4982   if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
4983 
4984 #ifdef USE_NAMED_GROUP
4985   onig_names_free(reg);
4986 #endif
4987 }
4988 
4989 extern void
onig_free(regex_t * reg)4990 onig_free(regex_t* reg)
4991 {
4992   if (IS_NOT_NULL(reg)) {
4993     onig_free_body(reg);
4994     xfree(reg);
4995   }
4996 }
4997 
4998 #define REGEX_TRANSFER(to,from) do {\
4999   (to)->state = ONIG_STATE_MODIFY;\
5000   onig_free_body(to);\
5001   xmemcpy(to, from, sizeof(regex_t));\
5002   xfree(from);\
5003 } while (0)
5004 
5005 extern void
onig_transfer(regex_t * to,regex_t * from)5006 onig_transfer(regex_t* to, regex_t* from)
5007 {
5008   THREAD_ATOMIC_START;
5009   REGEX_TRANSFER(to, from);
5010   THREAD_ATOMIC_END;
5011 }
5012 
5013 #define REGEX_CHAIN_HEAD(reg) do {\
5014   while (IS_NOT_NULL((reg)->chain)) {\
5015     (reg) = (reg)->chain;\
5016   }\
5017 } while (0)
5018 
5019 extern void
onig_chain_link_add(regex_t * to,regex_t * add)5020 onig_chain_link_add(regex_t* to, regex_t* add)
5021 {
5022   THREAD_ATOMIC_START;
5023   REGEX_CHAIN_HEAD(to);
5024   to->chain = add;
5025   THREAD_ATOMIC_END;
5026 }
5027 
5028 extern void
onig_chain_reduce(regex_t * reg)5029 onig_chain_reduce(regex_t* reg)
5030 {
5031   regex_t *head, *prev;
5032 
5033   prev = reg;
5034   head = prev->chain;
5035   if (IS_NOT_NULL(head)) {
5036     reg->state = ONIG_STATE_MODIFY;
5037     while (IS_NOT_NULL(head->chain)) {
5038       prev = head;
5039       head = head->chain;
5040     }
5041     prev->chain = (regex_t* )NULL;
5042     REGEX_TRANSFER(reg, head);
5043   }
5044 }
5045 
5046 #if 0
5047 extern int
5048 onig_clone(regex_t** to, regex_t* from)
5049 {
5050   int r, size;
5051   regex_t* reg;
5052 
5053 #ifdef USE_MULTI_THREAD_SYSTEM
5054   if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
5055     ONIG_STATE_INC(from);
5056     if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5057       onig_chain_reduce(from);
5058       ONIG_STATE_INC(from);
5059     }
5060   }
5061   else {
5062     int n = 0;
5063     while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
5064       if (++n > THREAD_PASS_LIMIT_COUNT)
5065 	return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
5066       THREAD_PASS;
5067     }
5068     ONIG_STATE_INC(from);
5069   }
5070 #endif /* USE_MULTI_THREAD_SYSTEM */
5071 
5072   r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
5073                       from->enc, ONIG_SYNTAX_DEFAULT);
5074   if (r != 0) {
5075     ONIG_STATE_DEC(from);
5076     return r;
5077   }
5078 
5079   xmemcpy(reg, from, sizeof(onig_t));
5080   reg->chain = (regex_t* )NULL;
5081   reg->state = ONIG_STATE_NORMAL;
5082 
5083   if (from->p) {
5084     reg->p = (UChar* )xmalloc(reg->alloc);
5085     if (IS_NULL(reg->p)) goto mem_error;
5086     xmemcpy(reg->p, from->p, reg->alloc);
5087   }
5088 
5089   if (from->exact) {
5090     reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
5091     if (IS_NULL(reg->exact)) goto mem_error;
5092     reg->exact_end = reg->exact + (from->exact_end - from->exact);
5093     xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
5094   }
5095 
5096   if (from->int_map) {
5097     size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5098     reg->int_map = (int* )xmalloc(size);
5099     if (IS_NULL(reg->int_map)) goto mem_error;
5100     xmemcpy(reg->int_map, from->int_map, size);
5101   }
5102 
5103   if (from->int_map_backward) {
5104     size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5105     reg->int_map_backward = (int* )xmalloc(size);
5106     if (IS_NULL(reg->int_map_backward)) goto mem_error;
5107     xmemcpy(reg->int_map_backward, from->int_map_backward, size);
5108   }
5109 
5110 #ifdef USE_NAMED_GROUP
5111   reg->name_table = names_clone(from); /* names_clone is not implemented */
5112 #endif
5113 
5114   ONIG_STATE_DEC(from);
5115   *to = reg;
5116   return 0;
5117 
5118  mem_error:
5119   ONIG_STATE_DEC(from);
5120   return ONIGERR_MEMORY;
5121 }
5122 #endif
5123 
5124 #ifdef ONIG_DEBUG
5125 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5126 #endif
5127 #ifdef ONIG_DEBUG_PARSE_TREE
5128 static void print_tree P_((FILE* f, Node* node));
5129 #endif
5130 
5131 extern int
onig_compile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigErrorInfo * einfo)5132 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5133 	      OnigErrorInfo* einfo)
5134 {
5135 #define COMPILE_INIT_SIZE  20
5136 
5137   int r, init_size;
5138   Node*  root;
5139   ScanEnv  scan_env;
5140 #ifdef USE_SUBEXP_CALL
5141   UnsetAddrList  uslist;
5142 #endif
5143 
5144   reg->state = ONIG_STATE_COMPILING;
5145 
5146 #ifdef ONIG_DEBUG
5147   print_enc_string(stderr, reg->enc, pattern, pattern_end);
5148 #endif
5149 
5150   if (reg->alloc == 0) {
5151     init_size = (pattern_end - pattern) * 2;
5152     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5153     r = BBUF_INIT(reg, init_size);
5154     if (r != 0) goto end;
5155   }
5156   else
5157     reg->used = 0;
5158 
5159   reg->num_mem            = 0;
5160   reg->num_repeat         = 0;
5161   reg->num_null_check     = 0;
5162   reg->repeat_range_alloc = 0;
5163   reg->repeat_range       = (OnigRepeatRange* )NULL;
5164 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5165   reg->num_comb_exp_check = 0;
5166 #endif
5167 
5168   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5169   if (r != 0) goto err;
5170 
5171 #ifdef USE_NAMED_GROUP
5172   /* mixed use named group and no-named group */
5173   if (scan_env.num_named > 0 &&
5174       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5175       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5176     if (scan_env.num_named != scan_env.num_mem)
5177       r = disable_noname_group_capture(&root, reg, &scan_env);
5178     else
5179       r = numbered_ref_check(root);
5180 
5181     if (r != 0) goto err;
5182   }
5183 #endif
5184 
5185 #ifdef ONIG_DEBUG_PARSE_TREE
5186   print_tree(stderr, root);
5187 #endif
5188 
5189 #ifdef USE_SUBEXP_CALL
5190   if (scan_env.num_call > 0) {
5191     r = unset_addr_list_init(&uslist, scan_env.num_call);
5192     if (r != 0) goto err;
5193     scan_env.unset_addr_list = &uslist;
5194     r = setup_subexp_call(root, &scan_env);
5195     if (r != 0) goto err_unset;
5196     r = subexp_recursive_check_trav(root, &scan_env);
5197     if (r  < 0) goto err_unset;
5198     r = subexp_inf_recursive_check_trav(root, &scan_env);
5199     if (r != 0) goto err_unset;
5200 
5201     reg->num_call = scan_env.num_call;
5202   }
5203   else
5204     reg->num_call = 0;
5205 #endif
5206 
5207   r = setup_tree(root, reg, 0, &scan_env);
5208   if (r != 0) goto err_unset;
5209 
5210   reg->capture_history  = scan_env.capture_history;
5211   reg->bt_mem_start     = scan_env.bt_mem_start;
5212   reg->bt_mem_start    |= reg->capture_history;
5213   if (IS_FIND_CONDITION(reg->options))
5214     BIT_STATUS_ON_ALL(reg->bt_mem_end);
5215   else {
5216     reg->bt_mem_end  = scan_env.bt_mem_end;
5217     reg->bt_mem_end |= reg->capture_history;
5218   }
5219 
5220 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5221   if (scan_env.backrefed_mem == 0
5222 #ifdef USE_SUBEXP_CALL
5223       || scan_env.num_call == 0
5224 #endif
5225       ) {
5226     setup_comb_exp_check(root, 0, &scan_env);
5227 #ifdef USE_SUBEXP_CALL
5228     if (scan_env.has_recursion != 0) {
5229       scan_env.num_comb_exp_check = 0;
5230     }
5231     else
5232 #endif
5233     if (scan_env.comb_exp_max_regnum > 0) {
5234       int i;
5235       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5236 	if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5237 	  scan_env.num_comb_exp_check = 0;
5238 	  break;
5239 	}
5240       }
5241     }
5242   }
5243 
5244   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5245 #endif
5246 
5247   clear_optimize_info(reg);
5248 #ifndef ONIG_DONT_OPTIMIZE
5249   r = set_optimize_info_from_tree(root, reg, &scan_env);
5250   if (r != 0) goto err_unset;
5251 #endif
5252 
5253   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5254     xfree(scan_env.mem_nodes_dynamic);
5255     scan_env.mem_nodes_dynamic = (Node** )NULL;
5256   }
5257 
5258   r = compile_tree(root, reg);
5259   if (r == 0) {
5260     r = add_opcode(reg, OP_END);
5261 #ifdef USE_SUBEXP_CALL
5262     if (scan_env.num_call > 0) {
5263       r = unset_addr_list_fix(&uslist, reg);
5264       unset_addr_list_end(&uslist);
5265       if (r) goto err;
5266     }
5267 #endif
5268 
5269     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5270       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5271     else {
5272       if (reg->bt_mem_start != 0)
5273 	reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5274       else
5275 	reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5276     }
5277   }
5278 #ifdef USE_SUBEXP_CALL
5279   else if (scan_env.num_call > 0) {
5280     unset_addr_list_end(&uslist);
5281   }
5282 #endif
5283   onig_node_free(root);
5284 
5285 #ifdef ONIG_DEBUG_COMPILE
5286 #ifdef USE_NAMED_GROUP
5287   onig_print_names(stderr, reg);
5288 #endif
5289   print_compiled_byte_code_list(stderr, reg);
5290 #endif
5291 
5292  end:
5293   reg->state = ONIG_STATE_NORMAL;
5294   return r;
5295 
5296  err_unset:
5297 #ifdef USE_SUBEXP_CALL
5298   if (scan_env.num_call > 0) {
5299     unset_addr_list_end(&uslist);
5300   }
5301 #endif
5302  err:
5303   if (IS_NOT_NULL(scan_env.error)) {
5304     if (IS_NOT_NULL(einfo)) {
5305       einfo->enc     = scan_env.enc;
5306       einfo->par     = scan_env.error;
5307       einfo->par_end = scan_env.error_end;
5308     }
5309   }
5310 
5311   if (IS_NOT_NULL(root)) onig_node_free(root);
5312   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5313       xfree(scan_env.mem_nodes_dynamic);
5314   return r;
5315 }
5316 
5317 #ifdef USE_RECOMPILE_API
5318 extern int
onig_recompile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5319 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5320 	    OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5321 	    OnigErrorInfo* einfo)
5322 {
5323   int r;
5324   regex_t *new_reg;
5325 
5326   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5327   if (r) return r;
5328   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5329     onig_transfer(reg, new_reg);
5330   }
5331   else {
5332     onig_chain_link_add(reg, new_reg);
5333   }
5334   return 0;
5335 }
5336 #endif
5337 
5338 static int onig_inited = 0;
5339 
5340 extern int
onig_alloc_init(regex_t ** reg,OnigOptionType option,OnigAmbigType ambig_flag,OnigEncoding enc,OnigSyntaxType * syntax)5341 onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
5342                 OnigEncoding enc, OnigSyntaxType* syntax)
5343 {
5344   if (! onig_inited)
5345     onig_init();
5346 
5347   if (ONIGENC_IS_UNDEF(enc))
5348     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5349 
5350   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5351       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5352     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5353   }
5354 
5355   *reg = (regex_t* )xmalloc(sizeof(regex_t));
5356   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5357   (*reg)->state = ONIG_STATE_MODIFY;
5358 
5359   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5360     option |= syntax->options;
5361     option &= ~ONIG_OPTION_SINGLELINE;
5362   }
5363   else
5364     option |= syntax->options;
5365 
5366   (*reg)->enc              = enc;
5367   (*reg)->options          = option;
5368   (*reg)->syntax           = syntax;
5369   (*reg)->optimize         = 0;
5370   (*reg)->exact            = (UChar* )NULL;
5371   (*reg)->int_map          = (int* )NULL;
5372   (*reg)->int_map_backward = (int* )NULL;
5373   (*reg)->chain            = (regex_t* )NULL;
5374 
5375   (*reg)->p                = (UChar* )NULL;
5376   (*reg)->alloc            = 0;
5377   (*reg)->used             = 0;
5378   (*reg)->name_table       = (void* )NULL;
5379 
5380   (*reg)->ambig_flag       = ambig_flag;
5381   (*reg)->ambig_flag      &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
5382 
5383   return 0;
5384 }
5385 
5386 extern int
onig_new(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5387 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5388 	  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5389 	  OnigErrorInfo* einfo)
5390 {
5391   int r;
5392 
5393   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5394 
5395   r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
5396                       enc, syntax);
5397   if (r) return r;
5398 
5399   r = onig_compile(*reg, pattern, pattern_end, einfo);
5400   if (r) {
5401     onig_free(*reg);
5402     *reg = NULL;
5403   }
5404   return r;
5405 }
5406 
5407 extern int
onig_init(void)5408 onig_init(void)
5409 {
5410   if (onig_inited != 0)
5411     return 0;
5412 
5413   onig_inited = 1;
5414 
5415   THREAD_SYSTEM_INIT;
5416   THREAD_ATOMIC_START;
5417 
5418   onigenc_init();
5419   onigenc_set_default_caseconv_table((UChar* )0);
5420 
5421 #ifdef ONIG_DEBUG_STATISTICS
5422   onig_statistics_init();
5423 #endif
5424 
5425   THREAD_ATOMIC_END;
5426   return 0;
5427 }
5428 
5429 
5430 extern int
onig_end(void)5431 onig_end(void)
5432 {
5433   extern int onig_free_shared_cclass_table(void);
5434 
5435   THREAD_ATOMIC_START;
5436 
5437 #ifdef ONIG_DEBUG_STATISTICS
5438   onig_print_statistics(stderr);
5439 #endif
5440 
5441 #ifdef USE_SHARED_CCLASS_TABLE
5442   onig_free_shared_cclass_table();
5443 #endif
5444 
5445 #ifdef USE_RECYCLE_NODE
5446   onig_free_node_list();
5447 #endif
5448 
5449   onig_inited = 0;
5450 
5451   THREAD_ATOMIC_END;
5452   THREAD_SYSTEM_END;
5453   return 0;
5454 }
5455 
5456 
5457 #ifdef ONIG_DEBUG
5458 
5459 /* arguments type */
5460 #define ARG_SPECIAL     -1
5461 #define ARG_NON          0
5462 #define ARG_RELADDR      1
5463 #define ARG_ABSADDR      2
5464 #define ARG_LENGTH       3
5465 #define ARG_MEMNUM       4
5466 #define ARG_OPTION       5
5467 #define ARG_STATE_CHECK  6
5468 
5469 OnigOpInfoType OnigOpInfo[] = {
5470   { OP_FINISH,            "finish",          ARG_NON },
5471   { OP_END,               "end",             ARG_NON },
5472   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
5473   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
5474   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
5475   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
5476   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
5477   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
5478   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
5479   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
5480   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
5481   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
5482   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
5483   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
5484   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
5485   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
5486   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
5487   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
5488   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
5489   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
5490   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
5491   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
5492   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
5493   { OP_ANYCHAR,           "anychar",         ARG_NON },
5494   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
5495   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
5496   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
5497   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5498   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5499   { OP_WORD,                "word",            ARG_NON },
5500   { OP_NOT_WORD,            "not-word",        ARG_NON },
5501   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
5502   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
5503   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
5504   { OP_WORD_END,            "word-end",        ARG_NON },
5505   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
5506   { OP_END_BUF,             "end-buf",         ARG_NON },
5507   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
5508   { OP_END_LINE,            "end-line",        ARG_NON },
5509   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
5510   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
5511   { OP_BACKREF1,            "backref1",             ARG_NON },
5512   { OP_BACKREF2,            "backref2",             ARG_NON },
5513   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
5514   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
5515   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
5516   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
5517   { OP_BACKREF_AT_LEVEL,    "backref_at_level",     ARG_SPECIAL },
5518   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
5519   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
5520   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
5521   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
5522   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
5523   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
5524   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
5525   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
5526   { OP_FAIL,                "fail",                 ARG_NON },
5527   { OP_JUMP,                "jump",                 ARG_RELADDR },
5528   { OP_PUSH,                "push",                 ARG_RELADDR },
5529   { OP_POP,                 "pop",                  ARG_NON },
5530   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
5531   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
5532   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
5533   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
5534   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
5535   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
5536   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
5537   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
5538   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
5539   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
5540   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
5541   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
5542   { OP_PUSH_POS,             "push-pos",             ARG_NON },
5543   { OP_POP_POS,              "pop-pos",              ARG_NON },
5544   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
5545   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
5546   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
5547   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
5548   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
5549   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5550   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5551   { OP_CALL,                 "call",                 ARG_ABSADDR },
5552   { OP_RETURN,               "return",               ARG_NON },
5553   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
5554   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5555   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
5556   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
5557   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5558     "state-check-anychar-ml*", ARG_STATE_CHECK },
5559   { -1, "", ARG_NON }
5560 };
5561 
5562 static char*
op2name(int opcode)5563 op2name(int opcode)
5564 {
5565   int i;
5566 
5567   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5568     if (opcode == OnigOpInfo[i].opcode)
5569       return OnigOpInfo[i].name;
5570   }
5571   return "";
5572 }
5573 
5574 static int
op2arg_type(int opcode)5575 op2arg_type(int opcode)
5576 {
5577   int i;
5578 
5579   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5580     if (opcode == OnigOpInfo[i].opcode)
5581       return OnigOpInfo[i].arg_type;
5582   }
5583   return ARG_SPECIAL;
5584 }
5585 
5586 static void
Indent(FILE * f,int indent)5587 Indent(FILE* f, int indent)
5588 {
5589   int i;
5590   for (i = 0; i < indent; i++) putc(' ', f);
5591 }
5592 
5593 static void
p_string(FILE * f,int len,UChar * s)5594 p_string(FILE* f, int len, UChar* s)
5595 {
5596   fputs(":", f);
5597   while (len-- > 0) { fputc(*s++, f); }
5598 }
5599 
5600 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)5601 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5602 {
5603   int x = len * mb_len;
5604 
5605   fprintf(f, ":%d:", len);
5606   while (x-- > 0) { fputc(*s++, f); }
5607 }
5608 
5609 extern void
onig_print_compiled_byte_code(FILE * f,UChar * bp,UChar ** nextp,OnigEncoding enc)5610 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5611                               OnigEncoding enc)
5612 {
5613   int i, n, arg_type;
5614   RelAddrType addr;
5615   LengthType len;
5616   MemNumType mem;
5617   StateCheckNumType scn;
5618   OnigCodePoint code;
5619   UChar *q;
5620 
5621   fprintf(f, "[%s", op2name(*bp));
5622   arg_type = op2arg_type(*bp);
5623   if (arg_type != ARG_SPECIAL) {
5624     bp++;
5625     switch (arg_type) {
5626     case ARG_NON:
5627       break;
5628     case ARG_RELADDR:
5629       GET_RELADDR_INC(addr, bp);
5630       fprintf(f, ":(%d)", addr);
5631       break;
5632     case ARG_ABSADDR:
5633       GET_ABSADDR_INC(addr, bp);
5634       fprintf(f, ":(%d)", addr);
5635       break;
5636     case ARG_LENGTH:
5637       GET_LENGTH_INC(len, bp);
5638       fprintf(f, ":%d", len);
5639       break;
5640     case ARG_MEMNUM:
5641       mem = *((MemNumType* )bp);
5642       bp += SIZE_MEMNUM;
5643       fprintf(f, ":%d", mem);
5644       break;
5645     case ARG_OPTION:
5646       {
5647 	OnigOptionType option = *((OnigOptionType* )bp);
5648 	bp += SIZE_OPTION;
5649 	fprintf(f, ":%d", option);
5650       }
5651       break;
5652 
5653     case ARG_STATE_CHECK:
5654       scn = *((StateCheckNumType* )bp);
5655       bp += SIZE_STATE_CHECK_NUM;
5656       fprintf(f, ":%d", scn);
5657       break;
5658     }
5659   }
5660   else {
5661     switch (*bp++) {
5662     case OP_EXACT1:
5663     case OP_ANYCHAR_STAR_PEEK_NEXT:
5664     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5665       p_string(f, 1, bp++); break;
5666     case OP_EXACT2:
5667       p_string(f, 2, bp); bp += 2; break;
5668     case OP_EXACT3:
5669       p_string(f, 3, bp); bp += 3; break;
5670     case OP_EXACT4:
5671       p_string(f, 4, bp); bp += 4; break;
5672     case OP_EXACT5:
5673       p_string(f, 5, bp); bp += 5; break;
5674     case OP_EXACTN:
5675       GET_LENGTH_INC(len, bp);
5676       p_len_string(f, len, 1, bp);
5677       bp += len;
5678       break;
5679 
5680     case OP_EXACTMB2N1:
5681       p_string(f, 2, bp); bp += 2; break;
5682     case OP_EXACTMB2N2:
5683       p_string(f, 4, bp); bp += 4; break;
5684     case OP_EXACTMB2N3:
5685       p_string(f, 6, bp); bp += 6; break;
5686     case OP_EXACTMB2N:
5687       GET_LENGTH_INC(len, bp);
5688       p_len_string(f, len, 2, bp);
5689       bp += len * 2;
5690       break;
5691     case OP_EXACTMB3N:
5692       GET_LENGTH_INC(len, bp);
5693       p_len_string(f, len, 3, bp);
5694       bp += len * 3;
5695       break;
5696     case OP_EXACTMBN:
5697       {
5698 	int mb_len;
5699 
5700 	GET_LENGTH_INC(mb_len, bp);
5701 	GET_LENGTH_INC(len, bp);
5702 	fprintf(f, ":%d:%d:", mb_len, len);
5703 	n = len * mb_len;
5704 	while (n-- > 0) { fputc(*bp++, f); }
5705       }
5706       break;
5707 
5708     case OP_EXACT1_IC:
5709       len = enc_len(enc, bp);
5710       p_string(f, len, bp);
5711       bp += len;
5712       break;
5713     case OP_EXACTN_IC:
5714       GET_LENGTH_INC(len, bp);
5715       p_len_string(f, len, 1, bp);
5716       bp += len;
5717       break;
5718 
5719     case OP_CCLASS:
5720       n = bitset_on_num((BitSetRef )bp);
5721       bp += SIZE_BITSET;
5722       fprintf(f, ":%d", n);
5723       break;
5724 
5725     case OP_CCLASS_NOT:
5726       n = bitset_on_num((BitSetRef )bp);
5727       bp += SIZE_BITSET;
5728       fprintf(f, ":%d", n);
5729       break;
5730 
5731     case OP_CCLASS_MB:
5732     case OP_CCLASS_MB_NOT:
5733       GET_LENGTH_INC(len, bp);
5734       q = bp;
5735 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5736       ALIGNMENT_RIGHT(q);
5737 #endif
5738       GET_CODE_POINT(code, q);
5739       bp += len;
5740       fprintf(f, ":%d:%d", (int )code, len);
5741       break;
5742 
5743     case OP_CCLASS_MIX:
5744     case OP_CCLASS_MIX_NOT:
5745       n = bitset_on_num((BitSetRef )bp);
5746       bp += SIZE_BITSET;
5747       GET_LENGTH_INC(len, bp);
5748       q = bp;
5749 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5750       ALIGNMENT_RIGHT(q);
5751 #endif
5752       GET_CODE_POINT(code, q);
5753       bp += len;
5754       fprintf(f, ":%d:%d:%d", n, (int )code, len);
5755       break;
5756 
5757     case OP_CCLASS_NODE:
5758       {
5759         CClassNode *cc;
5760 
5761         GET_POINTER_INC(cc, bp);
5762         n = bitset_on_num(cc->bs);
5763         fprintf(f, ":%u:%d", (unsigned int )cc, n);
5764       }
5765       break;
5766 
5767     case OP_BACKREFN_IC:
5768       mem = *((MemNumType* )bp);
5769       bp += SIZE_MEMNUM;
5770       fprintf(f, ":%d", mem);
5771       break;
5772 
5773     case OP_BACKREF_MULTI_IC:
5774     case OP_BACKREF_MULTI:
5775       fputs(" ", f);
5776       GET_LENGTH_INC(len, bp);
5777       for (i = 0; i < len; i++) {
5778 	GET_MEMNUM_INC(mem, bp);
5779 	if (i > 0) fputs(", ", f);
5780 	fprintf(f, "%d", mem);
5781       }
5782       break;
5783 
5784     case OP_BACKREF_AT_LEVEL:
5785       {
5786 	OnigOptionType option;
5787 	LengthType level;
5788 
5789 	GET_OPTION_INC(option, bp);
5790 	fprintf(f, ":%d", option);
5791 	GET_LENGTH_INC(level, bp);
5792 	fprintf(f, ":%d", level);
5793 
5794 	fputs(" ", f);
5795 	GET_LENGTH_INC(len, bp);
5796 	for (i = 0; i < len; i++) {
5797 	  GET_MEMNUM_INC(mem, bp);
5798 	  if (i > 0) fputs(", ", f);
5799 	  fprintf(f, "%d", mem);
5800 	}
5801       }
5802       break;
5803 
5804     case OP_REPEAT:
5805     case OP_REPEAT_NG:
5806       {
5807 	mem = *((MemNumType* )bp);
5808 	bp += SIZE_MEMNUM;
5809 	addr = *((RelAddrType* )bp);
5810 	bp += SIZE_RELADDR;
5811 	fprintf(f, ":%d:%d", mem, addr);
5812       }
5813       break;
5814 
5815     case OP_PUSH_OR_JUMP_EXACT1:
5816     case OP_PUSH_IF_PEEK_NEXT:
5817       addr = *((RelAddrType* )bp);
5818       bp += SIZE_RELADDR;
5819       fprintf(f, ":(%d)", addr);
5820       p_string(f, 1, bp);
5821       bp += 1;
5822       break;
5823 
5824     case OP_LOOK_BEHIND:
5825       GET_LENGTH_INC(len, bp);
5826       fprintf(f, ":%d", len);
5827       break;
5828 
5829     case OP_PUSH_LOOK_BEHIND_NOT:
5830       GET_RELADDR_INC(addr, bp);
5831       GET_LENGTH_INC(len, bp);
5832       fprintf(f, ":%d:(%d)", len, addr);
5833       break;
5834 
5835     case OP_STATE_CHECK_PUSH:
5836     case OP_STATE_CHECK_PUSH_OR_JUMP:
5837       scn = *((StateCheckNumType* )bp);
5838       bp += SIZE_STATE_CHECK_NUM;
5839       addr = *((RelAddrType* )bp);
5840       bp += SIZE_RELADDR;
5841       fprintf(f, ":%d:(%d)", scn, addr);
5842       break;
5843 
5844     default:
5845       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
5846 	      *--bp);
5847     }
5848   }
5849   fputs("]", f);
5850   if (nextp) *nextp = bp;
5851 }
5852 
5853 static void
print_compiled_byte_code_list(FILE * f,regex_t * reg)5854 print_compiled_byte_code_list(FILE* f, regex_t* reg)
5855 {
5856   int ncode;
5857   UChar* bp = reg->p;
5858   UChar* end = reg->p + reg->used;
5859 
5860   fprintf(f, "code length: %d\n", reg->used);
5861 
5862   ncode = 0;
5863   while (bp < end) {
5864     ncode++;
5865     if (bp > reg->p) {
5866       if (ncode % 5 == 0)
5867 	fprintf(f, "\n");
5868       else
5869 	fputs(" ", f);
5870     }
5871     onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
5872   }
5873 
5874   fprintf(f, "\n");
5875 }
5876 
5877 static void
print_indent_tree(FILE * f,Node * node,int indent)5878 print_indent_tree(FILE* f, Node* node, int indent)
5879 {
5880   int i, type;
5881   int add = 3;
5882   UChar* p;
5883 
5884   Indent(f, indent);
5885   if (IS_NULL(node)) {
5886     fprintf(f, "ERROR: null node!!!\n");
5887     exit (0);
5888   }
5889 
5890   type = NTYPE(node);
5891   switch (type) {
5892   case N_LIST:
5893   case N_ALT:
5894     if (NTYPE(node) == N_LIST)
5895       fprintf(f, "<list:%x>\n", (int )node);
5896     else
5897       fprintf(f, "<alt:%x>\n", (int )node);
5898 
5899     print_indent_tree(f, NCONS(node).left, indent + add);
5900     while (IS_NOT_NULL(node = NCONS(node).right)) {
5901       if (NTYPE(node) != type) {
5902 	fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
5903 	exit(0);
5904       }
5905       print_indent_tree(f, NCONS(node).left, indent + add);
5906     }
5907     break;
5908 
5909   case N_STRING:
5910     fprintf(f, "<string%s:%x>",
5911 	    (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
5912     for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
5913       if (*p >= 0x20 && *p < 0x7f)
5914 	fputc(*p, f);
5915       else {
5916 	fprintf(f, " 0x%02x", *p);
5917       }
5918     }
5919     break;
5920 
5921   case N_CCLASS:
5922     fprintf(f, "<cclass:%x>", (int )node);
5923     if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
5924     if (NCCLASS(node).mbuf) {
5925       BBuf* bbuf = NCCLASS(node).mbuf;
5926       for (i = 0; i < bbuf->used; i++) {
5927 	if (i > 0) fprintf(f, ",");
5928 	fprintf(f, "%0x", bbuf->p[i]);
5929       }
5930     }
5931     break;
5932 
5933   case N_CTYPE:
5934     fprintf(f, "<ctype:%x> ", (int )node);
5935     switch (NCTYPE(node).type) {
5936     case CTYPE_WORD:            fputs("word",           f); break;
5937     case CTYPE_NOT_WORD:        fputs("not word",       f); break;
5938     default:
5939       fprintf(f, "ERROR: undefined ctype.\n");
5940       exit(0);
5941     }
5942     break;
5943 
5944   case N_ANYCHAR:
5945     fprintf(f, "<anychar:%x>", (int )node);
5946     break;
5947 
5948   case N_ANCHOR:
5949     fprintf(f, "<anchor:%x> ", (int )node);
5950     switch (NANCHOR(node).type) {
5951     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
5952     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
5953     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
5954     case ANCHOR_END_LINE:       fputs("end line",       f); break;
5955     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
5956     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
5957 
5958     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
5959     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
5960 #ifdef USE_WORD_BEGIN_END
5961     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
5962     case ANCHOR_WORD_END:        fputs("word end", f);       break;
5963 #endif
5964     case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
5965     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
5966     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
5967     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
5968 
5969     default:
5970       fprintf(f, "ERROR: undefined anchor type.\n");
5971       break;
5972     }
5973     break;
5974 
5975   case N_BACKREF:
5976     {
5977       int* p;
5978       BackrefNode* br = &(NBACKREF(node));
5979       p = BACKREFS_P(br);
5980       fprintf(f, "<backref:%x>", (int )node);
5981       for (i = 0; i < br->back_num; i++) {
5982 	if (i > 0) fputs(", ", f);
5983 	fprintf(f, "%d", p[i]);
5984       }
5985     }
5986     break;
5987 
5988 #ifdef USE_SUBEXP_CALL
5989   case N_CALL:
5990     {
5991       CallNode* cn = &(NCALL(node));
5992       fprintf(f, "<call:%x>", (int )node);
5993       p_string(f, cn->name_end - cn->name, cn->name);
5994     }
5995     break;
5996 #endif
5997 
5998   case N_QUANTIFIER:
5999     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6000 	    NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
6001 	    (NQUANTIFIER(node).greedy ? "" : "?"));
6002     print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
6003     break;
6004 
6005   case N_EFFECT:
6006     fprintf(f, "<effect:%x> ", (int )node);
6007     switch (NEFFECT(node).type) {
6008     case EFFECT_OPTION:
6009       fprintf(f, "option:%d\n", NEFFECT(node).option);
6010       print_indent_tree(f, NEFFECT(node).target, indent + add);
6011       break;
6012     case EFFECT_MEMORY:
6013       fprintf(f, "memory:%d", NEFFECT(node).regnum);
6014       break;
6015     case EFFECT_STOP_BACKTRACK:
6016       fprintf(f, "stop-bt");
6017       break;
6018 
6019     default:
6020       break;
6021     }
6022     fprintf(f, "\n");
6023     print_indent_tree(f, NEFFECT(node).target, indent + add);
6024     break;
6025 
6026   default:
6027     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6028     break;
6029   }
6030 
6031   if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
6032       type != N_EFFECT)
6033     fprintf(f, "\n");
6034   fflush(f);
6035 }
6036 #endif /* ONIG_DEBUG */
6037 
6038 #ifdef ONIG_DEBUG_PARSE_TREE
6039 static void
print_tree(FILE * f,Node * node)6040 print_tree(FILE* f, Node* node)
6041 {
6042   print_indent_tree(f, node, 0);
6043 }
6044 #endif
6045