xref: /PHP-5.5/ext/mbstring/oniguruma/regcomp.c (revision fe92d64a)
1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
32 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
33 
34 extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)35 onig_get_default_case_fold_flag(void)
36 {
37   return OnigDefaultCaseFoldFlag;
38 }
39 
40 extern int
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)41 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
42 {
43   OnigDefaultCaseFoldFlag = case_fold_flag;
44   return 0;
45 }
46 
47 
48 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
49 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
50 #endif
51 
52 static UChar*
str_dup(UChar * s,UChar * end)53 str_dup(UChar* s, UChar* end)
54 {
55   int len = end - s;
56 
57   if (len > 0) {
58     UChar* r = (UChar* )xmalloc(len + 1);
59     CHECK_NULL_RETURN(r);
60     xmemcpy(r, s, len);
61     r[len] = (UChar )0;
62     return r;
63   }
64   else return NULL;
65 }
66 
67 static void
swap_node(Node * a,Node * b)68 swap_node(Node* a, Node* b)
69 {
70   Node c;
71   c = *a; *a = *b; *b = c;
72 
73   if (NTYPE(a) == NT_STR) {
74     StrNode* sn = NSTR(a);
75     if (sn->capa == 0) {
76       int len = sn->end - sn->s;
77       sn->s   = sn->buf;
78       sn->end = sn->s + len;
79     }
80   }
81 
82   if (NTYPE(b) == NT_STR) {
83     StrNode* sn = NSTR(b);
84     if (sn->capa == 0) {
85       int len = sn->end - sn->s;
86       sn->s   = sn->buf;
87       sn->end = sn->s + len;
88     }
89   }
90 }
91 
92 static OnigDistance
distance_add(OnigDistance d1,OnigDistance d2)93 distance_add(OnigDistance d1, OnigDistance d2)
94 {
95   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
96     return ONIG_INFINITE_DISTANCE;
97   else {
98     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
99     else return ONIG_INFINITE_DISTANCE;
100   }
101 }
102 
103 static OnigDistance
distance_multiply(OnigDistance d,int m)104 distance_multiply(OnigDistance d, int m)
105 {
106   if (m == 0) return 0;
107 
108   if (d < ONIG_INFINITE_DISTANCE / m)
109     return d * m;
110   else
111     return ONIG_INFINITE_DISTANCE;
112 }
113 
114 static int
bitset_is_empty(BitSetRef bs)115 bitset_is_empty(BitSetRef bs)
116 {
117   int i;
118   for (i = 0; i < (int )BITSET_SIZE; i++) {
119     if (bs[i] != 0) return 0;
120   }
121   return 1;
122 }
123 
124 #ifdef ONIG_DEBUG
125 static int
bitset_on_num(BitSetRef bs)126 bitset_on_num(BitSetRef bs)
127 {
128   int i, n;
129 
130   n = 0;
131   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
132     if (BITSET_AT(bs, i)) n++;
133   }
134   return n;
135 }
136 #endif
137 
138 extern int
onig_bbuf_init(BBuf * buf,int size)139 onig_bbuf_init(BBuf* buf, int size)
140 {
141   if (size <= 0) {
142     size   = 0;
143     buf->p = NULL;
144   }
145   else {
146     buf->p = (UChar* )xmalloc(size);
147     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
148   }
149 
150   buf->alloc = size;
151   buf->used  = 0;
152   return 0;
153 }
154 
155 
156 #ifdef USE_SUBEXP_CALL
157 
158 static int
unset_addr_list_init(UnsetAddrList * uslist,int size)159 unset_addr_list_init(UnsetAddrList* uslist, int size)
160 {
161   UnsetAddr* p;
162 
163   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
164   CHECK_NULL_RETURN_MEMERR(p);
165   uslist->num   = 0;
166   uslist->alloc = size;
167   uslist->us    = p;
168   return 0;
169 }
170 
171 static void
unset_addr_list_end(UnsetAddrList * uslist)172 unset_addr_list_end(UnsetAddrList* uslist)
173 {
174   if (IS_NOT_NULL(uslist->us))
175     xfree(uslist->us);
176 }
177 
178 static int
unset_addr_list_add(UnsetAddrList * uslist,int offset,struct _Node * node)179 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
180 {
181   UnsetAddr* p;
182   int size;
183 
184   if (uslist->num >= uslist->alloc) {
185     size = uslist->alloc * 2;
186     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
187     CHECK_NULL_RETURN_MEMERR(p);
188     uslist->alloc = size;
189     uslist->us    = p;
190   }
191 
192   uslist->us[uslist->num].offset = offset;
193   uslist->us[uslist->num].target = node;
194   uslist->num++;
195   return 0;
196 }
197 #endif /* USE_SUBEXP_CALL */
198 
199 
200 static int
add_opcode(regex_t * reg,int opcode)201 add_opcode(regex_t* reg, int opcode)
202 {
203   BBUF_ADD1(reg, opcode);
204   return 0;
205 }
206 
207 #ifdef USE_COMBINATION_EXPLOSION_CHECK
208 static int
add_state_check_num(regex_t * reg,int num)209 add_state_check_num(regex_t* reg, int num)
210 {
211   StateCheckNumType n = (StateCheckNumType )num;
212 
213   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
214   return 0;
215 }
216 #endif
217 
218 static int
add_rel_addr(regex_t * reg,int addr)219 add_rel_addr(regex_t* reg, int addr)
220 {
221   RelAddrType ra = (RelAddrType )addr;
222 
223   BBUF_ADD(reg, &ra, SIZE_RELADDR);
224   return 0;
225 }
226 
227 static int
add_abs_addr(regex_t * reg,int addr)228 add_abs_addr(regex_t* reg, int addr)
229 {
230   AbsAddrType ra = (AbsAddrType )addr;
231 
232   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
233   return 0;
234 }
235 
236 static int
add_length(regex_t * reg,int len)237 add_length(regex_t* reg, int len)
238 {
239   LengthType l = (LengthType )len;
240 
241   BBUF_ADD(reg, &l, SIZE_LENGTH);
242   return 0;
243 }
244 
245 static int
add_mem_num(regex_t * reg,int num)246 add_mem_num(regex_t* reg, int num)
247 {
248   MemNumType n = (MemNumType )num;
249 
250   BBUF_ADD(reg, &n, SIZE_MEMNUM);
251   return 0;
252 }
253 
254 static int
add_pointer(regex_t * reg,void * addr)255 add_pointer(regex_t* reg, void* addr)
256 {
257   PointerType ptr = (PointerType )addr;
258 
259   BBUF_ADD(reg, &ptr, SIZE_POINTER);
260   return 0;
261 }
262 
263 static int
add_option(regex_t * reg,OnigOptionType option)264 add_option(regex_t* reg, OnigOptionType option)
265 {
266   BBUF_ADD(reg, &option, SIZE_OPTION);
267   return 0;
268 }
269 
270 static int
add_opcode_rel_addr(regex_t * reg,int opcode,int addr)271 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
272 {
273   int r;
274 
275   r = add_opcode(reg, opcode);
276   if (r) return r;
277   r = add_rel_addr(reg, addr);
278   return r;
279 }
280 
281 static int
add_bytes(regex_t * reg,UChar * bytes,int len)282 add_bytes(regex_t* reg, UChar* bytes, int len)
283 {
284   BBUF_ADD(reg, bytes, len);
285   return 0;
286 }
287 
288 static int
add_bitset(regex_t * reg,BitSetRef bs)289 add_bitset(regex_t* reg, BitSetRef bs)
290 {
291   BBUF_ADD(reg, bs, SIZE_BITSET);
292   return 0;
293 }
294 
295 static int
add_opcode_option(regex_t * reg,int opcode,OnigOptionType option)296 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
297 {
298   int r;
299 
300   r = add_opcode(reg, opcode);
301   if (r) return r;
302   r = add_option(reg, option);
303   return r;
304 }
305 
306 static int compile_length_tree(Node* node, regex_t* reg);
307 static int compile_tree(Node* node, regex_t* reg);
308 
309 
310 #define IS_NEED_STR_LEN_OP_EXACT(op) \
311    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
312     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
313 
314 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)315 select_str_opcode(int mb_len, int str_len, int ignore_case)
316 {
317   int op;
318 
319   if (ignore_case) {
320     switch (str_len) {
321     case 1:  op = OP_EXACT1_IC; break;
322     default: op = OP_EXACTN_IC; break;
323     }
324   }
325   else {
326     switch (mb_len) {
327     case 1:
328       switch (str_len) {
329       case 1:  op = OP_EXACT1; break;
330       case 2:  op = OP_EXACT2; break;
331       case 3:  op = OP_EXACT3; break;
332       case 4:  op = OP_EXACT4; break;
333       case 5:  op = OP_EXACT5; break;
334       default: op = OP_EXACTN; break;
335       }
336       break;
337 
338     case 2:
339       switch (str_len) {
340       case 1:  op = OP_EXACTMB2N1; break;
341       case 2:  op = OP_EXACTMB2N2; break;
342       case 3:  op = OP_EXACTMB2N3; break;
343       default: op = OP_EXACTMB2N;  break;
344       }
345       break;
346 
347     case 3:
348       op = OP_EXACTMB3N;
349       break;
350 
351     default:
352       op = OP_EXACTMBN;
353       break;
354     }
355   }
356   return op;
357 }
358 
359 static int
compile_tree_empty_check(Node * node,regex_t * reg,int empty_info)360 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
361 {
362   int r;
363   int saved_num_null_check = reg->num_null_check;
364 
365   if (empty_info != 0) {
366     r = add_opcode(reg, OP_NULL_CHECK_START);
367     if (r) return r;
368     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
369     if (r) return r;
370     reg->num_null_check++;
371   }
372 
373   r = compile_tree(node, reg);
374   if (r) return r;
375 
376   if (empty_info != 0) {
377     if (empty_info == NQ_TARGET_IS_EMPTY)
378       r = add_opcode(reg, OP_NULL_CHECK_END);
379     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
380       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
381     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
382       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
383 
384     if (r) return r;
385     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
386   }
387   return r;
388 }
389 
390 #ifdef USE_SUBEXP_CALL
391 static int
compile_call(CallNode * node,regex_t * reg)392 compile_call(CallNode* node, regex_t* reg)
393 {
394   int r;
395 
396   r = add_opcode(reg, OP_CALL);
397   if (r) return r;
398   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
399                           node->target);
400   if (r) return r;
401   r = add_abs_addr(reg, 0 /*dummy addr.*/);
402   return r;
403 }
404 #endif
405 
406 static int
compile_tree_n_times(Node * node,int n,regex_t * reg)407 compile_tree_n_times(Node* node, int n, regex_t* reg)
408 {
409   int i, r;
410 
411   for (i = 0; i < n; i++) {
412     r = compile_tree(node, reg);
413     if (r) return r;
414   }
415   return 0;
416 }
417 
418 static int
add_compile_string_length(UChar * s ARG_UNUSED,int mb_len,int str_len,regex_t * reg ARG_UNUSED,int ignore_case)419 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
420                           regex_t* reg ARG_UNUSED, int ignore_case)
421 {
422   int len;
423   int op = select_str_opcode(mb_len, str_len, ignore_case);
424 
425   len = SIZE_OPCODE;
426 
427   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
428   if (IS_NEED_STR_LEN_OP_EXACT(op))
429     len += SIZE_LENGTH;
430 
431   len += mb_len * str_len;
432   return len;
433 }
434 
435 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)436 add_compile_string(UChar* s, int mb_len, int str_len,
437                    regex_t* reg, int ignore_case)
438 {
439   int op = select_str_opcode(mb_len, str_len, ignore_case);
440   add_opcode(reg, op);
441 
442   if (op == OP_EXACTMBN)
443     add_length(reg, mb_len);
444 
445   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
446     if (op == OP_EXACTN_IC)
447       add_length(reg, mb_len * str_len);
448     else
449       add_length(reg, str_len);
450   }
451 
452   add_bytes(reg, s, mb_len * str_len);
453   return 0;
454 }
455 
456 
457 static int
compile_length_string_node(Node * node,regex_t * reg)458 compile_length_string_node(Node* node, regex_t* reg)
459 {
460   int rlen, r, len, prev_len, slen, ambig;
461   OnigEncoding enc = reg->enc;
462   UChar *p, *prev;
463   StrNode* sn;
464 
465   sn = NSTR(node);
466   if (sn->end <= sn->s)
467     return 0;
468 
469   ambig = NSTRING_IS_AMBIG(node);
470 
471   p = prev = sn->s;
472   prev_len = enclen(enc, p);
473   p += prev_len;
474   slen = 1;
475   rlen = 0;
476 
477   for (; p < sn->end; ) {
478     len = enclen(enc, p);
479     if (len == prev_len) {
480       slen++;
481     }
482     else {
483       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
484       rlen += r;
485       prev = p;
486       slen = 1;
487       prev_len = len;
488     }
489     p += len;
490   }
491   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
492   rlen += r;
493   return rlen;
494 }
495 
496 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)497 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
498 {
499   if (sn->end <= sn->s)
500     return 0;
501 
502   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
503 }
504 
505 static int
compile_string_node(Node * node,regex_t * reg)506 compile_string_node(Node* node, regex_t* reg)
507 {
508   int r, len, prev_len, slen, ambig;
509   OnigEncoding enc = reg->enc;
510   UChar *p, *prev, *end;
511   StrNode* sn;
512 
513   sn = NSTR(node);
514   if (sn->end <= sn->s)
515     return 0;
516 
517   end = sn->end;
518   ambig = NSTRING_IS_AMBIG(node);
519 
520   p = prev = sn->s;
521   prev_len = enclen(enc, p);
522   p += prev_len;
523   slen = 1;
524 
525   for (; p < end; ) {
526     len = enclen(enc, p);
527     if (len == prev_len) {
528       slen++;
529     }
530     else {
531       r = add_compile_string(prev, prev_len, slen, reg, ambig);
532       if (r) return r;
533 
534       prev  = p;
535       slen  = 1;
536       prev_len = len;
537     }
538 
539     p += len;
540   }
541   return add_compile_string(prev, prev_len, slen, reg, ambig);
542 }
543 
544 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)545 compile_string_raw_node(StrNode* sn, regex_t* reg)
546 {
547   if (sn->end <= sn->s)
548     return 0;
549 
550   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
551 }
552 
553 static int
add_multi_byte_cclass(BBuf * mbuf,regex_t * reg)554 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
555 {
556 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
557   add_length(reg, mbuf->used);
558   return add_bytes(reg, mbuf->p, mbuf->used);
559 #else
560   int r, pad_size;
561   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
562 
563   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
564   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
565   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
566 
567   r = add_bytes(reg, mbuf->p, mbuf->used);
568 
569   /* padding for return value from compile_length_cclass_node() to be fix. */
570   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
571   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572   return r;
573 #endif
574 }
575 
576 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)577 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
578 {
579   int len;
580 
581   if (IS_NCCLASS_SHARE(cc)) {
582     len = SIZE_OPCODE + SIZE_POINTER;
583     return len;
584   }
585 
586   if (IS_NULL(cc->mbuf)) {
587     len = SIZE_OPCODE + SIZE_BITSET;
588   }
589   else {
590     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
591       len = SIZE_OPCODE;
592     }
593     else {
594       len = SIZE_OPCODE + SIZE_BITSET;
595     }
596 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
597     len += SIZE_LENGTH + cc->mbuf->used;
598 #else
599     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
600 #endif
601   }
602 
603   return len;
604 }
605 
606 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)607 compile_cclass_node(CClassNode* cc, regex_t* reg)
608 {
609   int r;
610 
611   if (IS_NCCLASS_SHARE(cc)) {
612     add_opcode(reg, OP_CCLASS_NODE);
613     r = add_pointer(reg, cc);
614     return r;
615   }
616 
617   if (IS_NULL(cc->mbuf)) {
618     if (IS_NCCLASS_NOT(cc))
619       add_opcode(reg, OP_CCLASS_NOT);
620     else
621       add_opcode(reg, OP_CCLASS);
622 
623     r = add_bitset(reg, cc->bs);
624   }
625   else {
626     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
627       if (IS_NCCLASS_NOT(cc))
628         add_opcode(reg, OP_CCLASS_MB_NOT);
629       else
630         add_opcode(reg, OP_CCLASS_MB);
631 
632       r = add_multi_byte_cclass(cc->mbuf, reg);
633     }
634     else {
635       if (IS_NCCLASS_NOT(cc))
636         add_opcode(reg, OP_CCLASS_MIX_NOT);
637       else
638         add_opcode(reg, OP_CCLASS_MIX);
639 
640       r = add_bitset(reg, cc->bs);
641       if (r) return r;
642       r = add_multi_byte_cclass(cc->mbuf, reg);
643     }
644   }
645 
646   return r;
647 }
648 
649 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)650 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
651 {
652 #define REPEAT_RANGE_ALLOC  4
653 
654   OnigRepeatRange* p;
655 
656   if (reg->repeat_range_alloc == 0) {
657     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
658     CHECK_NULL_RETURN_MEMERR(p);
659     reg->repeat_range = p;
660     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
661   }
662   else if (reg->repeat_range_alloc <= id) {
663     int n;
664     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
665     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
666                                     sizeof(OnigRepeatRange) * n);
667     CHECK_NULL_RETURN_MEMERR(p);
668     reg->repeat_range = p;
669     reg->repeat_range_alloc = n;
670   }
671   else {
672     p = reg->repeat_range;
673   }
674 
675   p[id].lower = lower;
676   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
677   return 0;
678 }
679 
680 static int
compile_range_repeat_node(QtfrNode * qn,int target_len,int empty_info,regex_t * reg)681 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
682                           regex_t* reg)
683 {
684   int r;
685   int num_repeat = reg->num_repeat;
686 
687   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
688   if (r) return r;
689   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
690   reg->num_repeat++;
691   if (r) return r;
692   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
693   if (r) return r;
694 
695   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
696   if (r) return r;
697 
698   r = compile_tree_empty_check(qn->target, reg, empty_info);
699   if (r) return r;
700 
701   if (
702 #ifdef USE_SUBEXP_CALL
703       reg->num_call > 0 ||
704 #endif
705       IS_QUANTIFIER_IN_REPEAT(qn)) {
706     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
707   }
708   else {
709     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
710   }
711   if (r) return r;
712   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
713   return r;
714 }
715 
716 static int
is_anychar_star_quantifier(QtfrNode * qn)717 is_anychar_star_quantifier(QtfrNode* qn)
718 {
719   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
720       NTYPE(qn->target) == NT_CANY)
721     return 1;
722   else
723     return 0;
724 }
725 
726 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
727 #define CKN_ON   (ckn > 0)
728 
729 #ifdef USE_COMBINATION_EXPLOSION_CHECK
730 
731 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)732 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
733 {
734   int len, mod_tlen, cklen;
735   int ckn;
736   int infinite = IS_REPEAT_INFINITE(qn->upper);
737   int empty_info = qn->target_empty_info;
738   int tlen = compile_length_tree(qn->target, reg);
739 
740   if (tlen < 0) return tlen;
741 
742   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
743 
744   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
745 
746   /* anychar repeat */
747   if (NTYPE(qn->target) == NT_CANY) {
748     if (qn->greedy && infinite) {
749       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
750         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
751       else
752         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
753     }
754   }
755 
756   if (empty_info != 0)
757     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
758   else
759     mod_tlen = tlen;
760 
761   if (infinite && qn->lower <= 1) {
762     if (qn->greedy) {
763       if (qn->lower == 1)
764 	len = SIZE_OP_JUMP;
765       else
766 	len = 0;
767 
768       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
769     }
770     else {
771       if (qn->lower == 0)
772 	len = SIZE_OP_JUMP;
773       else
774 	len = 0;
775 
776       len += mod_tlen + SIZE_OP_PUSH + cklen;
777     }
778   }
779   else if (qn->upper == 0) {
780     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
781       len = SIZE_OP_JUMP + tlen;
782     else
783       len = 0;
784   }
785   else if (qn->upper == 1 && qn->greedy) {
786     if (qn->lower == 0) {
787       if (CKN_ON) {
788 	len = SIZE_OP_STATE_CHECK_PUSH + tlen;
789       }
790       else {
791 	len = SIZE_OP_PUSH + tlen;
792       }
793     }
794     else {
795       len = tlen;
796     }
797   }
798   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
799     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
800   }
801   else {
802     len = SIZE_OP_REPEAT_INC
803         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
804     if (CKN_ON)
805       len += SIZE_OP_STATE_CHECK;
806   }
807 
808   return len;
809 }
810 
811 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)812 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
813 {
814   int r, mod_tlen;
815   int ckn;
816   int infinite = IS_REPEAT_INFINITE(qn->upper);
817   int empty_info = qn->target_empty_info;
818   int tlen = compile_length_tree(qn->target, reg);
819 
820   if (tlen < 0) return tlen;
821 
822   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
823 
824   if (is_anychar_star_quantifier(qn)) {
825     r = compile_tree_n_times(qn->target, qn->lower, reg);
826     if (r) return r;
827     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
828       if (IS_MULTILINE(reg->options))
829 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
830       else
831 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
832       if (r) return r;
833       if (CKN_ON) {
834 	r = add_state_check_num(reg, ckn);
835 	if (r) return r;
836       }
837 
838       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
839     }
840     else {
841       if (IS_MULTILINE(reg->options)) {
842 	r = add_opcode(reg, (CKN_ON ?
843 			       OP_STATE_CHECK_ANYCHAR_ML_STAR
844 			     : OP_ANYCHAR_ML_STAR));
845       }
846       else {
847 	r = add_opcode(reg, (CKN_ON ?
848 			       OP_STATE_CHECK_ANYCHAR_STAR
849 			     : OP_ANYCHAR_STAR));
850       }
851       if (r) return r;
852       if (CKN_ON)
853 	r = add_state_check_num(reg, ckn);
854 
855       return r;
856     }
857   }
858 
859   if (empty_info != 0)
860     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
861   else
862     mod_tlen = tlen;
863 
864   if (infinite && qn->lower <= 1) {
865     if (qn->greedy) {
866       if (qn->lower == 1) {
867 	r = add_opcode_rel_addr(reg, OP_JUMP,
868 			(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
869 	if (r) return r;
870       }
871 
872       if (CKN_ON) {
873 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
874 	if (r) return r;
875 	r = add_state_check_num(reg, ckn);
876 	if (r) return r;
877 	r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
878       }
879       else {
880 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
881       }
882       if (r) return r;
883       r = compile_tree_empty_check(qn->target, reg, empty_info);
884       if (r) return r;
885       r = add_opcode_rel_addr(reg, OP_JUMP,
886 	      -(mod_tlen + (int )SIZE_OP_JUMP
887 		+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
888     }
889     else {
890       if (qn->lower == 0) {
891 	r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
892 	if (r) return r;
893       }
894       r = compile_tree_empty_check(qn->target, reg, empty_info);
895       if (r) return r;
896       if (CKN_ON) {
897 	r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
898 	if (r) return r;
899 	r = add_state_check_num(reg, ckn);
900 	if (r) return r;
901 	r = add_rel_addr(reg,
902 		 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
903       }
904       else
905 	r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
906     }
907   }
908   else if (qn->upper == 0) {
909     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
910       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
911       if (r) return r;
912       r = compile_tree(qn->target, reg);
913     }
914     else
915       r = 0;
916   }
917   else if (qn->upper == 1 && qn->greedy) {
918     if (qn->lower == 0) {
919       if (CKN_ON) {
920 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
921 	if (r) return r;
922 	r = add_state_check_num(reg, ckn);
923 	if (r) return r;
924 	r = add_rel_addr(reg, tlen);
925       }
926       else {
927 	r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
928       }
929       if (r) return r;
930     }
931 
932     r = compile_tree(qn->target, reg);
933   }
934   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
935     if (CKN_ON) {
936       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937       if (r) return r;
938       r = add_state_check_num(reg, ckn);
939       if (r) return r;
940       r = add_rel_addr(reg, SIZE_OP_JUMP);
941     }
942     else {
943       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
944     }
945 
946     if (r) return r;
947     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
948     if (r) return r;
949     r = compile_tree(qn->target, reg);
950   }
951   else {
952     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
953     if (CKN_ON) {
954       if (r) return r;
955       r = add_opcode(reg, OP_STATE_CHECK);
956       if (r) return r;
957       r = add_state_check_num(reg, ckn);
958     }
959   }
960   return r;
961 }
962 
963 #else /* USE_COMBINATION_EXPLOSION_CHECK */
964 
965 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)966 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
967 {
968   int len, mod_tlen;
969   int infinite = IS_REPEAT_INFINITE(qn->upper);
970   int empty_info = qn->target_empty_info;
971   int tlen = compile_length_tree(qn->target, reg);
972 
973   if (tlen < 0) return tlen;
974 
975   /* anychar repeat */
976   if (NTYPE(qn->target) == NT_CANY) {
977     if (qn->greedy && infinite) {
978       if (IS_NOT_NULL(qn->next_head_exact))
979         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
980       else
981         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
982     }
983   }
984 
985   if (empty_info != 0)
986     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
987   else
988     mod_tlen = tlen;
989 
990   if (infinite &&
991       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
992     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
993       len = SIZE_OP_JUMP;
994     }
995     else {
996       len = tlen * qn->lower;
997     }
998 
999     if (qn->greedy) {
1000       if (IS_NOT_NULL(qn->head_exact))
1001 	len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1002       else if (IS_NOT_NULL(qn->next_head_exact))
1003 	len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1004       else
1005 	len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1006     }
1007     else
1008       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1009   }
1010   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1011     len = SIZE_OP_JUMP + tlen;
1012   }
1013   else if (!infinite && qn->greedy &&
1014            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1015                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1016     len = tlen * qn->lower;
1017     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1018   }
1019   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1020     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1021   }
1022   else {
1023     len = SIZE_OP_REPEAT_INC
1024         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1025   }
1026 
1027   return len;
1028 }
1029 
1030 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)1031 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1032 {
1033   int i, r, mod_tlen;
1034   int infinite = IS_REPEAT_INFINITE(qn->upper);
1035   int empty_info = qn->target_empty_info;
1036   int tlen = compile_length_tree(qn->target, reg);
1037 
1038   if (tlen < 0) return tlen;
1039 
1040   if (is_anychar_star_quantifier(qn)) {
1041     r = compile_tree_n_times(qn->target, qn->lower, reg);
1042     if (r) return r;
1043     if (IS_NOT_NULL(qn->next_head_exact)) {
1044       if (IS_MULTILINE(reg->options))
1045 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1046       else
1047 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1048       if (r) return r;
1049       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1050     }
1051     else {
1052       if (IS_MULTILINE(reg->options))
1053 	return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1054       else
1055 	return add_opcode(reg, OP_ANYCHAR_STAR);
1056     }
1057   }
1058 
1059   if (empty_info != 0)
1060     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1061   else
1062     mod_tlen = tlen;
1063 
1064   if (infinite &&
1065       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1066     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1067       if (qn->greedy) {
1068 	if (IS_NOT_NULL(qn->head_exact))
1069 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1070 	else if (IS_NOT_NULL(qn->next_head_exact))
1071 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1072 	else
1073 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1074       }
1075       else {
1076 	r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1077       }
1078       if (r) return r;
1079     }
1080     else {
1081       r = compile_tree_n_times(qn->target, qn->lower, reg);
1082       if (r) return r;
1083     }
1084 
1085     if (qn->greedy) {
1086       if (IS_NOT_NULL(qn->head_exact)) {
1087 	r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1088 			     mod_tlen + SIZE_OP_JUMP);
1089 	if (r) return r;
1090 	add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1091 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1092 	if (r) return r;
1093 	r = add_opcode_rel_addr(reg, OP_JUMP,
1094 	-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1095       }
1096       else if (IS_NOT_NULL(qn->next_head_exact)) {
1097 	r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1098 				mod_tlen + SIZE_OP_JUMP);
1099 	if (r) return r;
1100 	add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1101 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1102 	if (r) return r;
1103 	r = add_opcode_rel_addr(reg, OP_JUMP,
1104           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1105       }
1106       else {
1107 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1108 	if (r) return r;
1109 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1110 	if (r) return r;
1111 	r = add_opcode_rel_addr(reg, OP_JUMP,
1112 		     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1113       }
1114     }
1115     else {
1116       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1117       if (r) return r;
1118       r = compile_tree_empty_check(qn->target, reg, empty_info);
1119       if (r) return r;
1120       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1121     }
1122   }
1123   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1124     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1125     if (r) return r;
1126     r = compile_tree(qn->target, reg);
1127   }
1128   else if (!infinite && qn->greedy &&
1129            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1130                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1131     int n = qn->upper - qn->lower;
1132 
1133     r = compile_tree_n_times(qn->target, qn->lower, reg);
1134     if (r) return r;
1135 
1136     for (i = 0; i < n; i++) {
1137       r = add_opcode_rel_addr(reg, OP_PUSH,
1138 			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1139       if (r) return r;
1140       r = compile_tree(qn->target, reg);
1141       if (r) return r;
1142     }
1143   }
1144   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1145     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1146     if (r) return r;
1147     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1148     if (r) return r;
1149     r = compile_tree(qn->target, reg);
1150   }
1151   else {
1152     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1153   }
1154   return r;
1155 }
1156 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1157 
1158 static int
compile_length_option_node(EncloseNode * node,regex_t * reg)1159 compile_length_option_node(EncloseNode* node, regex_t* reg)
1160 {
1161   int tlen;
1162   OnigOptionType prev = reg->options;
1163 
1164   reg->options = node->option;
1165   tlen = compile_length_tree(node->target, reg);
1166   reg->options = prev;
1167 
1168   if (tlen < 0) return tlen;
1169 
1170   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1171     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1172            + tlen + SIZE_OP_SET_OPTION;
1173   }
1174   else
1175     return tlen;
1176 }
1177 
1178 static int
compile_option_node(EncloseNode * node,regex_t * reg)1179 compile_option_node(EncloseNode* node, regex_t* reg)
1180 {
1181   int r;
1182   OnigOptionType prev = reg->options;
1183 
1184   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1185     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1186     if (r) return r;
1187     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1188     if (r) return r;
1189     r = add_opcode(reg, OP_FAIL);
1190     if (r) return r;
1191   }
1192 
1193   reg->options = node->option;
1194   r = compile_tree(node->target, reg);
1195   reg->options = prev;
1196 
1197   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1198     if (r) return r;
1199     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1200   }
1201   return r;
1202 }
1203 
1204 static int
compile_length_enclose_node(EncloseNode * node,regex_t * reg)1205 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1206 {
1207   int len;
1208   int tlen;
1209 
1210   if (node->type == ENCLOSE_OPTION)
1211     return compile_length_option_node(node, reg);
1212 
1213   if (node->target) {
1214     tlen = compile_length_tree(node->target, reg);
1215     if (tlen < 0) return tlen;
1216   }
1217   else
1218     tlen = 0;
1219 
1220   switch (node->type) {
1221   case ENCLOSE_MEMORY:
1222 #ifdef USE_SUBEXP_CALL
1223     if (IS_ENCLOSE_CALLED(node)) {
1224       len = SIZE_OP_MEMORY_START_PUSH + tlen
1225 	  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1226       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1227 	len += (IS_ENCLOSE_RECURSION(node)
1228 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1229       else
1230 	len += (IS_ENCLOSE_RECURSION(node)
1231 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1232     }
1233     else
1234 #endif
1235     {
1236       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1237 	len = SIZE_OP_MEMORY_START_PUSH;
1238       else
1239 	len = SIZE_OP_MEMORY_START;
1240 
1241       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1242 		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1243     }
1244     break;
1245 
1246   case ENCLOSE_STOP_BACKTRACK:
1247     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1248       QtfrNode* qn = NQTFR(node->target);
1249       tlen = compile_length_tree(qn->target, reg);
1250       if (tlen < 0) return tlen;
1251 
1252       len = tlen * qn->lower
1253 	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1254     }
1255     else {
1256       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1257     }
1258     break;
1259 
1260   default:
1261     return ONIGERR_TYPE_BUG;
1262     break;
1263   }
1264 
1265   return len;
1266 }
1267 
1268 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1269 
1270 static int
compile_enclose_node(EncloseNode * node,regex_t * reg)1271 compile_enclose_node(EncloseNode* node, regex_t* reg)
1272 {
1273   int r, len;
1274 
1275   if (node->type == ENCLOSE_OPTION)
1276     return compile_option_node(node, reg);
1277 
1278   switch (node->type) {
1279   case ENCLOSE_MEMORY:
1280 #ifdef USE_SUBEXP_CALL
1281     if (IS_ENCLOSE_CALLED(node)) {
1282       r = add_opcode(reg, OP_CALL);
1283       if (r) return r;
1284       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1285       node->state |= NST_ADDR_FIXED;
1286       r = add_abs_addr(reg, (int )node->call_addr);
1287       if (r) return r;
1288       len = compile_length_tree(node->target, reg);
1289       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1290       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1291 	len += (IS_ENCLOSE_RECURSION(node)
1292 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1293       else
1294 	len += (IS_ENCLOSE_RECURSION(node)
1295 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1296 
1297       r = add_opcode_rel_addr(reg, OP_JUMP, len);
1298       if (r) return r;
1299     }
1300 #endif
1301     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1302       r = add_opcode(reg, OP_MEMORY_START_PUSH);
1303     else
1304       r = add_opcode(reg, OP_MEMORY_START);
1305     if (r) return r;
1306     r = add_mem_num(reg, node->regnum);
1307     if (r) return r;
1308     r = compile_tree(node->target, reg);
1309     if (r) return r;
1310 #ifdef USE_SUBEXP_CALL
1311     if (IS_ENCLOSE_CALLED(node)) {
1312       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1313 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1314 			     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1315       else
1316 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1317 			     ? OP_MEMORY_END_REC : OP_MEMORY_END));
1318 
1319       if (r) return r;
1320       r = add_mem_num(reg, node->regnum);
1321       if (r) return r;
1322       r = add_opcode(reg, OP_RETURN);
1323     }
1324     else
1325 #endif
1326     {
1327       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1328 	r = add_opcode(reg, OP_MEMORY_END_PUSH);
1329       else
1330 	r = add_opcode(reg, OP_MEMORY_END);
1331       if (r) return r;
1332       r = add_mem_num(reg, node->regnum);
1333     }
1334     break;
1335 
1336   case ENCLOSE_STOP_BACKTRACK:
1337     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1338       QtfrNode* qn = NQTFR(node->target);
1339       r = compile_tree_n_times(qn->target, qn->lower, reg);
1340       if (r) return r;
1341 
1342       len = compile_length_tree(qn->target, reg);
1343       if (len < 0) return len;
1344 
1345       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1346       if (r) return r;
1347       r = compile_tree(qn->target, reg);
1348       if (r) return r;
1349       r = add_opcode(reg, OP_POP);
1350       if (r) return r;
1351       r = add_opcode_rel_addr(reg, OP_JUMP,
1352 	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1353     }
1354     else {
1355       r = add_opcode(reg, OP_PUSH_STOP_BT);
1356       if (r) return r;
1357       r = compile_tree(node->target, reg);
1358       if (r) return r;
1359       r = add_opcode(reg, OP_POP_STOP_BT);
1360     }
1361     break;
1362 
1363   default:
1364     return ONIGERR_TYPE_BUG;
1365     break;
1366   }
1367 
1368   return r;
1369 }
1370 
1371 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1372 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1373 {
1374   int len;
1375   int tlen = 0;
1376 
1377   if (node->target) {
1378     tlen = compile_length_tree(node->target, reg);
1379     if (tlen < 0) return tlen;
1380   }
1381 
1382   switch (node->type) {
1383   case ANCHOR_PREC_READ:
1384     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1385     break;
1386   case ANCHOR_PREC_READ_NOT:
1387     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1388     break;
1389   case ANCHOR_LOOK_BEHIND:
1390     len = SIZE_OP_LOOK_BEHIND + tlen;
1391     break;
1392   case ANCHOR_LOOK_BEHIND_NOT:
1393     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1394     break;
1395 
1396   default:
1397     len = SIZE_OPCODE;
1398     break;
1399   }
1400 
1401   return len;
1402 }
1403 
1404 static int
compile_anchor_node(AnchorNode * node,regex_t * reg)1405 compile_anchor_node(AnchorNode* node, regex_t* reg)
1406 {
1407   int r, len;
1408 
1409   switch (node->type) {
1410   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1411   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1412   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1413   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1414   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1415   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1416 
1417   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
1418   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1419 #ifdef USE_WORD_BEGIN_END
1420   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
1421   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
1422 #endif
1423 
1424   case ANCHOR_PREC_READ:
1425     r = add_opcode(reg, OP_PUSH_POS);
1426     if (r) return r;
1427     r = compile_tree(node->target, reg);
1428     if (r) return r;
1429     r = add_opcode(reg, OP_POP_POS);
1430     break;
1431 
1432   case ANCHOR_PREC_READ_NOT:
1433     len = compile_length_tree(node->target, reg);
1434     if (len < 0) return len;
1435     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1436     if (r) return r;
1437     r = compile_tree(node->target, reg);
1438     if (r) return r;
1439     r = add_opcode(reg, OP_FAIL_POS);
1440     break;
1441 
1442   case ANCHOR_LOOK_BEHIND:
1443     {
1444       int n;
1445       r = add_opcode(reg, OP_LOOK_BEHIND);
1446       if (r) return r;
1447       if (node->char_len < 0) {
1448 	r = get_char_length_tree(node->target, reg, &n);
1449 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1450       }
1451       else
1452 	n = node->char_len;
1453       r = add_length(reg, n);
1454       if (r) return r;
1455       r = compile_tree(node->target, reg);
1456     }
1457     break;
1458 
1459   case ANCHOR_LOOK_BEHIND_NOT:
1460     {
1461       int n;
1462       len = compile_length_tree(node->target, reg);
1463       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1464 			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1465       if (r) return r;
1466       if (node->char_len < 0) {
1467 	r = get_char_length_tree(node->target, reg, &n);
1468 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1469       }
1470       else
1471 	n = node->char_len;
1472       r = add_length(reg, n);
1473       if (r) return r;
1474       r = compile_tree(node->target, reg);
1475       if (r) return r;
1476       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1477     }
1478     break;
1479 
1480   default:
1481     return ONIGERR_TYPE_BUG;
1482     break;
1483   }
1484 
1485   return r;
1486 }
1487 
1488 static int
compile_length_tree(Node * node,regex_t * reg)1489 compile_length_tree(Node* node, regex_t* reg)
1490 {
1491   int len, type, r;
1492 
1493   type = NTYPE(node);
1494   switch (type) {
1495   case NT_LIST:
1496     len = 0;
1497     do {
1498       r = compile_length_tree(NCAR(node), reg);
1499       if (r < 0) return r;
1500       len += r;
1501     } while (IS_NOT_NULL(node = NCDR(node)));
1502     r = len;
1503     break;
1504 
1505   case NT_ALT:
1506     {
1507       int n;
1508 
1509       n = r = 0;
1510       do {
1511 	r += compile_length_tree(NCAR(node), reg);
1512 	n++;
1513       } while (IS_NOT_NULL(node = NCDR(node)));
1514       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1515     }
1516     break;
1517 
1518   case NT_STR:
1519     if (NSTRING_IS_RAW(node))
1520       r = compile_length_string_raw_node(NSTR(node), reg);
1521     else
1522       r = compile_length_string_node(node, reg);
1523     break;
1524 
1525   case NT_CCLASS:
1526     r = compile_length_cclass_node(NCCLASS(node), reg);
1527     break;
1528 
1529   case NT_CTYPE:
1530   case NT_CANY:
1531     r = SIZE_OPCODE;
1532     break;
1533 
1534   case NT_BREF:
1535     {
1536       BRefNode* br = NBREF(node);
1537 
1538 #ifdef USE_BACKREF_WITH_LEVEL
1539       if (IS_BACKREF_NEST_LEVEL(br)) {
1540         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1541             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1542       }
1543       else
1544 #endif
1545       if (br->back_num == 1) {
1546 	r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1547 	     ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1548       }
1549       else {
1550 	r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1551       }
1552     }
1553     break;
1554 
1555 #ifdef USE_SUBEXP_CALL
1556   case NT_CALL:
1557     r = SIZE_OP_CALL;
1558     break;
1559 #endif
1560 
1561   case NT_QTFR:
1562     r = compile_length_quantifier_node(NQTFR(node), reg);
1563     break;
1564 
1565   case NT_ENCLOSE:
1566     r = compile_length_enclose_node(NENCLOSE(node), reg);
1567     break;
1568 
1569   case NT_ANCHOR:
1570     r = compile_length_anchor_node(NANCHOR(node), reg);
1571     break;
1572 
1573   default:
1574     return ONIGERR_TYPE_BUG;
1575     break;
1576   }
1577 
1578   return r;
1579 }
1580 
1581 static int
compile_tree(Node * node,regex_t * reg)1582 compile_tree(Node* node, regex_t* reg)
1583 {
1584   int n, type, len, pos, r = 0;
1585 
1586   type = NTYPE(node);
1587   switch (type) {
1588   case NT_LIST:
1589     do {
1590       r = compile_tree(NCAR(node), reg);
1591     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1592     break;
1593 
1594   case NT_ALT:
1595     {
1596       Node* x = node;
1597       len = 0;
1598       do {
1599 	len += compile_length_tree(NCAR(x), reg);
1600 	if (NCDR(x) != NULL) {
1601 	  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1602 	}
1603       } while (IS_NOT_NULL(x = NCDR(x)));
1604       pos = reg->used + len;  /* goal position */
1605 
1606       do {
1607 	len = compile_length_tree(NCAR(node), reg);
1608 	if (IS_NOT_NULL(NCDR(node))) {
1609 	  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1610 	  if (r) break;
1611 	}
1612 	r = compile_tree(NCAR(node), reg);
1613 	if (r) break;
1614 	if (IS_NOT_NULL(NCDR(node))) {
1615 	  len = pos - (reg->used + SIZE_OP_JUMP);
1616 	  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1617 	  if (r) break;
1618 	}
1619       } while (IS_NOT_NULL(node = NCDR(node)));
1620     }
1621     break;
1622 
1623   case NT_STR:
1624     if (NSTRING_IS_RAW(node))
1625       r = compile_string_raw_node(NSTR(node), reg);
1626     else
1627       r = compile_string_node(node, reg);
1628     break;
1629 
1630   case NT_CCLASS:
1631     r = compile_cclass_node(NCCLASS(node), reg);
1632     break;
1633 
1634   case NT_CTYPE:
1635     {
1636       int op;
1637 
1638       switch (NCTYPE(node)->ctype) {
1639       case ONIGENC_CTYPE_WORD:
1640 	if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1641 	else                         op = OP_WORD;
1642 	break;
1643       default:
1644 	return ONIGERR_TYPE_BUG;
1645 	break;
1646       }
1647       r = add_opcode(reg, op);
1648     }
1649     break;
1650 
1651   case NT_CANY:
1652     if (IS_MULTILINE(reg->options))
1653       r = add_opcode(reg, OP_ANYCHAR_ML);
1654     else
1655       r = add_opcode(reg, OP_ANYCHAR);
1656     break;
1657 
1658   case NT_BREF:
1659     {
1660       BRefNode* br = NBREF(node);
1661 
1662 #ifdef USE_BACKREF_WITH_LEVEL
1663       if (IS_BACKREF_NEST_LEVEL(br)) {
1664 	r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1665 	if (r) return r;
1666 	r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1667 	if (r) return r;
1668 	r = add_length(reg, br->nest_level);
1669 	if (r) return r;
1670 
1671 	goto add_bacref_mems;
1672       }
1673       else
1674 #endif
1675       if (br->back_num == 1) {
1676 	n = br->back_static[0];
1677 	if (IS_IGNORECASE(reg->options)) {
1678 	  r = add_opcode(reg, OP_BACKREFN_IC);
1679 	  if (r) return r;
1680 	  r = add_mem_num(reg, n);
1681 	}
1682 	else {
1683 	  switch (n) {
1684 	  case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1685 	  case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1686 	  default:
1687 	    r = add_opcode(reg, OP_BACKREFN);
1688 	    if (r) return r;
1689 	    r = add_mem_num(reg, n);
1690 	    break;
1691 	  }
1692 	}
1693       }
1694       else {
1695 	int i;
1696 	int* p;
1697 
1698         if (IS_IGNORECASE(reg->options)) {
1699           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1700         }
1701         else {
1702           r = add_opcode(reg, OP_BACKREF_MULTI);
1703         }
1704 	if (r) return r;
1705 
1706 #ifdef USE_BACKREF_WITH_LEVEL
1707       add_bacref_mems:
1708 #endif
1709 	r = add_length(reg, br->back_num);
1710 	if (r) return r;
1711 	p = BACKREFS_P(br);
1712 	for (i = br->back_num - 1; i >= 0; i--) {
1713 	  r = add_mem_num(reg, p[i]);
1714 	  if (r) return r;
1715 	}
1716       }
1717     }
1718     break;
1719 
1720 #ifdef USE_SUBEXP_CALL
1721   case NT_CALL:
1722     r = compile_call(NCALL(node), reg);
1723     break;
1724 #endif
1725 
1726   case NT_QTFR:
1727     r = compile_quantifier_node(NQTFR(node), reg);
1728     break;
1729 
1730   case NT_ENCLOSE:
1731     r = compile_enclose_node(NENCLOSE(node), reg);
1732     break;
1733 
1734   case NT_ANCHOR:
1735     r = compile_anchor_node(NANCHOR(node), reg);
1736     break;
1737 
1738   default:
1739 #ifdef ONIG_DEBUG
1740     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1741 #endif
1742     break;
1743   }
1744 
1745   return r;
1746 }
1747 
1748 #ifdef USE_NAMED_GROUP
1749 
1750 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)1751 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1752 {
1753   int r = 0;
1754   Node* node = *plink;
1755 
1756   switch (NTYPE(node)) {
1757   case NT_LIST:
1758   case NT_ALT:
1759     do {
1760       r = noname_disable_map(&(NCAR(node)), map, counter);
1761     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1762     break;
1763 
1764   case NT_QTFR:
1765     {
1766       Node** ptarget = &(NQTFR(node)->target);
1767       Node*  old = *ptarget;
1768       r = noname_disable_map(ptarget, map, counter);
1769       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1770 	onig_reduce_nested_quantifier(node, *ptarget);
1771       }
1772     }
1773     break;
1774 
1775   case NT_ENCLOSE:
1776     {
1777       EncloseNode* en = NENCLOSE(node);
1778       if (en->type == ENCLOSE_MEMORY) {
1779 	if (IS_ENCLOSE_NAMED_GROUP(en)) {
1780 	  (*counter)++;
1781 	  map[en->regnum].new_val = *counter;
1782 	  en->regnum = *counter;
1783 	  r = noname_disable_map(&(en->target), map, counter);
1784 	}
1785 	else {
1786 	  *plink = en->target;
1787 	  en->target = NULL_NODE;
1788 	  onig_node_free(node);
1789 	  r = noname_disable_map(plink, map, counter);
1790 	}
1791       }
1792       else
1793 	r = noname_disable_map(&(en->target), map, counter);
1794     }
1795     break;
1796 
1797   default:
1798     break;
1799   }
1800 
1801   return r;
1802 }
1803 
1804 static int
renumber_node_backref(Node * node,GroupNumRemap * map)1805 renumber_node_backref(Node* node, GroupNumRemap* map)
1806 {
1807   int i, pos, n, old_num;
1808   int *backs;
1809   BRefNode* bn = NBREF(node);
1810 
1811   if (! IS_BACKREF_NAME_REF(bn))
1812     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1813 
1814   old_num = bn->back_num;
1815   if (IS_NULL(bn->back_dynamic))
1816     backs = bn->back_static;
1817   else
1818     backs = bn->back_dynamic;
1819 
1820   for (i = 0, pos = 0; i < old_num; i++) {
1821     n = map[backs[i]].new_val;
1822     if (n > 0) {
1823       backs[pos] = n;
1824       pos++;
1825     }
1826   }
1827 
1828   bn->back_num = pos;
1829   return 0;
1830 }
1831 
1832 static int
renumber_by_map(Node * node,GroupNumRemap * map)1833 renumber_by_map(Node* node, GroupNumRemap* map)
1834 {
1835   int r = 0;
1836 
1837   switch (NTYPE(node)) {
1838   case NT_LIST:
1839   case NT_ALT:
1840     do {
1841       r = renumber_by_map(NCAR(node), map);
1842     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1843     break;
1844   case NT_QTFR:
1845     r = renumber_by_map(NQTFR(node)->target, map);
1846     break;
1847   case NT_ENCLOSE:
1848     r = renumber_by_map(NENCLOSE(node)->target, map);
1849     break;
1850 
1851   case NT_BREF:
1852     r = renumber_node_backref(node, map);
1853     break;
1854 
1855   default:
1856     break;
1857   }
1858 
1859   return r;
1860 }
1861 
1862 static int
numbered_ref_check(Node * node)1863 numbered_ref_check(Node* node)
1864 {
1865   int r = 0;
1866 
1867   switch (NTYPE(node)) {
1868   case NT_LIST:
1869   case NT_ALT:
1870     do {
1871       r = numbered_ref_check(NCAR(node));
1872     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873     break;
1874   case NT_QTFR:
1875     r = numbered_ref_check(NQTFR(node)->target);
1876     break;
1877   case NT_ENCLOSE:
1878     r = numbered_ref_check(NENCLOSE(node)->target);
1879     break;
1880 
1881   case NT_BREF:
1882     if (! IS_BACKREF_NAME_REF(NBREF(node)))
1883       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1884     break;
1885 
1886   default:
1887     break;
1888   }
1889 
1890   return r;
1891 }
1892 
1893 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)1894 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1895 {
1896   int r, i, pos, counter;
1897   BitStatusType loc;
1898   GroupNumRemap* map;
1899 
1900   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
1901   CHECK_NULL_RETURN_MEMERR(map);
1902   for (i = 1; i <= env->num_mem; i++) {
1903     map[i].new_val = 0;
1904   }
1905   counter = 0;
1906   r = noname_disable_map(root, map, &counter);
1907   if (r != 0) return r;
1908 
1909   r = renumber_by_map(*root, map);
1910   if (r != 0) return r;
1911 
1912   for (i = 1, pos = 1; i <= env->num_mem; i++) {
1913     if (map[i].new_val > 0) {
1914       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1915       pos++;
1916     }
1917   }
1918 
1919   loc = env->capture_history;
1920   BIT_STATUS_CLEAR(env->capture_history);
1921   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1922     if (BIT_STATUS_AT(loc, i)) {
1923       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1924     }
1925   }
1926 
1927   env->num_mem = env->num_named;
1928   reg->num_mem = env->num_named;
1929 
1930   return onig_renumber_name_table(reg, map);
1931 }
1932 #endif /* USE_NAMED_GROUP */
1933 
1934 #ifdef USE_SUBEXP_CALL
1935 static int
unset_addr_list_fix(UnsetAddrList * uslist,regex_t * reg)1936 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1937 {
1938   int i, offset;
1939   EncloseNode* en;
1940   AbsAddrType addr;
1941 
1942   for (i = 0; i < uslist->num; i++) {
1943     en = NENCLOSE(uslist->us[i].target);
1944     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1945     addr = en->call_addr;
1946     offset = uslist->us[i].offset;
1947 
1948     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1949   }
1950   return 0;
1951 }
1952 #endif
1953 
1954 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1955 static int
quantifiers_memory_node_info(Node * node)1956 quantifiers_memory_node_info(Node* node)
1957 {
1958   int r = 0;
1959 
1960   switch (NTYPE(node)) {
1961   case NT_LIST:
1962   case NT_ALT:
1963     {
1964       int v;
1965       do {
1966 	v = quantifiers_memory_node_info(NCAR(node));
1967 	if (v > r) r = v;
1968       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1969     }
1970     break;
1971 
1972 #ifdef USE_SUBEXP_CALL
1973   case NT_CALL:
1974     if (IS_CALL_RECURSION(NCALL(node))) {
1975       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1976     }
1977     else
1978       r = quantifiers_memory_node_info(NCALL(node)->target);
1979     break;
1980 #endif
1981 
1982   case NT_QTFR:
1983     {
1984       QtfrNode* qn = NQTFR(node);
1985       if (qn->upper != 0) {
1986 	r = quantifiers_memory_node_info(qn->target);
1987       }
1988     }
1989     break;
1990 
1991   case NT_ENCLOSE:
1992     {
1993       EncloseNode* en = NENCLOSE(node);
1994       switch (en->type) {
1995       case ENCLOSE_MEMORY:
1996 	return NQ_TARGET_IS_EMPTY_MEM;
1997 	break;
1998 
1999       case ENCLOSE_OPTION:
2000       case ENCLOSE_STOP_BACKTRACK:
2001 	r = quantifiers_memory_node_info(en->target);
2002 	break;
2003       default:
2004 	break;
2005       }
2006     }
2007     break;
2008 
2009   case NT_BREF:
2010   case NT_STR:
2011   case NT_CTYPE:
2012   case NT_CCLASS:
2013   case NT_CANY:
2014   case NT_ANCHOR:
2015   default:
2016     break;
2017   }
2018 
2019   return r;
2020 }
2021 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2022 
2023 static int
get_min_match_length(Node * node,OnigDistance * min,ScanEnv * env)2024 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2025 {
2026   OnigDistance tmin;
2027   int r = 0;
2028 
2029   *min = 0;
2030   switch (NTYPE(node)) {
2031   case NT_BREF:
2032     {
2033       int i;
2034       int* backs;
2035       Node** nodes = SCANENV_MEM_NODES(env);
2036       BRefNode* br = NBREF(node);
2037       if (br->state & NST_RECURSION) break;
2038 
2039       backs = BACKREFS_P(br);
2040       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2041       r = get_min_match_length(nodes[backs[0]], min, env);
2042       if (r != 0) break;
2043       for (i = 1; i < br->back_num; i++) {
2044 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2045 	r = get_min_match_length(nodes[backs[i]], &tmin, env);
2046 	if (r != 0) break;
2047 	if (*min > tmin) *min = tmin;
2048       }
2049     }
2050     break;
2051 
2052 #ifdef USE_SUBEXP_CALL
2053   case NT_CALL:
2054     if (IS_CALL_RECURSION(NCALL(node))) {
2055       EncloseNode* en = NENCLOSE(NCALL(node)->target);
2056       if (IS_ENCLOSE_MIN_FIXED(en))
2057 	*min = en->min_len;
2058     }
2059     else
2060       r = get_min_match_length(NCALL(node)->target, min, env);
2061     break;
2062 #endif
2063 
2064   case NT_LIST:
2065     do {
2066       r = get_min_match_length(NCAR(node), &tmin, env);
2067       if (r == 0) *min += tmin;
2068     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2069     break;
2070 
2071   case NT_ALT:
2072     {
2073       Node *x, *y;
2074       y = node;
2075       do {
2076 	x = NCAR(y);
2077 	r = get_min_match_length(x, &tmin, env);
2078 	if (r != 0) break;
2079 	if (y == node) *min = tmin;
2080 	else if (*min > tmin) *min = tmin;
2081       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2082     }
2083     break;
2084 
2085   case NT_STR:
2086     {
2087       StrNode* sn = NSTR(node);
2088       *min = sn->end - sn->s;
2089     }
2090     break;
2091 
2092   case NT_CTYPE:
2093     *min = 1;
2094     break;
2095 
2096   case NT_CCLASS:
2097   case NT_CANY:
2098     *min = 1;
2099     break;
2100 
2101   case NT_QTFR:
2102     {
2103       QtfrNode* qn = NQTFR(node);
2104 
2105       if (qn->lower > 0) {
2106 	r = get_min_match_length(qn->target, min, env);
2107 	if (r == 0)
2108 	  *min = distance_multiply(*min, qn->lower);
2109       }
2110     }
2111     break;
2112 
2113   case NT_ENCLOSE:
2114     {
2115       EncloseNode* en = NENCLOSE(node);
2116       switch (en->type) {
2117       case ENCLOSE_MEMORY:
2118 #ifdef USE_SUBEXP_CALL
2119 	if (IS_ENCLOSE_MIN_FIXED(en))
2120 	  *min = en->min_len;
2121 	else {
2122 	  r = get_min_match_length(en->target, min, env);
2123 	  if (r == 0) {
2124 	    en->min_len = *min;
2125 	    SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2126 	  }
2127 	}
2128 	break;
2129 #endif
2130       case ENCLOSE_OPTION:
2131       case ENCLOSE_STOP_BACKTRACK:
2132 	r = get_min_match_length(en->target, min, env);
2133 	break;
2134       }
2135     }
2136     break;
2137 
2138   case NT_ANCHOR:
2139   default:
2140     break;
2141   }
2142 
2143   return r;
2144 }
2145 
2146 static int
get_max_match_length(Node * node,OnigDistance * max,ScanEnv * env)2147 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2148 {
2149   OnigDistance tmax;
2150   int r = 0;
2151 
2152   *max = 0;
2153   switch (NTYPE(node)) {
2154   case NT_LIST:
2155     do {
2156       r = get_max_match_length(NCAR(node), &tmax, env);
2157       if (r == 0)
2158 	*max = distance_add(*max, tmax);
2159     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2160     break;
2161 
2162   case NT_ALT:
2163     do {
2164       r = get_max_match_length(NCAR(node), &tmax, env);
2165       if (r == 0 && *max < tmax) *max = tmax;
2166     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2167     break;
2168 
2169   case NT_STR:
2170     {
2171       StrNode* sn = NSTR(node);
2172       *max = sn->end - sn->s;
2173     }
2174     break;
2175 
2176   case NT_CTYPE:
2177     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2178     break;
2179 
2180   case NT_CCLASS:
2181   case NT_CANY:
2182     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2183     break;
2184 
2185   case NT_BREF:
2186     {
2187       int i;
2188       int* backs;
2189       Node** nodes = SCANENV_MEM_NODES(env);
2190       BRefNode* br = NBREF(node);
2191       if (br->state & NST_RECURSION) {
2192 	*max = ONIG_INFINITE_DISTANCE;
2193 	break;
2194       }
2195       backs = BACKREFS_P(br);
2196       for (i = 0; i < br->back_num; i++) {
2197 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2198 	r = get_max_match_length(nodes[backs[i]], &tmax, env);
2199 	if (r != 0) break;
2200 	if (*max < tmax) *max = tmax;
2201       }
2202     }
2203     break;
2204 
2205 #ifdef USE_SUBEXP_CALL
2206   case NT_CALL:
2207     if (! IS_CALL_RECURSION(NCALL(node)))
2208       r = get_max_match_length(NCALL(node)->target, max, env);
2209     else
2210       *max = ONIG_INFINITE_DISTANCE;
2211     break;
2212 #endif
2213 
2214   case NT_QTFR:
2215     {
2216       QtfrNode* qn = NQTFR(node);
2217 
2218       if (qn->upper != 0) {
2219 	r = get_max_match_length(qn->target, max, env);
2220 	if (r == 0 && *max != 0) {
2221 	  if (! IS_REPEAT_INFINITE(qn->upper))
2222 	    *max = distance_multiply(*max, qn->upper);
2223 	  else
2224 	    *max = ONIG_INFINITE_DISTANCE;
2225 	}
2226       }
2227     }
2228     break;
2229 
2230   case NT_ENCLOSE:
2231     {
2232       EncloseNode* en = NENCLOSE(node);
2233       switch (en->type) {
2234       case ENCLOSE_MEMORY:
2235 #ifdef USE_SUBEXP_CALL
2236 	if (IS_ENCLOSE_MAX_FIXED(en))
2237 	  *max = en->max_len;
2238 	else {
2239 	  r = get_max_match_length(en->target, max, env);
2240 	  if (r == 0) {
2241 	    en->max_len = *max;
2242 	    SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2243 	  }
2244 	}
2245 	break;
2246 #endif
2247       case ENCLOSE_OPTION:
2248       case ENCLOSE_STOP_BACKTRACK:
2249 	r = get_max_match_length(en->target, max, env);
2250 	break;
2251       }
2252     }
2253     break;
2254 
2255   case NT_ANCHOR:
2256   default:
2257     break;
2258   }
2259 
2260   return r;
2261 }
2262 
2263 #define GET_CHAR_LEN_VARLEN           -1
2264 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2265 
2266 /* fixed size pattern node only */
2267 static int
get_char_length_tree1(Node * node,regex_t * reg,int * len,int level)2268 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2269 {
2270   int tlen;
2271   int r = 0;
2272 
2273   level++;
2274   *len = 0;
2275   switch (NTYPE(node)) {
2276   case NT_LIST:
2277     do {
2278       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2279       if (r == 0)
2280 	*len = distance_add(*len, tlen);
2281     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2282     break;
2283 
2284   case NT_ALT:
2285     {
2286       int tlen2;
2287       int varlen = 0;
2288 
2289       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2290       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2291 	r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2292 	if (r == 0) {
2293 	  if (tlen != tlen2)
2294 	    varlen = 1;
2295 	}
2296       }
2297       if (r == 0) {
2298 	if (varlen != 0) {
2299 	  if (level == 1)
2300 	    r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2301 	  else
2302 	    r = GET_CHAR_LEN_VARLEN;
2303 	}
2304 	else
2305 	  *len = tlen;
2306       }
2307     }
2308     break;
2309 
2310   case NT_STR:
2311     {
2312       StrNode* sn = NSTR(node);
2313       UChar *s = sn->s;
2314       while (s < sn->end) {
2315 	s += enclen(reg->enc, s);
2316 	(*len)++;
2317       }
2318     }
2319     break;
2320 
2321   case NT_QTFR:
2322     {
2323       QtfrNode* qn = NQTFR(node);
2324       if (qn->lower == qn->upper) {
2325 	r = get_char_length_tree1(qn->target, reg, &tlen, level);
2326 	if (r == 0)
2327 	  *len = distance_multiply(tlen, qn->lower);
2328       }
2329       else
2330 	r = GET_CHAR_LEN_VARLEN;
2331     }
2332     break;
2333 
2334 #ifdef USE_SUBEXP_CALL
2335   case NT_CALL:
2336     if (! IS_CALL_RECURSION(NCALL(node)))
2337       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2338     else
2339       r = GET_CHAR_LEN_VARLEN;
2340     break;
2341 #endif
2342 
2343   case NT_CTYPE:
2344     *len = 1;
2345     break;
2346 
2347   case NT_CCLASS:
2348   case NT_CANY:
2349     *len = 1;
2350     break;
2351 
2352   case NT_ENCLOSE:
2353     {
2354       EncloseNode* en = NENCLOSE(node);
2355       switch (en->type) {
2356       case ENCLOSE_MEMORY:
2357 #ifdef USE_SUBEXP_CALL
2358 	if (IS_ENCLOSE_CLEN_FIXED(en))
2359 	  *len = en->char_len;
2360 	else {
2361 	  r = get_char_length_tree1(en->target, reg, len, level);
2362 	  if (r == 0) {
2363 	    en->char_len = *len;
2364 	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2365 	  }
2366 	}
2367 	break;
2368 #endif
2369       case ENCLOSE_OPTION:
2370       case ENCLOSE_STOP_BACKTRACK:
2371 	r = get_char_length_tree1(en->target, reg, len, level);
2372 	break;
2373       default:
2374 	break;
2375       }
2376     }
2377     break;
2378 
2379   case NT_ANCHOR:
2380     break;
2381 
2382   default:
2383     r = GET_CHAR_LEN_VARLEN;
2384     break;
2385   }
2386 
2387   return r;
2388 }
2389 
2390 static int
get_char_length_tree(Node * node,regex_t * reg,int * len)2391 get_char_length_tree(Node* node, regex_t* reg, int* len)
2392 {
2393   return get_char_length_tree1(node, reg, len, 0);
2394 }
2395 
2396 /* x is not included y ==>  1 : 0 */
2397 static int
is_not_included(Node * x,Node * y,regex_t * reg)2398 is_not_included(Node* x, Node* y, regex_t* reg)
2399 {
2400   int i, len;
2401   OnigCodePoint code;
2402   UChar *p, c;
2403   int ytype;
2404 
2405  retry:
2406   ytype = NTYPE(y);
2407   switch (NTYPE(x)) {
2408   case NT_CTYPE:
2409     {
2410       switch (ytype) {
2411       case NT_CTYPE:
2412 	if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2413 	    NCTYPE(y)->not   != NCTYPE(x)->not)
2414 	  return 1;
2415 	else
2416 	  return 0;
2417 	break;
2418 
2419       case NT_CCLASS:
2420       swap:
2421 	{
2422 	  Node* tmp;
2423 	  tmp = x; x = y; y = tmp;
2424 	  goto retry;
2425 	}
2426 	break;
2427 
2428       case NT_STR:
2429 	goto swap;
2430 	break;
2431 
2432       default:
2433 	break;
2434       }
2435     }
2436     break;
2437 
2438   case NT_CCLASS:
2439     {
2440       CClassNode* xc = NCCLASS(x);
2441       switch (ytype) {
2442       case NT_CTYPE:
2443 	switch (NCTYPE(y)->ctype) {
2444 	case ONIGENC_CTYPE_WORD:
2445 	  if (NCTYPE(y)->not == 0) {
2446 	    if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2447 	      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2448 		if (BITSET_AT(xc->bs, i)) {
2449 		  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2450 		}
2451 	      }
2452 	      return 1;
2453 	    }
2454 	    return 0;
2455 	  }
2456 	  else {
2457 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2458 	      if (! IS_CODE_SB_WORD(reg->enc, i)) {
2459 		if (!IS_NCCLASS_NOT(xc)) {
2460 		  if (BITSET_AT(xc->bs, i))
2461 		    return 0;
2462 		}
2463 		else {
2464 		  if (! BITSET_AT(xc->bs, i))
2465 		    return 0;
2466 		}
2467 	      }
2468 	    }
2469 	    return 1;
2470 	  }
2471 	  break;
2472 
2473 	default:
2474 	  break;
2475 	}
2476 	break;
2477 
2478       case NT_CCLASS:
2479 	{
2480 	  int v;
2481 	  CClassNode* yc = NCCLASS(y);
2482 
2483 	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2484 	    v = BITSET_AT(xc->bs, i);
2485 	    if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2486                 (v == 0 && IS_NCCLASS_NOT(xc))) {
2487 	      v = BITSET_AT(yc->bs, i);
2488 	      if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2489                   (v == 0 && IS_NCCLASS_NOT(yc)))
2490 		return 0;
2491 	    }
2492 	  }
2493 	  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2494 	      (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2495 	    return 1;
2496 	  return 0;
2497 	}
2498 	break;
2499 
2500       case NT_STR:
2501 	goto swap;
2502 	break;
2503 
2504       default:
2505 	break;
2506       }
2507     }
2508     break;
2509 
2510   case NT_STR:
2511     {
2512       StrNode* xs = NSTR(x);
2513       if (NSTRING_LEN(x) == 0)
2514 	break;
2515 
2516       c = *(xs->s);
2517       switch (ytype) {
2518       case NT_CTYPE:
2519 	switch (NCTYPE(y)->ctype) {
2520 	case ONIGENC_CTYPE_WORD:
2521 	  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2522 	    return NCTYPE(y)->not;
2523 	  else
2524 	    return !(NCTYPE(y)->not);
2525 	  break;
2526 	default:
2527 	  break;
2528 	}
2529 	break;
2530 
2531       case NT_CCLASS:
2532 	{
2533 	  CClassNode* cc = NCCLASS(y);
2534 
2535 	  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2536 				     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2537 	  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2538 	}
2539 	break;
2540 
2541       case NT_STR:
2542 	{
2543 	  UChar *q;
2544 	  StrNode* ys = NSTR(y);
2545 	  len = NSTRING_LEN(x);
2546 	  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2547 	  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2548             /* tiny version */
2549             return 0;
2550 	  }
2551 	  else {
2552 	    for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2553 	      if (*p != *q) return 1;
2554 	    }
2555 	  }
2556 	}
2557 	break;
2558 
2559       default:
2560 	break;
2561       }
2562     }
2563     break;
2564 
2565   default:
2566     break;
2567   }
2568 
2569   return 0;
2570 }
2571 
2572 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2573 get_head_value_node(Node* node, int exact, regex_t* reg)
2574 {
2575   Node* n = NULL_NODE;
2576 
2577   switch (NTYPE(node)) {
2578   case NT_BREF:
2579   case NT_ALT:
2580   case NT_CANY:
2581 #ifdef USE_SUBEXP_CALL
2582   case NT_CALL:
2583 #endif
2584     break;
2585 
2586   case NT_CTYPE:
2587   case NT_CCLASS:
2588     if (exact == 0) {
2589       n = node;
2590     }
2591     break;
2592 
2593   case NT_LIST:
2594     n = get_head_value_node(NCAR(node), exact, reg);
2595     break;
2596 
2597   case NT_STR:
2598     {
2599       StrNode* sn = NSTR(node);
2600 
2601       if (sn->end <= sn->s)
2602 	break;
2603 
2604       if (exact != 0 &&
2605 	  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2606       }
2607       else {
2608 	n = node;
2609       }
2610     }
2611     break;
2612 
2613   case NT_QTFR:
2614     {
2615       QtfrNode* qn = NQTFR(node);
2616       if (qn->lower > 0) {
2617 	if (IS_NOT_NULL(qn->head_exact))
2618 	  n = qn->head_exact;
2619 	else
2620 	  n = get_head_value_node(qn->target, exact, reg);
2621       }
2622     }
2623     break;
2624 
2625   case NT_ENCLOSE:
2626     {
2627       EncloseNode* en = NENCLOSE(node);
2628       switch (en->type) {
2629       case ENCLOSE_OPTION:
2630 	{
2631 	  OnigOptionType options = reg->options;
2632 
2633 	  reg->options = NENCLOSE(node)->option;
2634 	  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2635 	  reg->options = options;
2636 	}
2637 	break;
2638 
2639       case ENCLOSE_MEMORY:
2640       case ENCLOSE_STOP_BACKTRACK:
2641 	n = get_head_value_node(en->target, exact, reg);
2642 	break;
2643       }
2644     }
2645     break;
2646 
2647   case NT_ANCHOR:
2648     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2649       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2650     break;
2651 
2652   default:
2653     break;
2654   }
2655 
2656   return n;
2657 }
2658 
2659 static int
check_type_tree(Node * node,int type_mask,int enclose_mask,int anchor_mask)2660 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2661 {
2662   int type, r = 0;
2663 
2664   type = NTYPE(node);
2665   if ((NTYPE2BIT(type) & type_mask) == 0)
2666     return 1;
2667 
2668   switch (type) {
2669   case NT_LIST:
2670   case NT_ALT:
2671     do {
2672       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2673 			  anchor_mask);
2674     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2675     break;
2676 
2677   case NT_QTFR:
2678     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2679 			anchor_mask);
2680     break;
2681 
2682   case NT_ENCLOSE:
2683     {
2684       EncloseNode* en = NENCLOSE(node);
2685       if ((en->type & enclose_mask) == 0)
2686 	return 1;
2687 
2688       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2689     }
2690     break;
2691 
2692   case NT_ANCHOR:
2693     type = NANCHOR(node)->type;
2694     if ((type & anchor_mask) == 0)
2695       return 1;
2696 
2697     if (NANCHOR(node)->target)
2698       r = check_type_tree(NANCHOR(node)->target,
2699 			  type_mask, enclose_mask, anchor_mask);
2700     break;
2701 
2702   default:
2703     break;
2704   }
2705   return r;
2706 }
2707 
2708 #ifdef USE_SUBEXP_CALL
2709 
2710 #define RECURSION_EXIST       1
2711 #define RECURSION_INFINITE    2
2712 
2713 static int
subexp_inf_recursive_check(Node * node,ScanEnv * env,int head)2714 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2715 {
2716   int type;
2717   int r = 0;
2718 
2719   type = NTYPE(node);
2720   switch (type) {
2721   case NT_LIST:
2722     {
2723       Node *x;
2724       OnigDistance min;
2725       int ret;
2726 
2727       x = node;
2728       do {
2729 	ret = subexp_inf_recursive_check(NCAR(x), env, head);
2730 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2731 	r |= ret;
2732 	if (head) {
2733 	  ret = get_min_match_length(NCAR(x), &min, env);
2734 	  if (ret != 0) return ret;
2735 	  if (min != 0) head = 0;
2736 	}
2737       } while (IS_NOT_NULL(x = NCDR(x)));
2738     }
2739     break;
2740 
2741   case NT_ALT:
2742     {
2743       int ret;
2744       r = RECURSION_EXIST;
2745       do {
2746 	ret = subexp_inf_recursive_check(NCAR(node), env, head);
2747 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2748 	r &= ret;
2749       } while (IS_NOT_NULL(node = NCDR(node)));
2750     }
2751     break;
2752 
2753   case NT_QTFR:
2754     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2755     if (r == RECURSION_EXIST) {
2756       if (NQTFR(node)->lower == 0) r = 0;
2757     }
2758     break;
2759 
2760   case NT_ANCHOR:
2761     {
2762       AnchorNode* an = NANCHOR(node);
2763       switch (an->type) {
2764       case ANCHOR_PREC_READ:
2765       case ANCHOR_PREC_READ_NOT:
2766       case ANCHOR_LOOK_BEHIND:
2767       case ANCHOR_LOOK_BEHIND_NOT:
2768 	r = subexp_inf_recursive_check(an->target, env, head);
2769 	break;
2770       }
2771     }
2772     break;
2773 
2774   case NT_CALL:
2775     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2776     break;
2777 
2778   case NT_ENCLOSE:
2779     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2780       return 0;
2781     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2782       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2783     else {
2784       SET_ENCLOSE_STATUS(node, NST_MARK2);
2785       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2786       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2787     }
2788     break;
2789 
2790   default:
2791     break;
2792   }
2793 
2794   return r;
2795 }
2796 
2797 static int
subexp_inf_recursive_check_trav(Node * node,ScanEnv * env)2798 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2799 {
2800   int type;
2801   int r = 0;
2802 
2803   type = NTYPE(node);
2804   switch (type) {
2805   case NT_LIST:
2806   case NT_ALT:
2807     do {
2808       r = subexp_inf_recursive_check_trav(NCAR(node), env);
2809     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2810     break;
2811 
2812   case NT_QTFR:
2813     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2814     break;
2815 
2816   case NT_ANCHOR:
2817     {
2818       AnchorNode* an = NANCHOR(node);
2819       switch (an->type) {
2820       case ANCHOR_PREC_READ:
2821       case ANCHOR_PREC_READ_NOT:
2822       case ANCHOR_LOOK_BEHIND:
2823       case ANCHOR_LOOK_BEHIND_NOT:
2824 	r = subexp_inf_recursive_check_trav(an->target, env);
2825 	break;
2826       }
2827     }
2828     break;
2829 
2830   case NT_ENCLOSE:
2831     {
2832       EncloseNode* en = NENCLOSE(node);
2833 
2834       if (IS_ENCLOSE_RECURSION(en)) {
2835 	SET_ENCLOSE_STATUS(node, NST_MARK1);
2836 	r = subexp_inf_recursive_check(en->target, env, 1);
2837 	if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2838 	CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2839       }
2840       r = subexp_inf_recursive_check_trav(en->target, env);
2841     }
2842 
2843     break;
2844 
2845   default:
2846     break;
2847   }
2848 
2849   return r;
2850 }
2851 
2852 static int
subexp_recursive_check(Node * node)2853 subexp_recursive_check(Node* node)
2854 {
2855   int r = 0;
2856 
2857   switch (NTYPE(node)) {
2858   case NT_LIST:
2859   case NT_ALT:
2860     do {
2861       r |= subexp_recursive_check(NCAR(node));
2862     } while (IS_NOT_NULL(node = NCDR(node)));
2863     break;
2864 
2865   case NT_QTFR:
2866     r = subexp_recursive_check(NQTFR(node)->target);
2867     break;
2868 
2869   case NT_ANCHOR:
2870     {
2871       AnchorNode* an = NANCHOR(node);
2872       switch (an->type) {
2873       case ANCHOR_PREC_READ:
2874       case ANCHOR_PREC_READ_NOT:
2875       case ANCHOR_LOOK_BEHIND:
2876       case ANCHOR_LOOK_BEHIND_NOT:
2877 	r = subexp_recursive_check(an->target);
2878 	break;
2879       }
2880     }
2881     break;
2882 
2883   case NT_CALL:
2884     r = subexp_recursive_check(NCALL(node)->target);
2885     if (r != 0) SET_CALL_RECURSION(node);
2886     break;
2887 
2888   case NT_ENCLOSE:
2889     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2890       return 0;
2891     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2892       return 1; /* recursion */
2893     else {
2894       SET_ENCLOSE_STATUS(node, NST_MARK2);
2895       r = subexp_recursive_check(NENCLOSE(node)->target);
2896       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2897     }
2898     break;
2899 
2900   default:
2901     break;
2902   }
2903 
2904   return r;
2905 }
2906 
2907 
2908 static int
subexp_recursive_check_trav(Node * node,ScanEnv * env)2909 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2910 {
2911 #define FOUND_CALLED_NODE    1
2912 
2913   int type;
2914   int r = 0;
2915 
2916   type = NTYPE(node);
2917   switch (type) {
2918   case NT_LIST:
2919   case NT_ALT:
2920     {
2921       int ret;
2922       do {
2923 	ret = subexp_recursive_check_trav(NCAR(node), env);
2924 	if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2925 	else if (ret < 0) return ret;
2926       } while (IS_NOT_NULL(node = NCDR(node)));
2927     }
2928     break;
2929 
2930   case NT_QTFR:
2931     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2932     if (NQTFR(node)->upper == 0) {
2933       if (r == FOUND_CALLED_NODE)
2934 	NQTFR(node)->is_refered = 1;
2935     }
2936     break;
2937 
2938   case NT_ANCHOR:
2939     {
2940       AnchorNode* an = NANCHOR(node);
2941       switch (an->type) {
2942       case ANCHOR_PREC_READ:
2943       case ANCHOR_PREC_READ_NOT:
2944       case ANCHOR_LOOK_BEHIND:
2945       case ANCHOR_LOOK_BEHIND_NOT:
2946 	r = subexp_recursive_check_trav(an->target, env);
2947 	break;
2948       }
2949     }
2950     break;
2951 
2952   case NT_ENCLOSE:
2953     {
2954       EncloseNode* en = NENCLOSE(node);
2955 
2956       if (! IS_ENCLOSE_RECURSION(en)) {
2957 	if (IS_ENCLOSE_CALLED(en)) {
2958 	  SET_ENCLOSE_STATUS(node, NST_MARK1);
2959 	  r = subexp_recursive_check(en->target);
2960 	  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2961 	  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2962 	}
2963       }
2964       r = subexp_recursive_check_trav(en->target, env);
2965       if (IS_ENCLOSE_CALLED(en))
2966 	r |= FOUND_CALLED_NODE;
2967     }
2968     break;
2969 
2970   default:
2971     break;
2972   }
2973 
2974   return r;
2975 }
2976 
2977 static int
setup_subexp_call(Node * node,ScanEnv * env)2978 setup_subexp_call(Node* node, ScanEnv* env)
2979 {
2980   int type;
2981   int r = 0;
2982 
2983   type = NTYPE(node);
2984   switch (type) {
2985   case NT_LIST:
2986     do {
2987       r = setup_subexp_call(NCAR(node), env);
2988     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2989     break;
2990 
2991   case NT_ALT:
2992     do {
2993       r = setup_subexp_call(NCAR(node), env);
2994     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2995     break;
2996 
2997   case NT_QTFR:
2998     r = setup_subexp_call(NQTFR(node)->target, env);
2999     break;
3000   case NT_ENCLOSE:
3001     r = setup_subexp_call(NENCLOSE(node)->target, env);
3002     break;
3003 
3004   case NT_CALL:
3005     {
3006       CallNode* cn = NCALL(node);
3007       Node** nodes = SCANENV_MEM_NODES(env);
3008 
3009       if (cn->group_num != 0) {
3010 	int gnum = cn->group_num;
3011 
3012 #ifdef USE_NAMED_GROUP
3013 	if (env->num_named > 0 &&
3014 	    IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3015 	    !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3016 	  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3017 	}
3018 #endif
3019 	if (gnum > env->num_mem) {
3020 	  onig_scan_env_set_error_string(env,
3021 		 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3022 	  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3023 	}
3024 
3025 #ifdef USE_NAMED_GROUP
3026       set_call_attr:
3027 #endif
3028 	cn->target = nodes[cn->group_num];
3029 	if (IS_NULL(cn->target)) {
3030 	  onig_scan_env_set_error_string(env,
3031 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3032 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3033 	}
3034 	SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3035 	BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3036 	cn->unset_addr_list = env->unset_addr_list;
3037       }
3038 #ifdef USE_NAMED_GROUP
3039       else {
3040 	int *refs;
3041 
3042 	int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3043 					   &refs);
3044 	if (n <= 0) {
3045 	  onig_scan_env_set_error_string(env,
3046 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3047 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3048 	}
3049 	else if (n > 1) {
3050 	  onig_scan_env_set_error_string(env,
3051 	    ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3052 	  return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3053 	}
3054 	else {
3055 	  cn->group_num = refs[0];
3056 	  goto set_call_attr;
3057 	}
3058       }
3059 #endif
3060     }
3061     break;
3062 
3063   case NT_ANCHOR:
3064     {
3065       AnchorNode* an = NANCHOR(node);
3066 
3067       switch (an->type) {
3068       case ANCHOR_PREC_READ:
3069       case ANCHOR_PREC_READ_NOT:
3070       case ANCHOR_LOOK_BEHIND:
3071       case ANCHOR_LOOK_BEHIND_NOT:
3072 	r = setup_subexp_call(an->target, env);
3073 	break;
3074       }
3075     }
3076     break;
3077 
3078   default:
3079     break;
3080   }
3081 
3082   return r;
3083 }
3084 #endif
3085 
3086 /* divide different length alternatives in look-behind.
3087   (?<=A|B) ==> (?<=A)|(?<=B)
3088   (?<!A|B) ==> (?<!A)(?<!B)
3089 */
3090 static int
divide_look_behind_alternatives(Node * node)3091 divide_look_behind_alternatives(Node* node)
3092 {
3093   Node *head, *np, *insert_node;
3094   AnchorNode* an = NANCHOR(node);
3095   int anc_type = an->type;
3096 
3097   head = an->target;
3098   np = NCAR(head);
3099   swap_node(node, head);
3100   NCAR(node) = head;
3101   NANCHOR(head)->target = np;
3102 
3103   np = node;
3104   while ((np = NCDR(np)) != NULL_NODE) {
3105     insert_node = onig_node_new_anchor(anc_type);
3106     CHECK_NULL_RETURN_MEMERR(insert_node);
3107     NANCHOR(insert_node)->target = NCAR(np);
3108     NCAR(np) = insert_node;
3109   }
3110 
3111   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3112     np = node;
3113     do {
3114       SET_NTYPE(np, NT_LIST);  /* alt -> list */
3115     } while ((np = NCDR(np)) != NULL_NODE);
3116   }
3117   return 0;
3118 }
3119 
3120 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3121 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3122 {
3123   int r, len;
3124   AnchorNode* an = NANCHOR(node);
3125 
3126   r = get_char_length_tree(an->target, reg, &len);
3127   if (r == 0)
3128     an->char_len = len;
3129   else if (r == GET_CHAR_LEN_VARLEN)
3130     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3131   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3132     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3133       r = divide_look_behind_alternatives(node);
3134     else
3135       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3136   }
3137 
3138   return r;
3139 }
3140 
3141 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3142 next_setup(Node* node, Node* next_node, regex_t* reg)
3143 {
3144   int type;
3145 
3146  retry:
3147   type = NTYPE(node);
3148   if (type == NT_QTFR) {
3149     QtfrNode* qn = NQTFR(node);
3150     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3151 #ifdef USE_QTFR_PEEK_NEXT
3152       Node* n = get_head_value_node(next_node, 1, reg);
3153       /* '\0': for UTF-16BE etc... */
3154       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3155 	qn->next_head_exact = n;
3156       }
3157 #endif
3158       /* automatic posseivation a*b ==> (?>a*)b */
3159       if (qn->lower <= 1) {
3160 	int ttype = NTYPE(qn->target);
3161 	if (IS_NODE_TYPE_SIMPLE(ttype)) {
3162 	  Node *x, *y;
3163 	  x = get_head_value_node(qn->target, 0, reg);
3164 	  if (IS_NOT_NULL(x)) {
3165 	    y = get_head_value_node(next_node,  0, reg);
3166 	    if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3167 	      Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3168 	      CHECK_NULL_RETURN_MEMERR(en);
3169 	      SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3170 	      swap_node(node, en);
3171 	      NENCLOSE(node)->target = en;
3172 	    }
3173 	  }
3174 	}
3175       }
3176     }
3177   }
3178   else if (type == NT_ENCLOSE) {
3179     EncloseNode* en = NENCLOSE(node);
3180     if (en->type == ENCLOSE_MEMORY) {
3181       node = en->target;
3182       goto retry;
3183     }
3184   }
3185   return 0;
3186 }
3187 
3188 
3189 static int
update_string_node_case_fold(regex_t * reg,Node * node)3190 update_string_node_case_fold(regex_t* reg, Node *node)
3191 {
3192   UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3193   UChar *sbuf, *ebuf, *sp;
3194   int r, i, len, sbuf_size;
3195   StrNode* sn = NSTR(node);
3196 
3197   end = sn->end;
3198   sbuf_size = (end - sn->s) * 2;
3199   sbuf = (UChar* )xmalloc(sbuf_size);
3200   CHECK_NULL_RETURN_MEMERR(sbuf);
3201   ebuf = sbuf + sbuf_size;
3202 
3203   sp = sbuf;
3204   p = sn->s;
3205   while (p < end) {
3206     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3207     q = buf;
3208     for (i = 0; i < len; i++) {
3209       if (sp >= ebuf) {
3210 	sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3211 	CHECK_NULL_RETURN_MEMERR(sbuf);
3212 	sp = sbuf + sbuf_size;
3213 	sbuf_size *= 2;
3214 	ebuf = sbuf + sbuf_size;
3215       }
3216 
3217       *sp++ = buf[i];
3218     }
3219   }
3220 
3221   r = onig_node_str_set(node, sbuf, sp);
3222   if (r != 0) {
3223     xfree(sbuf);
3224     return r;
3225   }
3226 
3227   xfree(sbuf);
3228   return 0;
3229 }
3230 
3231 static int
expand_case_fold_make_rem_string(Node ** rnode,UChar * s,UChar * end,regex_t * reg)3232 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3233 				 regex_t* reg)
3234 {
3235   int r;
3236   Node *node;
3237 
3238   node = onig_node_new_str(s, end);
3239   if (IS_NULL(node)) return ONIGERR_MEMORY;
3240 
3241   r = update_string_node_case_fold(reg, node);
3242   if (r != 0) {
3243     onig_node_free(node);
3244     return r;
3245   }
3246 
3247   NSTRING_SET_AMBIG(node);
3248   NSTRING_SET_DONT_GET_OPT_INFO(node);
3249   *rnode = node;
3250   return 0;
3251 }
3252 
3253 static int
expand_case_fold_string_alt(int item_num,OnigCaseFoldCodeItem items[],UChar * p,int slen,UChar * end,regex_t * reg,Node ** rnode)3254 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3255 			    UChar *p, int slen, UChar *end,
3256 			    regex_t* reg, Node **rnode)
3257 {
3258   int r, i, j, len, varlen;
3259   Node *anode, *var_anode, *snode, *xnode, *an;
3260   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3261 
3262   *rnode = var_anode = NULL_NODE;
3263 
3264   varlen = 0;
3265   for (i = 0; i < item_num; i++) {
3266     if (items[i].byte_len != slen) {
3267       varlen = 1;
3268       break;
3269     }
3270   }
3271 
3272   if (varlen != 0) {
3273     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3274     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3275 
3276     xnode = onig_node_new_list(NULL, NULL);
3277     if (IS_NULL(xnode)) goto mem_err;
3278     NCAR(var_anode) = xnode;
3279 
3280     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3281     if (IS_NULL(anode)) goto