1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
33
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
39
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
42
43 static void
history_tree_clear(OnigCaptureTreeNode * node)44 history_tree_clear(OnigCaptureTreeNode* node)
45 {
46 int i;
47
48 if (IS_NOT_NULL(node)) {
49 for (i = 0; i < node->num_childs; i++) {
50 if (IS_NOT_NULL(node->childs[i])) {
51 history_tree_free(node->childs[i]);
52 }
53 }
54 for (i = 0; i < node->allocated; i++) {
55 node->childs[i] = (OnigCaptureTreeNode* )0;
56 }
57 node->num_childs = 0;
58 node->beg = ONIG_REGION_NOTPOS;
59 node->end = ONIG_REGION_NOTPOS;
60 node->group = -1;
61 }
62 }
63
64 static void
history_tree_free(OnigCaptureTreeNode * node)65 history_tree_free(OnigCaptureTreeNode* node)
66 {
67 history_tree_clear(node);
68 xfree(node);
69 }
70
71 static void
history_root_free(OnigRegion * r)72 history_root_free(OnigRegion* r)
73 {
74 if (IS_NOT_NULL(r->history_root)) {
75 history_tree_free(r->history_root);
76 r->history_root = (OnigCaptureTreeNode* )0;
77 }
78 }
79
80 static OnigCaptureTreeNode*
history_node_new(void)81 history_node_new(void)
82 {
83 OnigCaptureTreeNode* node;
84
85 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86 CHECK_NULL_RETURN(node);
87 node->childs = (OnigCaptureTreeNode** )0;
88 node->allocated = 0;
89 node->num_childs = 0;
90 node->group = -1;
91 node->beg = ONIG_REGION_NOTPOS;
92 node->end = ONIG_REGION_NOTPOS;
93
94 return node;
95 }
96
97 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
99 {
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
101
102 if (parent->num_childs >= parent->allocated) {
103 int n, i;
104
105 if (IS_NULL(parent->childs)) {
106 n = HISTORY_TREE_INIT_ALLOC_SIZE;
107 parent->childs =
108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
109 }
110 else {
111 n = parent->allocated * 2;
112 parent->childs =
113 (OnigCaptureTreeNode** )xrealloc(parent->childs,
114 sizeof(OnigCaptureTreeNode*) * n);
115 }
116 CHECK_NULL_RETURN_MEMERR(parent->childs);
117 for (i = parent->allocated; i < n; i++) {
118 parent->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 parent->allocated = n;
121 }
122
123 parent->childs[parent->num_childs] = child;
124 parent->num_childs++;
125 return 0;
126 }
127
128 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)129 history_tree_clone(OnigCaptureTreeNode* node)
130 {
131 int i;
132 OnigCaptureTreeNode *clone, *child;
133
134 clone = history_node_new();
135 CHECK_NULL_RETURN(clone);
136
137 clone->beg = node->beg;
138 clone->end = node->end;
139 for (i = 0; i < node->num_childs; i++) {
140 child = history_tree_clone(node->childs[i]);
141 if (IS_NULL(child)) {
142 history_tree_free(clone);
143 return (OnigCaptureTreeNode* )0;
144 }
145 history_tree_add_child(clone, child);
146 }
147
148 return clone;
149 }
150
151 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)152 onig_get_capture_tree(OnigRegion* region)
153 {
154 return region->history_root;
155 }
156 #endif /* USE_CAPTURE_HISTORY */
157
158 extern void
onig_region_clear(OnigRegion * region)159 onig_region_clear(OnigRegion* region)
160 {
161 int i;
162
163 for (i = 0; i < region->num_regs; i++) {
164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
165 }
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region);
168 #endif
169 }
170
171 extern int
onig_region_resize(OnigRegion * region,int n)172 onig_region_resize(OnigRegion* region, int n)
173 {
174 region->num_regs = n;
175
176 if (n < ONIG_NREGION)
177 n = ONIG_NREGION;
178
179 if (region->allocated == 0) {
180 region->beg = (int* )xmalloc(n * sizeof(int));
181 region->end = (int* )xmalloc(n * sizeof(int));
182
183 if (region->beg == 0 || region->end == 0)
184 return ONIGERR_MEMORY;
185
186 region->allocated = n;
187 }
188 else if (region->allocated < n) {
189 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
190 region->end = (int* )xrealloc(region->end, n * sizeof(int));
191
192 if (region->beg == 0 || region->end == 0)
193 return ONIGERR_MEMORY;
194
195 region->allocated = n;
196 }
197
198 return 0;
199 }
200
201 static int
onig_region_resize_clear(OnigRegion * region,int n)202 onig_region_resize_clear(OnigRegion* region, int n)
203 {
204 int r;
205
206 r = onig_region_resize(region, n);
207 if (r != 0) return r;
208 onig_region_clear(region);
209 return 0;
210 }
211
212 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)213 onig_region_set(OnigRegion* region, int at, int beg, int end)
214 {
215 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
216
217 if (at >= region->allocated) {
218 int r = onig_region_resize(region, at + 1);
219 if (r < 0) return r;
220 }
221
222 region->beg[at] = beg;
223 region->end[at] = end;
224 return 0;
225 }
226
227 extern void
onig_region_init(OnigRegion * region)228 onig_region_init(OnigRegion* region)
229 {
230 region->num_regs = 0;
231 region->allocated = 0;
232 region->beg = (int* )0;
233 region->end = (int* )0;
234 region->history_root = (OnigCaptureTreeNode* )0;
235 }
236
237 extern OnigRegion*
onig_region_new(void)238 onig_region_new(void)
239 {
240 OnigRegion* r;
241
242 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
243 onig_region_init(r);
244 return r;
245 }
246
247 extern void
onig_region_free(OnigRegion * r,int free_self)248 onig_region_free(OnigRegion* r, int free_self)
249 {
250 if (r) {
251 if (r->allocated > 0) {
252 if (r->beg) xfree(r->beg);
253 if (r->end) xfree(r->end);
254 r->allocated = 0;
255 }
256 #ifdef USE_CAPTURE_HISTORY
257 history_root_free(r);
258 #endif
259 if (free_self) xfree(r);
260 }
261 }
262
263 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)264 onig_region_copy(OnigRegion* to, OnigRegion* from)
265 {
266 #define RREGC_SIZE (sizeof(int) * from->num_regs)
267 int i;
268
269 if (to == from) return;
270
271 if (to->allocated == 0) {
272 if (from->num_regs > 0) {
273 to->beg = (int* )xmalloc(RREGC_SIZE);
274 to->end = (int* )xmalloc(RREGC_SIZE);
275 to->allocated = from->num_regs;
276 }
277 }
278 else if (to->allocated < from->num_regs) {
279 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
280 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
281 to->allocated = from->num_regs;
282 }
283
284 for (i = 0; i < from->num_regs; i++) {
285 to->beg[i] = from->beg[i];
286 to->end[i] = from->end[i];
287 }
288 to->num_regs = from->num_regs;
289
290 #ifdef USE_CAPTURE_HISTORY
291 history_root_free(to);
292
293 if (IS_NOT_NULL(from->history_root)) {
294 to->history_root = history_tree_clone(from->history_root);
295 }
296 #endif
297 }
298
299
300 /** stack **/
301 #define INVALID_STACK_INDEX -1
302
303 /* stack type */
304 /* used by normal-POP */
305 #define STK_ALT 0x0001
306 #define STK_LOOK_BEHIND_NOT 0x0002
307 #define STK_POS_NOT 0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START 0x0100
310 #define STK_MEM_END 0x8200
311 #define STK_REPEAT_INC 0x0300
312 #define STK_STATE_CHECK_MARK 0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START 0x3000
315 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
316 #define STK_MEM_END_MARK 0x8400
317 #define STK_POS 0x0500 /* used when POP-POS */
318 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
319 #define STK_REPEAT 0x0700
320 #define STK_CALL_FRAME 0x0800
321 #define STK_RETURN 0x0900
322 #define STK_VOID 0x0a00 /* for fill a blank */
323
324 /* stack type check mask */
325 #define STK_MASK_POP_USED 0x00ff
326 #define STK_MASK_TO_VOID_TARGET 0x10ff
327 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
328
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
331 (msa).stack_p = (void* )0;\
332 (msa).options = (arg_option);\
333 (msa).region = (arg_region);\
334 (msa).start = (arg_start);\
335 (msa).best_len = ONIG_MISMATCH;\
336 } while(0)
337 #else
338 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
339 (msa).stack_p = (void* )0;\
340 (msa).options = (arg_option);\
341 (msa).region = (arg_region);\
342 (msa).start = (arg_start);\
343 } while(0)
344 #endif
345
346 #ifdef USE_COMBINATION_EXPLOSION_CHECK
347
348 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
349
350 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
351 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
352 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
353 offset = ((offset) * (state_num)) >> 3;\
354 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
355 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
356 (msa).state_check_buff = (void* )xmalloc(size);\
357 else \
358 (msa).state_check_buff = (void* )xalloca(size);\
359 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
360 (size_t )(size - (offset))); \
361 (msa).state_check_buff_size = size;\
362 }\
363 else {\
364 (msa).state_check_buff = (void* )0;\
365 (msa).state_check_buff_size = 0;\
366 }\
367 }\
368 else {\
369 (msa).state_check_buff = (void* )0;\
370 (msa).state_check_buff_size = 0;\
371 }\
372 } while(0)
373
374 #define MATCH_ARG_FREE(msa) do {\
375 if ((msa).stack_p) xfree((msa).stack_p);\
376 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
377 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
378 }\
379 } while(0)
380 #else
381 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
382 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
383 #endif
384
385
386
387 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
388 if (msa->stack_p) {\
389 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
390 stk_alloc = (OnigStackType* )(msa->stack_p);\
391 stk_base = stk_alloc;\
392 stk = stk_base;\
393 stk_end = stk_base + msa->stack_n;\
394 }\
395 else {\
396 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
397 + sizeof(OnigStackType) * (stack_num));\
398 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
399 stk_base = stk_alloc;\
400 stk = stk_base;\
401 stk_end = stk_base + (stack_num);\
402 }\
403 } while(0)
404
405 #define STACK_SAVE do{\
406 if (stk_base != stk_alloc) {\
407 msa->stack_p = stk_base;\
408 msa->stack_n = stk_end - stk_base;\
409 };\
410 } while(0)
411
412 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
413
414 extern unsigned int
onig_get_match_stack_limit_size(void)415 onig_get_match_stack_limit_size(void)
416 {
417 return MatchStackLimitSize;
418 }
419
420 extern int
onig_set_match_stack_limit_size(unsigned int size)421 onig_set_match_stack_limit_size(unsigned int size)
422 {
423 MatchStackLimitSize = size;
424 return 0;
425 }
426
427 static int
stack_double(OnigStackType ** arg_stk_base,OnigStackType ** arg_stk_end,OnigStackType ** arg_stk,OnigStackType * stk_alloc,OnigMatchArg * msa)428 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
429 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
430 {
431 unsigned int n;
432 OnigStackType *x, *stk_base, *stk_end, *stk;
433
434 stk_base = *arg_stk_base;
435 stk_end = *arg_stk_end;
436 stk = *arg_stk;
437
438 n = stk_end - stk_base;
439 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
440 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
441 if (IS_NULL(x)) {
442 STACK_SAVE;
443 return ONIGERR_MEMORY;
444 }
445 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
446 n *= 2;
447 }
448 else {
449 n *= 2;
450 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
451 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
452 return ONIGERR_MATCH_STACK_LIMIT_OVER;
453 else
454 n = MatchStackLimitSize;
455 }
456 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
457 if (IS_NULL(x)) {
458 STACK_SAVE;
459 return ONIGERR_MEMORY;
460 }
461 }
462 *arg_stk = x + (stk - stk_base);
463 *arg_stk_base = x;
464 *arg_stk_end = x + n;
465 return 0;
466 }
467
468 #define STACK_ENSURE(n) do {\
469 if (stk_end - stk < (n)) {\
470 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
471 if (r != 0) { STACK_SAVE; return r; } \
472 }\
473 } while(0)
474
475 #define STACK_AT(index) (stk_base + (index))
476 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
477
478 #define STACK_PUSH_TYPE(stack_type) do {\
479 STACK_ENSURE(1);\
480 stk->type = (stack_type);\
481 STACK_INC;\
482 } while(0)
483
484 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
485
486 #ifdef USE_COMBINATION_EXPLOSION_CHECK
487 #define STATE_CHECK_POS(s,snum) \
488 (((s) - str) * num_comb_exp_check + ((snum) - 1))
489 #define STATE_CHECK_VAL(v,snum) do {\
490 if (state_check_buff != NULL) {\
491 int x = STATE_CHECK_POS(s,snum);\
492 (v) = state_check_buff[x/8] & (1<<(x%8));\
493 }\
494 else (v) = 0;\
495 } while(0)
496
497
498 #define ELSE_IF_STATE_CHECK_MARK(stk) \
499 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
500 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
501 state_check_buff[x/8] |= (1<<(x%8)); \
502 }
503
504 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
505 STACK_ENSURE(1);\
506 stk->type = (stack_type);\
507 stk->u.state.pcode = (pat);\
508 stk->u.state.pstr = (s);\
509 stk->u.state.pstr_prev = (sprev);\
510 stk->u.state.state_check = 0;\
511 STACK_INC;\
512 } while(0)
513
514 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
515 stk->type = (stack_type);\
516 stk->u.state.pcode = (pat);\
517 stk->u.state.state_check = 0;\
518 STACK_INC;\
519 } while(0)
520
521 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
522 STACK_ENSURE(1);\
523 stk->type = STK_ALT;\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.pstr = (s);\
526 stk->u.state.pstr_prev = (sprev);\
527 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
528 STACK_INC;\
529 } while(0)
530
531 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
532 if (state_check_buff != NULL) {\
533 STACK_ENSURE(1);\
534 stk->type = STK_STATE_CHECK_MARK;\
535 stk->u.state.pstr = (s);\
536 stk->u.state.state_check = (snum);\
537 STACK_INC;\
538 }\
539 } while(0)
540
541 #else /* USE_COMBINATION_EXPLOSION_CHECK */
542
543 #define ELSE_IF_STATE_CHECK_MARK(stk)
544
545 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
546 STACK_ENSURE(1);\
547 stk->type = (stack_type);\
548 stk->u.state.pcode = (pat);\
549 stk->u.state.pstr = (s);\
550 stk->u.state.pstr_prev = (sprev);\
551 STACK_INC;\
552 } while(0)
553
554 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
557 STACK_INC;\
558 } while(0)
559 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
560
561 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
562 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
563 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
564 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
565 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
566 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
567
568 #define STACK_PUSH_REPEAT(id, pat) do {\
569 STACK_ENSURE(1);\
570 stk->type = STK_REPEAT;\
571 stk->u.repeat.num = (id);\
572 stk->u.repeat.pcode = (pat);\
573 stk->u.repeat.count = 0;\
574 STACK_INC;\
575 } while(0)
576
577 #define STACK_PUSH_REPEAT_INC(sindex) do {\
578 STACK_ENSURE(1);\
579 stk->type = STK_REPEAT_INC;\
580 stk->u.repeat_inc.si = (sindex);\
581 STACK_INC;\
582 } while(0)
583
584 #define STACK_PUSH_MEM_START(mnum, s) do {\
585 STACK_ENSURE(1);\
586 stk->type = STK_MEM_START;\
587 stk->u.mem.num = (mnum);\
588 stk->u.mem.pstr = (s);\
589 stk->u.mem.start = mem_start_stk[mnum];\
590 stk->u.mem.end = mem_end_stk[mnum];\
591 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
592 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
593 STACK_INC;\
594 } while(0)
595
596 #define STACK_PUSH_MEM_END(mnum, s) do {\
597 STACK_ENSURE(1);\
598 stk->type = STK_MEM_END;\
599 stk->u.mem.num = (mnum);\
600 stk->u.mem.pstr = (s);\
601 stk->u.mem.start = mem_start_stk[mnum];\
602 stk->u.mem.end = mem_end_stk[mnum];\
603 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
604 STACK_INC;\
605 } while(0)
606
607 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
608 STACK_ENSURE(1);\
609 stk->type = STK_MEM_END_MARK;\
610 stk->u.mem.num = (mnum);\
611 STACK_INC;\
612 } while(0)
613
614 #define STACK_GET_MEM_START(mnum, k) do {\
615 int level = 0;\
616 k = stk;\
617 while (k > stk_base) {\
618 k--;\
619 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
620 && k->u.mem.num == (mnum)) {\
621 level++;\
622 }\
623 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
624 if (level == 0) break;\
625 level--;\
626 }\
627 }\
628 } while(0)
629
630 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
631 int level = 0;\
632 while (k < stk) {\
633 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
634 if (level == 0) (start) = k->u.mem.pstr;\
635 level++;\
636 }\
637 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
638 level--;\
639 if (level == 0) {\
640 (end) = k->u.mem.pstr;\
641 break;\
642 }\
643 }\
644 k++;\
645 }\
646 } while(0)
647
648 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
649 STACK_ENSURE(1);\
650 stk->type = STK_NULL_CHECK_START;\
651 stk->u.null_check.num = (cnum);\
652 stk->u.null_check.pstr = (s);\
653 STACK_INC;\
654 } while(0)
655
656 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
657 STACK_ENSURE(1);\
658 stk->type = STK_NULL_CHECK_END;\
659 stk->u.null_check.num = (cnum);\
660 STACK_INC;\
661 } while(0)
662
663 #define STACK_PUSH_CALL_FRAME(pat) do {\
664 STACK_ENSURE(1);\
665 stk->type = STK_CALL_FRAME;\
666 stk->u.call_frame.ret_addr = (pat);\
667 STACK_INC;\
668 } while(0)
669
670 #define STACK_PUSH_RETURN do {\
671 STACK_ENSURE(1);\
672 stk->type = STK_RETURN;\
673 STACK_INC;\
674 } while(0)
675
676
677 #ifdef ONIG_DEBUG
678 #define STACK_BASE_CHECK(p, at) \
679 if ((p) < stk_base) {\
680 fprintf(stderr, "at %s\n", at);\
681 goto stack_error;\
682 }
683 #else
684 #define STACK_BASE_CHECK(p, at)
685 #endif
686
687 #define STACK_POP_ONE do {\
688 stk--;\
689 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
690 } while(0)
691
692 #define STACK_POP do {\
693 switch (pop_level) {\
694 case STACK_POP_LEVEL_FREE:\
695 while (1) {\
696 stk--;\
697 STACK_BASE_CHECK(stk, "STACK_POP"); \
698 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
699 ELSE_IF_STATE_CHECK_MARK(stk);\
700 }\
701 break;\
702 case STACK_POP_LEVEL_MEM_START:\
703 while (1) {\
704 stk--;\
705 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 else if (stk->type == STK_MEM_START) {\
708 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
709 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
710 }\
711 ELSE_IF_STATE_CHECK_MARK(stk);\
712 }\
713 break;\
714 default:\
715 while (1) {\
716 stk--;\
717 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
718 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
719 else if (stk->type == STK_MEM_START) {\
720 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
721 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
722 }\
723 else if (stk->type == STK_REPEAT_INC) {\
724 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
725 }\
726 else if (stk->type == STK_MEM_END) {\
727 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
728 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
729 }\
730 ELSE_IF_STATE_CHECK_MARK(stk);\
731 }\
732 break;\
733 }\
734 } while(0)
735
736 #define STACK_POP_TIL_POS_NOT do {\
737 while (1) {\
738 stk--;\
739 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
740 if (stk->type == STK_POS_NOT) break;\
741 else if (stk->type == STK_MEM_START) {\
742 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
743 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
744 }\
745 else if (stk->type == STK_REPEAT_INC) {\
746 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
747 }\
748 else if (stk->type == STK_MEM_END) {\
749 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
750 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
751 }\
752 ELSE_IF_STATE_CHECK_MARK(stk);\
753 }\
754 } while(0)
755
756 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
757 while (1) {\
758 stk--;\
759 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
760 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
761 else if (stk->type == STK_MEM_START) {\
762 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
763 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
764 }\
765 else if (stk->type == STK_REPEAT_INC) {\
766 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
767 }\
768 else if (stk->type == STK_MEM_END) {\
769 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
770 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
771 }\
772 ELSE_IF_STATE_CHECK_MARK(stk);\
773 }\
774 } while(0)
775
776 #define STACK_POS_END(k) do {\
777 k = stk;\
778 while (1) {\
779 k--;\
780 STACK_BASE_CHECK(k, "STACK_POS_END"); \
781 if (IS_TO_VOID_TARGET(k)) {\
782 k->type = STK_VOID;\
783 }\
784 else if (k->type == STK_POS) {\
785 k->type = STK_VOID;\
786 break;\
787 }\
788 }\
789 } while(0)
790
791 #define STACK_STOP_BT_END do {\
792 OnigStackType *k = stk;\
793 while (1) {\
794 k--;\
795 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
796 if (IS_TO_VOID_TARGET(k)) {\
797 k->type = STK_VOID;\
798 }\
799 else if (k->type == STK_STOP_BT) {\
800 k->type = STK_VOID;\
801 break;\
802 }\
803 }\
804 } while(0)
805
806 #define STACK_NULL_CHECK(isnull,id,s) do {\
807 OnigStackType* k = stk;\
808 while (1) {\
809 k--;\
810 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
811 if (k->type == STK_NULL_CHECK_START) {\
812 if (k->u.null_check.num == (id)) {\
813 (isnull) = (k->u.null_check.pstr == (s));\
814 break;\
815 }\
816 }\
817 }\
818 } while(0)
819
820 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
821 int level = 0;\
822 OnigStackType* k = stk;\
823 while (1) {\
824 k--;\
825 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
826 if (k->type == STK_NULL_CHECK_START) {\
827 if (k->u.null_check.num == (id)) {\
828 if (level == 0) {\
829 (isnull) = (k->u.null_check.pstr == (s));\
830 break;\
831 }\
832 else level--;\
833 }\
834 }\
835 else if (k->type == STK_NULL_CHECK_END) {\
836 level++;\
837 }\
838 }\
839 } while(0)
840
841 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
842 OnigStackType* k = stk;\
843 while (1) {\
844 k--;\
845 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
846 if (k->type == STK_NULL_CHECK_START) {\
847 if (k->u.null_check.num == (id)) {\
848 if (k->u.null_check.pstr != (s)) {\
849 (isnull) = 0;\
850 break;\
851 }\
852 else {\
853 UChar* endp;\
854 (isnull) = 1;\
855 while (k < stk) {\
856 if (k->type == STK_MEM_START) {\
857 if (k->u.mem.end == INVALID_STACK_INDEX) {\
858 (isnull) = 0; break;\
859 }\
860 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
861 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
862 else\
863 endp = (UChar* )k->u.mem.end;\
864 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
865 (isnull) = 0; break;\
866 }\
867 else if (endp != s) {\
868 (isnull) = -1; /* empty, but position changed */ \
869 }\
870 }\
871 k++;\
872 }\
873 break;\
874 }\
875 }\
876 }\
877 }\
878 } while(0)
879
880 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
881 int level = 0;\
882 OnigStackType* k = stk;\
883 while (1) {\
884 k--;\
885 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
886 if (k->type == STK_NULL_CHECK_START) {\
887 if (k->u.null_check.num == (id)) {\
888 if (level == 0) {\
889 if (k->u.null_check.pstr != (s)) {\
890 (isnull) = 0;\
891 break;\
892 }\
893 else {\
894 UChar* endp;\
895 (isnull) = 1;\
896 while (k < stk) {\
897 if (k->type == STK_MEM_START) {\
898 if (k->u.mem.end == INVALID_STACK_INDEX) {\
899 (isnull) = 0; break;\
900 }\
901 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
902 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
903 else\
904 endp = (UChar* )k->u.mem.end;\
905 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
906 (isnull) = 0; break;\
907 }\
908 else if (endp != s) {\
909 (isnull) = -1; /* empty, but position changed */ \
910 }\
911 }\
912 k++;\
913 }\
914 break;\
915 }\
916 }\
917 else {\
918 level--;\
919 }\
920 }\
921 }\
922 else if (k->type == STK_NULL_CHECK_END) {\
923 if (k->u.null_check.num == (id)) level++;\
924 }\
925 }\
926 } while(0)
927
928 #define STACK_GET_REPEAT(id, k) do {\
929 int level = 0;\
930 k = stk;\
931 while (1) {\
932 k--;\
933 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
934 if (k->type == STK_REPEAT) {\
935 if (level == 0) {\
936 if (k->u.repeat.num == (id)) {\
937 break;\
938 }\
939 }\
940 }\
941 else if (k->type == STK_CALL_FRAME) level--;\
942 else if (k->type == STK_RETURN) level++;\
943 }\
944 } while(0)
945
946 #define STACK_RETURN(addr) do {\
947 int level = 0;\
948 OnigStackType* k = stk;\
949 while (1) {\
950 k--;\
951 STACK_BASE_CHECK(k, "STACK_RETURN"); \
952 if (k->type == STK_CALL_FRAME) {\
953 if (level == 0) {\
954 (addr) = k->u.call_frame.ret_addr;\
955 break;\
956 }\
957 else level--;\
958 }\
959 else if (k->type == STK_RETURN)\
960 level++;\
961 }\
962 } while(0)
963
964
965 #define STRING_CMP(s1,s2,len) do {\
966 while (len-- > 0) {\
967 if (*s1++ != *s2++) goto fail;\
968 }\
969 } while(0)
970
971 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
972 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
973 goto fail; \
974 } while(0)
975
string_cmp_ic(OnigEncoding enc,int case_fold_flag,UChar * s1,UChar ** ps2,int mblen)976 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
977 UChar* s1, UChar** ps2, int mblen)
978 {
979 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
980 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
981 UChar *p1, *p2, *end1, *s2, *end2;
982 int len1, len2;
983
984 s2 = *ps2;
985 end1 = s1 + mblen;
986 end2 = s2 + mblen;
987 while (s1 < end1) {
988 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
989 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
990 if (len1 != len2) return 0;
991 p1 = buf1;
992 p2 = buf2;
993 while (len1-- > 0) {
994 if (*p1 != *p2) return 0;
995 p1++;
996 p2++;
997 }
998 }
999
1000 *ps2 = s2;
1001 return 1;
1002 }
1003
1004 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1005 is_fail = 0;\
1006 while (len-- > 0) {\
1007 if (*s1++ != *s2++) {\
1008 is_fail = 1; break;\
1009 }\
1010 }\
1011 } while(0)
1012
1013 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1014 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1015 is_fail = 1; \
1016 else \
1017 is_fail = 0; \
1018 } while(0)
1019
1020
1021 #define IS_EMPTY_STR (str == end)
1022 #define ON_STR_BEGIN(s) ((s) == str)
1023 #define ON_STR_END(s) ((s) == end)
1024 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1025 #define DATA_ENSURE_CHECK1 (s < right_range)
1026 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1027 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1028 #else
1029 #define DATA_ENSURE_CHECK1 (s < end)
1030 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1031 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1032 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1033
1034
1035 #ifdef USE_CAPTURE_HISTORY
1036 static int
make_capture_history_tree(OnigCaptureTreeNode * node,OnigStackType ** kp,OnigStackType * stk_top,UChar * str,regex_t * reg)1037 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1038 OnigStackType* stk_top, UChar* str, regex_t* reg)
1039 {
1040 int n, r;
1041 OnigCaptureTreeNode* child;
1042 OnigStackType* k = *kp;
1043
1044 while (k < stk_top) {
1045 if (k->type == STK_MEM_START) {
1046 n = k->u.mem.num;
1047 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1048 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1049 child = history_node_new();
1050 CHECK_NULL_RETURN_MEMERR(child);
1051 child->group = n;
1052 child->beg = (int )(k->u.mem.pstr - str);
1053 r = history_tree_add_child(node, child);
1054 if (r != 0) return r;
1055 *kp = (k + 1);
1056 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1057 if (r != 0) return r;
1058
1059 k = *kp;
1060 child->end = (int )(k->u.mem.pstr - str);
1061 }
1062 }
1063 else if (k->type == STK_MEM_END) {
1064 if (k->u.mem.num == node->group) {
1065 node->end = (int )(k->u.mem.pstr - str);
1066 *kp = k;
1067 return 0;
1068 }
1069 }
1070 k++;
1071 }
1072
1073 return 1; /* 1: root node ending. */
1074 }
1075 #endif
1076
1077 #ifdef USE_BACKREF_WITH_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1078 static int mem_is_in_memp(int mem, int num, UChar* memp)
1079 {
1080 int i;
1081 MemNumType m;
1082
1083 for (i = 0; i < num; i++) {
1084 GET_MEMNUM_INC(m, memp);
1085 if (mem == (int )m) return 1;
1086 }
1087 return 0;
1088 }
1089
backref_match_at_nested_level(regex_t * reg,OnigStackType * top,OnigStackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1090 static int backref_match_at_nested_level(regex_t* reg
1091 , OnigStackType* top, OnigStackType* stk_base
1092 , int ignore_case, int case_fold_flag
1093 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1094 {
1095 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1096 int level;
1097 OnigStackType* k;
1098
1099 level = 0;
1100 k = top;
1101 k--;
1102 while (k >= stk_base) {
1103 if (k->type == STK_CALL_FRAME) {
1104 level--;
1105 }
1106 else if (k->type == STK_RETURN) {
1107 level++;
1108 }
1109 else if (level == nest) {
1110 if (k->type == STK_MEM_START) {
1111 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1112 pstart = k->u.mem.pstr;
1113 if (pend != NULL_UCHARP) {
1114 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1115 p = pstart;
1116 ss = *s;
1117
1118 if (ignore_case != 0) {
1119 if (string_cmp_ic(reg->enc, case_fold_flag,
1120 pstart, &ss, (int )(pend - pstart)) == 0)
1121 return 0; /* or goto next_mem; */
1122 }
1123 else {
1124 while (p < pend) {
1125 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1126 }
1127 }
1128
1129 *s = ss;
1130 return 1;
1131 }
1132 }
1133 }
1134 else if (k->type == STK_MEM_END) {
1135 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1136 pend = k->u.mem.pstr;
1137 }
1138 }
1139 }
1140 k--;
1141 }
1142
1143 return 0;
1144 }
1145 #endif /* USE_BACKREF_WITH_LEVEL */
1146
1147
1148 #ifdef ONIG_DEBUG_STATISTICS
1149
1150 #define USE_TIMEOFDAY
1151
1152 #ifdef USE_TIMEOFDAY
1153 #ifdef HAVE_SYS_TIME_H
1154 #include <sys/time.h>
1155 #endif
1156 #ifdef HAVE_UNISTD_H
1157 #include <unistd.h>
1158 #endif
1159 static struct timeval ts, te;
1160 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1161 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1162 (((te).tv_sec - (ts).tv_sec)*1000000))
1163 #else
1164 #ifdef HAVE_SYS_TIMES_H
1165 #include <sys/times.h>
1166 #endif
1167 static struct tms ts, te;
1168 #define GETTIME(t) times(&(t))
1169 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1170 #endif
1171
1172 static int OpCounter[256];
1173 static int OpPrevCounter[256];
1174 static unsigned long OpTime[256];
1175 static int OpCurr = OP_FINISH;
1176 static int OpPrevTarget = OP_FAIL;
1177 static int MaxStackDepth = 0;
1178
1179 #define MOP_IN(opcode) do {\
1180 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1181 OpCurr = opcode;\
1182 OpCounter[opcode]++;\
1183 GETTIME(ts);\
1184 } while(0)
1185
1186 #define MOP_OUT do {\
1187 GETTIME(te);\
1188 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1189 } while(0)
1190
1191 extern void
onig_statistics_init(void)1192 onig_statistics_init(void)
1193 {
1194 int i;
1195 for (i = 0; i < 256; i++) {
1196 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1197 }
1198 MaxStackDepth = 0;
1199 }
1200
1201 extern void
onig_print_statistics(FILE * f)1202 onig_print_statistics(FILE* f)
1203 {
1204 int i;
1205 fprintf(f, " count prev time\n");
1206 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1207 fprintf(f, "%8d: %8d: %10ld: %s\n",
1208 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1209 }
1210 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1211 }
1212
1213 #define STACK_INC do {\
1214 stk++;\
1215 if (stk - stk_base > MaxStackDepth) \
1216 MaxStackDepth = stk - stk_base;\
1217 } while(0)
1218
1219 #else
1220 #define STACK_INC stk++
1221
1222 #define MOP_IN(opcode)
1223 #define MOP_OUT
1224 #endif
1225
1226
1227 /* matching region of POSIX API */
1228 typedef int regoff_t;
1229
1230 typedef struct {
1231 regoff_t rm_so;
1232 regoff_t rm_eo;
1233 } posix_regmatch_t;
1234
1235 /* match data(str - end) from position (sstart). */
1236 /* if sstart == str then set sprev to NULL. */
1237 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * right_range,const UChar * sstart,UChar * sprev,OnigMatchArg * msa)1238 match_at(regex_t* reg, const UChar* str, const UChar* end,
1239 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1240 const UChar* right_range,
1241 #endif
1242 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1243 {
1244 static UChar FinishCode[] = { OP_FINISH };
1245
1246 int i, n, num_mem, best_len, pop_level;
1247 LengthType tlen, tlen2;
1248 MemNumType mem;
1249 RelAddrType addr;
1250 OnigOptionType option = reg->options;
1251 OnigEncoding encode = reg->enc;
1252 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1253 UChar *s, *q, *sbegin;
1254 UChar *p = reg->p;
1255 char *alloca_base;
1256 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1257 OnigStackType *stkp; /* used as any purpose. */
1258 OnigStackIndex si;
1259 OnigStackIndex *repeat_stk;
1260 OnigStackIndex *mem_start_stk, *mem_end_stk;
1261 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1262 int scv;
1263 unsigned char* state_check_buff = msa->state_check_buff;
1264 int num_comb_exp_check = reg->num_comb_exp_check;
1265 #endif
1266 n = reg->num_repeat + reg->num_mem * 2;
1267
1268 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1269 pop_level = reg->stack_pop_level;
1270 num_mem = reg->num_mem;
1271 repeat_stk = (OnigStackIndex* )alloca_base;
1272
1273 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1274 mem_end_stk = mem_start_stk + num_mem;
1275 mem_start_stk--; /* for index start from 1,
1276 mem_start_stk[1]..mem_start_stk[num_mem] */
1277 mem_end_stk--; /* for index start from 1,
1278 mem_end_stk[1]..mem_end_stk[num_mem] */
1279 for (i = 1; i <= num_mem; i++) {
1280 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1281 }
1282
1283 #ifdef ONIG_DEBUG_MATCH
1284 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1285 (int )str, (int )end, (int )sstart, (int )sprev);
1286 fprintf(stderr, "size: %d, start offset: %d\n",
1287 (int )(end - str), (int )(sstart - str));
1288 #endif
1289
1290 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1291 best_len = ONIG_MISMATCH;
1292 s = (UChar* )sstart;
1293 while (1) {
1294 #ifdef ONIG_DEBUG_MATCH
1295 {
1296 UChar *q, *bp, buf[50];
1297 int len;
1298 fprintf(stderr, "%4d> \"", (int )(s - str));
1299 bp = buf;
1300 for (i = 0, q = s; i < 7 && q < end; i++) {
1301 len = enclen(encode, q);
1302 while (len-- > 0) *bp++ = *q++;
1303 }
1304 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1305 else { xmemcpy(bp, "\"", 1); bp += 1; }
1306 *bp = 0;
1307 fputs((char* )buf, stderr);
1308 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1309 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1310 fprintf(stderr, "\n");
1311 }
1312 #endif
1313
1314 sbegin = s;
1315 switch (*p++) {
1316 case OP_END: MOP_IN(OP_END);
1317 n = s - sstart;
1318 if (n > best_len) {
1319 OnigRegion* region;
1320 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1321 if (IS_FIND_LONGEST(option)) {
1322 if (n > msa->best_len) {
1323 msa->best_len = n;
1324 msa->best_s = (UChar* )sstart;
1325 }
1326 else
1327 goto end_best_len;
1328 }
1329 #endif
1330 best_len = n;
1331 region = msa->region;
1332 if (region) {
1333 #ifdef USE_POSIX_API_REGION_OPTION
1334 if (IS_POSIX_REGION(msa->options)) {
1335 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1336
1337 rmt[0].rm_so = sstart - str;
1338 rmt[0].rm_eo = s - str;
1339 for (i = 1; i <= num_mem; i++) {
1340 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1341 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1342 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1343 else
1344 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1345
1346 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1347 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1348 : (UChar* )((void* )mem_end_stk[i])) - str;
1349 }
1350 else {
1351 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1352 }
1353 }
1354 }
1355 else {
1356 #endif /* USE_POSIX_API_REGION_OPTION */
1357 region->beg[0] = sstart - str;
1358 region->end[0] = s - str;
1359 for (i = 1; i <= num_mem; i++) {
1360 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1361 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1362 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1363 else
1364 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1365
1366 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1367 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1368 : (UChar* )((void* )mem_end_stk[i])) - str;
1369 }
1370 else {
1371 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1372 }
1373 }
1374
1375 #ifdef USE_CAPTURE_HISTORY
1376 if (reg->capture_history != 0) {
1377 int r;
1378 OnigCaptureTreeNode* node;
1379
1380 if (IS_NULL(region->history_root)) {
1381 region->history_root = node = history_node_new();
1382 CHECK_NULL_RETURN_MEMERR(node);
1383 }
1384 else {
1385 node = region->history_root;
1386 history_tree_clear(node);
1387 }
1388
1389 node->group = 0;
1390 node->beg = sstart - str;
1391 node->end = s - str;
1392
1393 stkp = stk_base;
1394 r = make_capture_history_tree(region->history_root, &stkp,
1395 stk, (UChar* )str, reg);
1396 if (r < 0) {
1397 best_len = r; /* error code */
1398 goto finish;
1399 }
1400 }
1401 #endif /* USE_CAPTURE_HISTORY */
1402 #ifdef USE_POSIX_API_REGION_OPTION
1403 } /* else IS_POSIX_REGION() */
1404 #endif
1405 } /* if (region) */
1406 } /* n > best_len */
1407
1408 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1409 end_best_len:
1410 #endif
1411 MOP_OUT;
1412
1413 if (IS_FIND_CONDITION(option)) {
1414 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1415 best_len = ONIG_MISMATCH;
1416 goto fail; /* for retry */
1417 }
1418 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1419 goto fail; /* for retry */
1420 }
1421 }
1422
1423 /* default behavior: return first-matching result. */
1424 goto finish;
1425 break;
1426
1427 case OP_EXACT1: MOP_IN(OP_EXACT1);
1428 DATA_ENSURE(1);
1429 if (*p != *s) goto fail;
1430 p++; s++;
1431 MOP_OUT;
1432 break;
1433
1434 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1435 {
1436 int len;
1437 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1438
1439 DATA_ENSURE(1);
1440 len = ONIGENC_MBC_CASE_FOLD(encode,
1441 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1442 case_fold_flag,
1443 &s, end, lowbuf);
1444 DATA_ENSURE(0);
1445 q = lowbuf;
1446 while (len-- > 0) {
1447 if (*p != *q) {
1448 goto fail;
1449 }
1450 p++; q++;
1451 }
1452 }
1453 MOP_OUT;
1454 break;
1455
1456 case OP_EXACT2: MOP_IN(OP_EXACT2);
1457 DATA_ENSURE(2);
1458 if (*p != *s) goto fail;
1459 p++; s++;
1460 if (*p != *s) goto fail;
1461 sprev = s;
1462 p++; s++;
1463 MOP_OUT;
1464 continue;
1465 break;
1466
1467 case OP_EXACT3: MOP_IN(OP_EXACT3);
1468 DATA_ENSURE(3);
1469 if (*p != *s) goto fail;
1470 p++; s++;
1471 if (*p != *s) goto fail;
1472 p++; s++;
1473 if (*p != *s) goto fail;
1474 sprev = s;
1475 p++; s++;
1476 MOP_OUT;
1477 continue;
1478 break;
1479
1480 case OP_EXACT4: MOP_IN(OP_EXACT4);
1481 DATA_ENSURE(4);
1482 if (*p != *s) goto fail;
1483 p++; s++;
1484 if (*p != *s) goto fail;
1485 p++; s++;
1486 if (*p != *s) goto fail;
1487 p++; s++;
1488 if (*p != *s) goto fail;
1489 sprev = s;
1490 p++; s++;
1491 MOP_OUT;
1492 continue;
1493 break;
1494
1495 case OP_EXACT5: MOP_IN(OP_EXACT5);
1496 DATA_ENSURE(5);
1497 if (*p != *s) goto fail;
1498 p++; s++;
1499 if (*p != *s) goto fail;
1500 p++; s++;
1501 if (*p != *s) goto fail;
1502 p++; s++;
1503 if (*p != *s) goto fail;
1504 p++; s++;
1505 if (*p != *s) goto fail;
1506 sprev = s;
1507 p++; s++;
1508 MOP_OUT;
1509 continue;
1510 break;
1511
1512 case OP_EXACTN: MOP_IN(OP_EXACTN);
1513 GET_LENGTH_INC(tlen, p);
1514 DATA_ENSURE(tlen);
1515 while (tlen-- > 0) {
1516 if (*p++ != *s++) goto fail;
1517 }
1518 sprev = s - 1;
1519 MOP_OUT;
1520 continue;
1521 break;
1522
1523 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1524 {
1525 int len;
1526 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1527
1528 GET_LENGTH_INC(tlen, p);
1529 endp = p + tlen;
1530
1531 while (p < endp) {
1532 sprev = s;
1533 DATA_ENSURE(1);
1534 len = ONIGENC_MBC_CASE_FOLD(encode,
1535 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1536 case_fold_flag,
1537 &s, end, lowbuf);
1538 DATA_ENSURE(0);
1539 q = lowbuf;
1540 while (len-- > 0) {
1541 if (*p != *q) goto fail;
1542 p++; q++;
1543 }
1544 }
1545 }
1546
1547 MOP_OUT;
1548 continue;
1549 break;
1550
1551 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1552 DATA_ENSURE(2);
1553 if (*p != *s) goto fail;
1554 p++; s++;
1555 if (*p != *s) goto fail;
1556 p++; s++;
1557 MOP_OUT;
1558 break;
1559
1560 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1561 DATA_ENSURE(4);
1562 if (*p != *s) goto fail;
1563 p++; s++;
1564 if (*p != *s) goto fail;
1565 p++; s++;
1566 sprev = s;
1567 if (*p != *s) goto fail;
1568 p++; s++;
1569 if (*p != *s) goto fail;
1570 p++; s++;
1571 MOP_OUT;
1572 continue;
1573 break;
1574
1575 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1576 DATA_ENSURE(6);
1577 if (*p != *s) goto fail;
1578 p++; s++;
1579 if (*p != *s) goto fail;
1580 p++; s++;
1581 if (*p != *s) goto fail;
1582 p++; s++;
1583 if (*p != *s) goto fail;
1584 p++; s++;
1585 sprev = s;
1586 if (*p != *s) goto fail;
1587 p++; s++;
1588 if (*p != *s) goto fail;
1589 p++; s++;
1590 MOP_OUT;
1591 continue;
1592 break;
1593
1594 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1595 GET_LENGTH_INC(tlen, p);
1596 DATA_ENSURE(tlen * 2);
1597 while (tlen-- > 0) {
1598 if (*p != *s) goto fail;
1599 p++; s++;
1600 if (*p != *s) goto fail;
1601 p++; s++;
1602 }
1603 sprev = s - 2;
1604 MOP_OUT;
1605 continue;
1606 break;
1607
1608 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1609 GET_LENGTH_INC(tlen, p);
1610 DATA_ENSURE(tlen * 3);
1611 while (tlen-- > 0) {
1612 if (*p != *s) goto fail;
1613 p++; s++;
1614 if (*p != *s) goto fail;
1615 p++; s++;
1616 if (*p != *s) goto fail;
1617 p++; s++;
1618 }
1619 sprev = s - 3;
1620 MOP_OUT;
1621 continue;
1622 break;
1623
1624 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1625 GET_LENGTH_INC(tlen, p); /* mb-len */
1626 GET_LENGTH_INC(tlen2, p); /* string len */
1627 tlen2 *= tlen;
1628 DATA_ENSURE(tlen2);
1629 while (tlen2-- > 0) {
1630 if (*p != *s) goto fail;
1631 p++; s++;
1632 }
1633 sprev = s - tlen;
1634 MOP_OUT;
1635 continue;
1636 break;
1637
1638 case OP_CCLASS: MOP_IN(OP_CCLASS);
1639 DATA_ENSURE(1);
1640 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1641 p += SIZE_BITSET;
1642 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1643 MOP_OUT;
1644 break;
1645
1646 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1647 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1648
1649 cclass_mb:
1650 GET_LENGTH_INC(tlen, p);
1651 {
1652 OnigCodePoint code;
1653 UChar *ss;
1654 int mb_len;
1655
1656 DATA_ENSURE(1);
1657 mb_len = enclen(encode, s);
1658 DATA_ENSURE(mb_len);
1659 ss = s;
1660 s += mb_len;
1661 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1662
1663 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1664 if (! onig_is_in_code_range(p, code)) goto fail;
1665 #else
1666 q = p;
1667 ALIGNMENT_RIGHT(q);
1668 if (! onig_is_in_code_range(q, code)) goto fail;
1669 #endif
1670 }
1671 p += tlen;
1672 MOP_OUT;
1673 break;
1674
1675 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1676 DATA_ENSURE(1);
1677 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1678 p += SIZE_BITSET;
1679 goto cclass_mb;
1680 }
1681 else {
1682 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1683 goto fail;
1684
1685 p += SIZE_BITSET;
1686 GET_LENGTH_INC(tlen, p);
1687 p += tlen;
1688 s++;
1689 }
1690 MOP_OUT;
1691 break;
1692
1693 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1694 DATA_ENSURE(1);
1695 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1696 p += SIZE_BITSET;
1697 s += enclen(encode, s);
1698 MOP_OUT;
1699 break;
1700
1701 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1702 DATA_ENSURE(1);
1703 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1704 s++;
1705 GET_LENGTH_INC(tlen, p);
1706 p += tlen;
1707 goto cc_mb_not_success;
1708 }
1709
1710 cclass_mb_not:
1711 GET_LENGTH_INC(tlen, p);
1712 {
1713 OnigCodePoint code;
1714 UChar *ss;
1715 int mb_len = enclen(encode, s);
1716
1717 if (! DATA_ENSURE_CHECK(mb_len)) {
1718 DATA_ENSURE(1);
1719 s = (UChar* )end;
1720 p += tlen;
1721 goto cc_mb_not_success;
1722 }
1723
1724 ss = s;
1725 s += mb_len;
1726 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1727
1728 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1729 if (onig_is_in_code_range(p, code)) goto fail;
1730 #else
1731 q = p;
1732 ALIGNMENT_RIGHT(q);
1733 if (onig_is_in_code_range(q, code)) goto fail;
1734 #endif
1735 }
1736 p += tlen;
1737
1738 cc_mb_not_success:
1739 MOP_OUT;
1740 break;
1741
1742 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1743 DATA_ENSURE(1);
1744 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1745 p += SIZE_BITSET;
1746 goto cclass_mb_not;
1747 }
1748 else {
1749 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1750 goto fail;
1751
1752 p += SIZE_BITSET;
1753 GET_LENGTH_INC(tlen, p);
1754 p += tlen;
1755 s++;
1756 }
1757 MOP_OUT;
1758 break;
1759
1760 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1761 {
1762 OnigCodePoint code;
1763 void *node;
1764 int mb_len;
1765 UChar *ss;
1766
1767 DATA_ENSURE(1);
1768 GET_POINTER_INC(node, p);
1769 mb_len = enclen(encode, s);
1770 ss = s;
1771 s += mb_len;
1772 DATA_ENSURE(0);
1773 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1774 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1775 }
1776 MOP_OUT;
1777 break;
1778
1779 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1780 DATA_ENSURE(1);
1781 n = enclen(encode, s);
1782 DATA_ENSURE(n);
1783 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1784 s += n;
1785 MOP_OUT;
1786 break;
1787
1788 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1789 DATA_ENSURE(1);
1790 n = enclen(encode, s);
1791 DATA_ENSURE(n);
1792 s += n;
1793 MOP_OUT;
1794 break;
1795
1796 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1797 while (DATA_ENSURE_CHECK1) {
1798 STACK_PUSH_ALT(p, s, sprev);
1799 n = enclen(encode, s);
1800 DATA_ENSURE(n);
1801 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1802 sprev = s;
1803 s += n;
1804 }
1805 MOP_OUT;
1806 break;
1807
1808 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1809 while (DATA_ENSURE_CHECK1) {
1810 STACK_PUSH_ALT(p, s, sprev);
1811 n = enclen(encode, s);
1812 if (n > 1) {
1813 DATA_ENSURE(n);
1814 sprev = s;
1815 s += n;
1816 }
1817 else {
1818 sprev = s;
1819 s++;
1820 }
1821 }
1822 MOP_OUT;
1823 break;
1824
1825 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1826 while (DATA_ENSURE_CHECK1) {
1827 if (*p == *s) {
1828 STACK_PUSH_ALT(p + 1, s, sprev);
1829 }
1830 n = enclen(encode, s);
1831 DATA_ENSURE(n);
1832 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1833 sprev = s;
1834 s += n;
1835 }
1836 p++;
1837 MOP_OUT;
1838 break;
1839
1840 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1841 while (DATA_ENSURE_CHECK1) {
1842 if (*p == *s) {
1843 STACK_PUSH_ALT(p + 1, s, sprev);
1844 }
1845 n = enclen(encode, s);
1846 if (n > 1) {
1847 DATA_ENSURE(n);
1848 sprev = s;
1849 s += n;
1850 }
1851 else {
1852 sprev = s;
1853 s++;
1854 }
1855 }
1856 p++;
1857 MOP_OUT;
1858 break;
1859
1860 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1861 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1862 GET_STATE_CHECK_NUM_INC(mem, p);
1863 while (DATA_ENSURE_CHECK1) {
1864 STATE_CHECK_VAL(scv, mem);
1865 if (scv) goto fail;
1866
1867 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1868 n = enclen(encode, s);
1869 DATA_ENSURE(n);
1870 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1871 sprev = s;
1872 s += n;
1873 }
1874 MOP_OUT;
1875 break;
1876
1877 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1878 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1879
1880 GET_STATE_CHECK_NUM_INC(mem, p);
1881 while (DATA_ENSURE_CHECK1) {
1882 STATE_CHECK_VAL(scv, mem);
1883 if (scv) goto fail;
1884
1885 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1886 n = enclen(encode, s);
1887 if (n > 1) {
1888 DATA_ENSURE(n);
1889 sprev = s;
1890 s += n;
1891 }
1892 else {
1893 sprev = s;
1894 s++;
1895 }
1896 }
1897 MOP_OUT;
1898 break;
1899 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1900
1901 case OP_WORD: MOP_IN(OP_WORD);
1902 DATA_ENSURE(1);
1903 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1904 goto fail;
1905
1906 s += enclen(encode, s);
1907 MOP_OUT;
1908 break;
1909
1910 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1911 DATA_ENSURE(1);
1912 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1913 goto fail;
1914
1915 s += enclen(encode, s);
1916 MOP_OUT;
1917 break;
1918
1919 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1920 if (ON_STR_BEGIN(s)) {
1921 DATA_ENSURE(1);
1922 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1923 goto fail;
1924 }
1925 else if (ON_STR_END(s)) {
1926 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1927 goto fail;
1928 }
1929 else {
1930 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1931 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1932 goto fail;
1933 }
1934 MOP_OUT;
1935 continue;
1936 break;
1937
1938 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1939 if (ON_STR_BEGIN(s)) {
1940 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1941 goto fail;
1942 }
1943 else if (ON_STR_END(s)) {
1944 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1945 goto fail;
1946 }
1947 else {
1948 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1949 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1950 goto fail;
1951 }
1952 MOP_OUT;
1953 continue;
1954 break;
1955
1956 #ifdef USE_WORD_BEGIN_END
1957 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1958 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1959 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1960 MOP_OUT;
1961 continue;
1962 }
1963 }
1964 goto fail;
1965 break;
1966
1967 case OP_WORD_END: MOP_IN(OP_WORD_END);
1968 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1969 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1970 MOP_OUT;
1971 continue;
1972 }
1973 }
1974 goto fail;
1975 break;
1976 #endif
1977
1978 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1979 if (! ON_STR_BEGIN(s)) goto fail;
1980
1981 MOP_OUT;
1982 continue;
1983 break;
1984
1985 case OP_END_BUF: MOP_IN(OP_END_BUF);
1986 if (! ON_STR_END(s)) goto fail;
1987
1988 MOP_OUT;
1989 continue;
1990 break;
1991
1992 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
1993 if (ON_STR_BEGIN(s)) {
1994 if (IS_NOTBOL(msa->options)) goto fail;
1995 MOP_OUT;
1996 continue;
1997 }
1998 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
1999 MOP_OUT;
2000 continue;
2001 }
2002 goto fail;
2003 break;
2004
2005 case OP_END_LINE: MOP_IN(OP_END_LINE);
2006 if (ON_STR_END(s)) {
2007 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2008 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2009 #endif
2010 if (IS_NOTEOL(msa->options)) goto fail;
2011 MOP_OUT;
2012 continue;
2013 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2014 }
2015 #endif
2016 }
2017 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2018 MOP_OUT;
2019 continue;
2020 }
2021 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2022 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2023 MOP_OUT;
2024 continue;
2025 }
2026 #endif
2027 goto fail;
2028 break;
2029
2030 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2031 if (ON_STR_END(s)) {
2032 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2033 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2034 #endif
2035 if (IS_NOTEOL(msa->options)) goto fail;
2036 MOP_OUT;
2037 continue;
2038 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2039 }
2040 #endif
2041 }
2042 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2043 ON_STR_END(s + enclen(encode, s))) {
2044 MOP_OUT;
2045 continue;
2046 }
2047 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2048 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2049 UChar* ss = s + enclen(encode, s);
2050 ss += enclen(encode, ss);
2051 if (ON_STR_END(ss)) {
2052 MOP_OUT;
2053 continue;
2054 }
2055 }
2056 #endif
2057 goto fail;
2058 break;
2059
2060 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2061 if (s != msa->start)
2062 goto fail;
2063
2064 MOP_OUT;
2065 continue;
2066 break;
2067
2068 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2069 GET_MEMNUM_INC(mem, p);
2070 STACK_PUSH_MEM_START(mem, s);
2071 MOP_OUT;
2072 continue;
2073 break;
2074
2075 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2076 GET_MEMNUM_INC(mem, p);
2077 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2078 MOP_OUT;
2079 continue;
2080 break;
2081
2082 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2083 GET_MEMNUM_INC(mem, p);
2084 STACK_PUSH_MEM_END(mem, s);
2085 MOP_OUT;
2086 continue;
2087 break;
2088
2089 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2090 GET_MEMNUM_INC(mem, p);
2091 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2092 MOP_OUT;
2093 continue;
2094 break;
2095
2096 #ifdef USE_SUBEXP_CALL
2097 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2098 GET_MEMNUM_INC(mem, p);
2099 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2100 STACK_PUSH_MEM_END(mem, s);
2101 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2102 MOP_OUT;
2103 continue;
2104 break;
2105
2106 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2107 GET_MEMNUM_INC(mem, p);
2108 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2109 STACK_GET_MEM_START(mem, stkp);
2110
2111 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2112 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2113 else
2114 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2115
2116 STACK_PUSH_MEM_END_MARK(mem);
2117 MOP_OUT;
2118 continue;
2119 break;
2120 #endif
2121
2122 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2123 mem = 1;
2124 goto backref;
2125 break;
2126
2127 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2128 mem = 2;
2129 goto backref;
2130 break;
2131
2132 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2133 GET_MEMNUM_INC(mem, p);
2134 backref:
2135 {
2136 int len;
2137 UChar *pstart, *pend;
2138
2139 /* if you want to remove following line,
2140 you should check in parse and compile time. */
2141 if (mem > num_mem) goto fail;
2142 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2143 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2144
2145 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2146 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2147 else
2148 pstart = (UChar* )((void* )mem_start_stk[mem]);
2149
2150 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2151 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2152 : (UChar* )((void* )mem_end_stk[mem]));
2153 n = pend - pstart;
2154 DATA_ENSURE(n);
2155 sprev = s;
2156 STRING_CMP(pstart, s, n);
2157 while (sprev + (len = enclen(encode, sprev)) < s)
2158 sprev += len;
2159
2160 MOP_OUT;
2161 continue;
2162 }
2163 break;
2164
2165 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2166 GET_MEMNUM_INC(mem, p);
2167 {
2168 int len;
2169 UChar *pstart, *pend;
2170
2171 /* if you want to remove following line,
2172 you should check in parse and compile time. */
2173 if (mem > num_mem) goto fail;
2174 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2175 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2176
2177 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2178 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2179 else
2180 pstart = (UChar* )((void* )mem_start_stk[mem]);
2181
2182 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2183 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2184 : (UChar* )((void* )mem_end_stk[mem]));
2185 n = pend - pstart;
2186 DATA_ENSURE(n);
2187 sprev = s;
2188 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2189 while (sprev + (len = enclen(encode, sprev)) < s)
2190 sprev += len;
2191
2192 MOP_OUT;
2193 continue;
2194 }
2195 break;
2196
2197 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2198 {
2199 int len, is_fail;
2200 UChar *pstart, *pend, *swork;
2201
2202 GET_LENGTH_INC(tlen, p);
2203 for (i = 0; i < tlen; i++) {
2204 GET_MEMNUM_INC(mem, p);
2205
2206 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2207 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2208
2209 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2210 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2211 else
2212 pstart = (UChar* )((void* )mem_start_stk[mem]);
2213
2214 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2215 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2216 : (UChar* )((void* )mem_end_stk[mem]));
2217 n = pend - pstart;
2218 DATA_ENSURE(n);
2219 sprev = s;
2220 swork = s;
2221 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2222 if (is_fail) continue;
2223 s = swork;
2224 while (sprev + (len = enclen(encode, sprev)) < s)
2225 sprev += len;
2226
2227 p += (SIZE_MEMNUM * (tlen - i - 1));
2228 break; /* success */
2229 }
2230 if (i == tlen) goto fail;
2231 MOP_OUT;
2232 continue;
2233 }
2234 break;
2235
2236 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2237 {
2238 int len, is_fail;
2239 UChar *pstart, *pend, *swork;
2240
2241 GET_LENGTH_INC(tlen, p);
2242 for (i = 0; i < tlen; i++) {
2243 GET_MEMNUM_INC(mem, p);
2244
2245 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2246 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2247
2248 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2249 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2250 else
2251 pstart = (UChar* )((void* )mem_start_stk[mem]);
2252
2253 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2254 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2255 : (UChar* )((void* )mem_end_stk[mem]));
2256 n = pend - pstart;
2257 DATA_ENSURE(n);
2258 sprev = s;
2259 swork = s;
2260 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2261 if (is_fail) continue;
2262 s = swork;
2263 while (sprev + (len = enclen(encode, sprev)) < s)
2264 sprev += len;
2265
2266 p += (SIZE_MEMNUM * (tlen - i - 1));
2267 break; /* success */
2268 }
2269 if (i == tlen) goto fail;
2270 MOP_OUT;
2271 continue;
2272 }
2273 break;
2274
2275 #ifdef USE_BACKREF_WITH_LEVEL
2276 case OP_BACKREF_WITH_LEVEL:
2277 {
2278 int len;
2279 OnigOptionType ic;
2280 LengthType level;
2281
2282 GET_OPTION_INC(ic, p);
2283 GET_LENGTH_INC(level, p);
2284 GET_LENGTH_INC(tlen, p);
2285
2286 sprev = s;
2287 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2288 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2289 while (sprev + (len = enclen(encode, sprev)) < s)
2290 sprev += len;
2291
2292 p += (SIZE_MEMNUM * tlen);
2293 }
2294 else
2295 goto fail;
2296
2297 MOP_OUT;
2298 continue;
2299 }
2300
2301 break;
2302 #endif
2303
2304 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2305 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2306 GET_OPTION_INC(option, p);
2307 STACK_PUSH_ALT(p, s, sprev);
2308 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2309 MOP_OUT;
2310 continue;
2311 break;
2312
2313 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2314 GET_OPTION_INC(option, p);
2315 MOP_OUT;
2316 continue;
2317 break;
2318 #endif
2319
2320 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2321 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2322 STACK_PUSH_NULL_CHECK_START(mem, s);
2323 MOP_OUT;
2324 continue;
2325 break;
2326
2327 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2328 {
2329 int isnull;
2330
2331 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2332 STACK_NULL_CHECK(isnull, mem, s);
2333 if (isnull) {
2334 #ifdef ONIG_DEBUG_MATCH
2335 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2336 (int )mem, (int )s);
2337 #endif
2338 null_check_found:
2339 /* empty loop founded, skip next instruction */
2340 switch (*p++) {
2341 case OP_JUMP:
2342 case OP_PUSH:
2343 p += SIZE_RELADDR;
2344 break;
2345 case OP_REPEAT_INC:
2346 case OP_REPEAT_INC_NG:
2347 case OP_REPEAT_INC_SG:
2348 case OP_REPEAT_INC_NG_SG:
2349 p += SIZE_MEMNUM;
2350 break;
2351 default:
2352 goto unexpected_bytecode_error;
2353 break;
2354 }
2355 }
2356 }
2357 MOP_OUT;
2358 continue;
2359 break;
2360
2361 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2362 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2363 {
2364 int isnull;
2365
2366 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2367 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2368 if (isnull) {
2369 #ifdef ONIG_DEBUG_MATCH
2370 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2371 (int )mem, (int )s);
2372 #endif
2373 if (isnull == -1) goto fail;
2374 goto null_check_found;
2375 }
2376 }
2377 MOP_OUT;
2378 continue;
2379 break;
2380 #endif
2381
2382 #ifdef USE_SUBEXP_CALL
2383 case OP_NULL_CHECK_END_MEMST_PUSH:
2384 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2385 {
2386 int isnull;
2387
2388 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2389 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2390 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2391 #else
2392 STACK_NULL_CHECK_REC(isnull, mem, s);
2393 #endif
2394 if (isnull) {
2395 #ifdef ONIG_DEBUG_MATCH
2396 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2397 (int )mem, (int )s);
2398 #endif
2399 if (isnull == -1) goto fail;
2400 goto null_check_found;
2401 }
2402 else {
2403 STACK_PUSH_NULL_CHECK_END(mem);
2404 }
2405 }
2406 MOP_OUT;
2407 continue;
2408 break;
2409 #endif
2410
2411 case OP_JUMP: MOP_IN(OP_JUMP);
2412 GET_RELADDR_INC(addr, p);
2413 p += addr;
2414 MOP_OUT;
2415 CHECK_INTERRUPT_IN_MATCH_AT;
2416 continue;
2417 break;
2418
2419 case OP_PUSH: MOP_IN(OP_PUSH);
2420 GET_RELADDR_INC(addr, p);
2421 STACK_PUSH_ALT(p + addr, s, sprev);
2422 MOP_OUT;
2423 continue;
2424 break;
2425
2426 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2427 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2428 GET_STATE_CHECK_NUM_INC(mem, p);
2429 STATE_CHECK_VAL(scv, mem);
2430 if (scv) goto fail;
2431
2432 GET_RELADDR_INC(addr, p);
2433 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2434 MOP_OUT;
2435 continue;
2436 break;
2437
2438 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2439 GET_STATE_CHECK_NUM_INC(mem, p);
2440 GET_RELADDR_INC(addr, p);
2441 STATE_CHECK_VAL(scv, mem);
2442 if (scv) {
2443 p += addr;
2444 }
2445 else {
2446 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2447 }
2448 MOP_OUT;
2449 continue;
2450 break;
2451
2452 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2453 GET_STATE_CHECK_NUM_INC(mem, p);
2454 STATE_CHECK_VAL(scv, mem);
2455 if (scv) goto fail;
2456
2457 STACK_PUSH_STATE_CHECK(s, mem);
2458 MOP_OUT;
2459 continue;
2460 break;
2461 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2462
2463 case OP_POP: MOP_IN(OP_POP);
2464 STACK_POP_ONE;
2465 MOP_OUT;
2466 continue;
2467 break;
2468
2469 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2470 GET_RELADDR_INC(addr, p);
2471 if (*p == *s && DATA_ENSURE_CHECK1) {
2472 p++;
2473 STACK_PUSH_ALT(p + addr, s, sprev);
2474 MOP_OUT;
2475 continue;
2476 }
2477 p += (addr + 1);
2478 MOP_OUT;
2479 continue;
2480 break;
2481
2482 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2483 GET_RELADDR_INC(addr, p);
2484 if (*p == *s) {
2485 p++;
2486 STACK_PUSH_ALT(p + addr, s, sprev);
2487 MOP_OUT;
2488 continue;
2489 }
2490 p++;
2491 MOP_OUT;
2492 continue;
2493 break;
2494
2495 case OP_REPEAT: MOP_IN(OP_REPEAT);
2496 {
2497 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2498 GET_RELADDR_INC(addr, p);
2499
2500 STACK_ENSURE(1);
2501 repeat_stk[mem] = GET_STACK_INDEX(stk);
2502 STACK_PUSH_REPEAT(mem, p);
2503
2504 if (reg->repeat_range[mem].lower == 0) {
2505 STACK_PUSH_ALT(p + addr, s, sprev);
2506 }
2507 }
2508 MOP_OUT;
2509 continue;
2510 break;
2511
2512 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2513 {
2514 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2515 GET_RELADDR_INC(addr, p);
2516
2517 STACK_ENSURE(1);
2518 repeat_stk[mem] = GET_STACK_INDEX(stk);
2519 STACK_PUSH_REPEAT(mem, p);
2520
2521 if (reg->repeat_range[mem].lower == 0) {
2522 STACK_PUSH_ALT(p, s, sprev);
2523 p += addr;
2524 }
2525 }
2526 MOP_OUT;
2527 continue;
2528 break;
2529
2530 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2531 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2532 si = repeat_stk[mem];
2533 stkp = STACK_AT(si);
2534
2535 repeat_inc:
2536 stkp->u.repeat.count++;
2537 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2538 /* end of repeat. Nothing to do. */
2539 }
2540 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2541 STACK_PUSH_ALT(p, s, sprev);
2542 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2543 }
2544 else {
2545 p = stkp->u.repeat.pcode;
2546 }
2547 STACK_PUSH_REPEAT_INC(si);
2548 MOP_OUT;
2549 CHECK_INTERRUPT_IN_MATCH_AT;
2550 continue;
2551 break;
2552
2553 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2554 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2555 STACK_GET_REPEAT(mem, stkp);
2556 si = GET_STACK_INDEX(stkp);
2557 goto repeat_inc;
2558 break;
2559
2560 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2561 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2562 si = repeat_stk[mem];
2563 stkp = STACK_AT(si);
2564
2565 repeat_inc_ng:
2566 stkp->u.repeat.count++;
2567 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2568 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2569 UChar* pcode = stkp->u.repeat.pcode;
2570
2571 STACK_PUSH_REPEAT_INC(si);
2572 STACK_PUSH_ALT(pcode, s, sprev);
2573 }
2574 else {
2575 p = stkp->u.repeat.pcode;
2576 STACK_PUSH_REPEAT_INC(si);
2577 }
2578 }
2579 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2580 STACK_PUSH_REPEAT_INC(si);
2581 }
2582 MOP_OUT;
2583 CHECK_INTERRUPT_IN_MATCH_AT;
2584 continue;
2585 break;
2586
2587 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2588 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2589 STACK_GET_REPEAT(mem, stkp);
2590 si = GET_STACK_INDEX(stkp);
2591 goto repeat_inc_ng;
2592 break;
2593
2594 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2595 STACK_PUSH_POS(s, sprev);
2596 MOP_OUT;
2597 continue;
2598 break;
2599
2600 case OP_POP_POS: MOP_IN(OP_POP_POS);
2601 {
2602 STACK_POS_END(stkp);
2603 s = stkp->u.state.pstr;
2604 sprev = stkp->u.state.pstr_prev;
2605 }
2606 MOP_OUT;
2607 continue;
2608 break;
2609
2610 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2611 GET_RELADDR_INC(addr, p);
2612 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2613 MOP_OUT;
2614 continue;
2615 break;
2616
2617 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2618 STACK_POP_TIL_POS_NOT;
2619 goto fail;
2620 break;
2621
2622 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2623 STACK_PUSH_STOP_BT;
2624 MOP_OUT;
2625 continue;
2626 break;
2627
2628 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2629 STACK_STOP_BT_END;
2630 MOP_OUT;
2631 continue;
2632 break;
2633
2634 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2635 GET_LENGTH_INC(tlen, p);
2636 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2637 if (IS_NULL(s)) goto fail;
2638 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2639 MOP_OUT;
2640 continue;
2641 break;
2642
2643 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2644 GET_RELADDR_INC(addr, p);
2645 GET_LENGTH_INC(tlen, p);
2646 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2647 if (IS_NULL(q)) {
2648 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2649 If you want to change to fail, replace following line. */
2650 p += addr;
2651 /* goto fail; */
2652 }
2653 else {
2654 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2655 s = q;
2656 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2657 }
2658 MOP_OUT;
2659 continue;
2660 break;
2661
2662 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2663 STACK_POP_TIL_LOOK_BEHIND_NOT;
2664 goto fail;
2665 break;
2666
2667 #ifdef USE_SUBEXP_CALL
2668 case OP_CALL: MOP_IN(OP_CALL);
2669 GET_ABSADDR_INC(addr, p);
2670 STACK_PUSH_CALL_FRAME(p);
2671 p = reg->p + addr;
2672 MOP_OUT;
2673 continue;
2674 break;
2675
2676 case OP_RETURN: MOP_IN(OP_RETURN);
2677 STACK_RETURN(p);
2678 STACK_PUSH_RETURN;
2679 MOP_OUT;
2680 continue;
2681 break;
2682 #endif
2683
2684 case OP_FINISH:
2685 goto finish;
2686 break;
2687
2688 fail:
2689 MOP_OUT;
2690 /* fall */
2691 case OP_FAIL: MOP_IN(OP_FAIL);
2692 STACK_POP;
2693 p = stk->u.state.pcode;
2694 s = stk->u.state.pstr;
2695 sprev = stk->u.state.pstr_prev;
2696
2697 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2698 if (stk->u.state.state_check != 0) {
2699 stk->type = STK_STATE_CHECK_MARK;
2700 stk++;
2701 }
2702 #endif
2703
2704 MOP_OUT;
2705 continue;
2706 break;
2707
2708 default:
2709 goto bytecode_error;
2710
2711 } /* end of switch */
2712 sprev = sbegin;
2713 } /* end of while(1) */
2714
2715 finish:
2716 STACK_SAVE;
2717 return best_len;
2718
2719 #ifdef ONIG_DEBUG
2720 stack_error:
2721 STACK_SAVE;
2722 return ONIGERR_STACK_BUG;
2723 #endif
2724
2725 bytecode_error:
2726 STACK_SAVE;
2727 return ONIGERR_UNDEFINED_BYTECODE;
2728
2729 unexpected_bytecode_error:
2730 STACK_SAVE;
2731 return ONIGERR_UNEXPECTED_BYTECODE;
2732 }
2733
2734
2735 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2736 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2737 const UChar* text, const UChar* text_end, UChar* text_range)
2738 {
2739 UChar *t, *p, *s, *end;
2740
2741 end = (UChar* )text_end;
2742 end -= target_end - target - 1;
2743 if (end > text_range)
2744 end = text_range;
2745
2746 s = (UChar* )text;
2747
2748 while (s < end) {
2749 if (*s == *target) {
2750 p = s + 1;
2751 t = target + 1;
2752 while (t < target_end) {
2753 if (*t != *p++)
2754 break;
2755 t++;
2756 }
2757 if (t == target_end)
2758 return s;
2759 }
2760 s += enclen(enc, s);
2761 }
2762
2763 return (UChar* )NULL;
2764 }
2765
2766 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2767 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2768 const UChar* t, const UChar* tend,
2769 const UChar* p, const UChar* end)
2770 {
2771 int lowlen;
2772 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2773
2774 while (t < tend) {
2775 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2776 q = lowbuf;
2777 while (lowlen > 0) {
2778 if (*t++ != *q++) return 0;
2779 lowlen--;
2780 }
2781 }
2782
2783 return 1;
2784 }
2785
2786 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2787 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2788 UChar* target, UChar* target_end,
2789 const UChar* text, const UChar* text_end, UChar* text_range)
2790 {
2791 UChar *s, *end;
2792
2793 end = (UChar* )text_end;
2794 end -= target_end - target - 1;
2795 if (end > text_range)
2796 end = text_range;
2797
2798 s = (UChar* )text;
2799
2800 while (s < end) {
2801 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2802 s, text_end))
2803 return s;
2804
2805 s += enclen(enc, s);
2806 }
2807
2808 return (UChar* )NULL;
2809 }
2810
2811 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2812 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2813 const UChar* text, const UChar* adjust_text,
2814 const UChar* text_end, const UChar* text_start)
2815 {
2816 UChar *t, *p, *s;
2817
2818 s = (UChar* )text_end;
2819 s -= (target_end - target);
2820 if (s > text_start)
2821 s = (UChar* )text_start;
2822 else
2823 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2824
2825 while (s >= text) {
2826 if (*s == *target) {
2827 p = s + 1;
2828 t = target + 1;
2829 while (t < target_end) {
2830 if (*t != *p++)
2831 break;
2832 t++;
2833 }
2834 if (t == target_end)
2835 return s;
2836 }
2837 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2838 }
2839
2840 return (UChar* )NULL;
2841 }
2842
2843 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2844 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2845 UChar* target, UChar* target_end,
2846 const UChar* text, const UChar* adjust_text,
2847 const UChar* text_end, const UChar* text_start)
2848 {
2849 UChar *s;
2850
2851 s = (UChar* )text_end;
2852 s -= (target_end - target);
2853 if (s > text_start)
2854 s = (UChar* )text_start;
2855 else
2856 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2857
2858 while (s >= text) {
2859 if (str_lower_case_match(enc, case_fold_flag,
2860 target, target_end, s, text_end))
2861 return s;
2862
2863 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2864 }
2865
2866 return (UChar* )NULL;
2867 }
2868
2869 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2870 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2871 const UChar* text, const UChar* text_end,
2872 const UChar* text_range)
2873 {
2874 const UChar *s, *se, *t, *p, *end;
2875 const UChar *tail;
2876 int skip, tlen1;
2877
2878 #ifdef ONIG_DEBUG_SEARCH
2879 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2880 (int )text, (int )text_end, (int )text_range);
2881 #endif
2882
2883 tail = target_end - 1;
2884 tlen1 = tail - target;
2885 end = text_range;
2886 if (end + tlen1 > text_end)
2887 end = text_end - tlen1;
2888
2889 s = text;
2890
2891 if (IS_NULL(reg->int_map)) {
2892 while (s < end) {
2893 p = se = s + tlen1;
2894 t = tail;
2895 while (*p == *t) {
2896 if (t == target) return (UChar* )s;
2897 p--; t--;
2898 }
2899 skip = reg->map[*se];
2900 t = s;
2901 do {
2902 s += enclen(reg->enc, s);
2903 } while ((s - t) < skip && s < end);
2904 }
2905 }
2906 else {
2907 while (s < end) {
2908 p = se = s + tlen1;
2909 t = tail;
2910 while (*p == *t) {
2911 if (t == target) return (UChar* )s;
2912 p--; t--;
2913 }
2914 skip = reg->int_map[*se];
2915 t = s;
2916 do {
2917 s += enclen(reg->enc, s);
2918 } while ((s - t) < skip && s < end);
2919 }
2920 }
2921
2922 return (UChar* )NULL;
2923 }
2924
2925 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2926 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2927 const UChar* text, const UChar* text_end, const UChar* text_range)
2928 {
2929 const UChar *s, *t, *p, *end;
2930 const UChar *tail;
2931
2932 end = text_range + (target_end - target) - 1;
2933 if (end > text_end)
2934 end = text_end;
2935
2936 tail = target_end - 1;
2937 s = text + (target_end - target) - 1;
2938 if (IS_NULL(reg->int_map)) {
2939 while (s < end) {
2940 p = s;
2941 t = tail;
2942 while (*p == *t) {
2943 if (t == target) return (UChar* )p;
2944 p--; t--;
2945 }
2946 s += reg->map[*s];
2947 }
2948 }
2949 else { /* see int_map[] */
2950 while (s < end) {
2951 p = s;
2952 t = tail;
2953 while (*p == *t) {
2954 if (t == target) return (UChar* )p;
2955 p--; t--;
2956 }
2957 s += reg->int_map[*s];
2958 }
2959 }
2960 return (UChar* )NULL;
2961 }
2962
2963 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,int ** skip)2964 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2965 int** skip)
2966
2967 {
2968 int i, len;
2969
2970 if (IS_NULL(*skip)) {
2971 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2972 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2973 }
2974
2975 len = end - s;
2976 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
2977 (*skip)[i] = len;
2978
2979 for (i = len - 1; i > 0; i--)
2980 (*skip)[s[i]] = i;
2981
2982 return 0;
2983 }
2984
2985 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2986 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
2987 const UChar* text, const UChar* adjust_text,
2988 const UChar* text_end, const UChar* text_start)
2989 {
2990 const UChar *s, *t, *p;
2991
2992 s = text_end - (target_end - target);
2993 if (text_start < s)
2994 s = text_start;
2995 else
2996 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
2997
2998 while (s >= text) {
2999 p = s;
3000 t = target;
3001 while (t < target_end && *p == *t) {
3002 p++; t++;
3003 }
3004 if (t == target_end)
3005 return (UChar* )s;
3006
3007 s -= reg->int_map_backward[*s];
3008 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3009 }
3010
3011 return (UChar* )NULL;
3012 }
3013
3014 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3015 map_search(OnigEncoding enc, UChar map[],
3016 const UChar* text, const UChar* text_range)
3017 {
3018 const UChar *s = text;
3019
3020 while (s < text_range) {
3021 if (map[*s]) return (UChar* )s;
3022
3023 s += enclen(enc, s);
3024 }
3025 return (UChar* )NULL;
3026 }
3027
3028 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3029 map_search_backward(OnigEncoding enc, UChar map[],
3030 const UChar* text, const UChar* adjust_text,
3031 const UChar* text_start)
3032 {
3033 const UChar *s = text_start;
3034
3035 while (s >= text) {
3036 if (map[*s]) return (UChar* )s;
3037
3038 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3039 }
3040 return (UChar* )NULL;
3041 }
3042
3043 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3044 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3045 OnigOptionType option)
3046 {
3047 int r;
3048 UChar *prev;
3049 OnigMatchArg msa;
3050
3051 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3052 start:
3053 THREAD_ATOMIC_START;
3054 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3055 ONIG_STATE_INC(reg);
3056 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3057 onig_chain_reduce(reg);
3058 ONIG_STATE_INC(reg);
3059 }
3060 }
3061 else {
3062 int n;
3063
3064 THREAD_ATOMIC_END;
3065 n = 0;
3066 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3067 if (++n > THREAD_PASS_LIMIT_COUNT)
3068 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3069 THREAD_PASS;
3070 }
3071 goto start;
3072 }
3073 THREAD_ATOMIC_END;
3074 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3075
3076 MATCH_ARG_INIT(msa, option, region, at);
3077 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3078 {
3079 int offset = at - str;
3080 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3081 }
3082 #endif
3083
3084 if (region
3085 #ifdef USE_POSIX_API_REGION_OPTION
3086 && !IS_POSIX_REGION(option)
3087 #endif
3088 ) {
3089 r = onig_region_resize_clear(region, reg->num_mem + 1);
3090 }
3091 else
3092 r = 0;
3093
3094 if (r == 0) {
3095 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3096 r = match_at(reg, str, end,
3097 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3098 end,
3099 #endif
3100 at, prev, &msa);
3101 }
3102
3103 MATCH_ARG_FREE(msa);
3104 ONIG_STATE_DEC_THREAD(reg);
3105 return r;
3106 }
3107
3108 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3109 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3110 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3111 {
3112 UChar *p, *pprev = (UChar* )NULL;
3113
3114 #ifdef ONIG_DEBUG_SEARCH
3115 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3116 (int )str, (int )end, (int )s, (int )range);
3117 #endif
3118
3119 p = s;
3120 if (reg->dmin > 0) {
3121 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3122 p += reg->dmin;
3123 }
3124 else {
3125 UChar *q = p + reg->dmin;
3126
3127 if (q >= end) return 0; /* fail */
3128 while (p < q) p += enclen(reg->enc, p);
3129 }
3130 }
3131
3132 retry:
3133 switch (reg->optimize) {
3134 case ONIG_OPTIMIZE_EXACT:
3135 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3136 break;
3137 case ONIG_OPTIMIZE_EXACT_IC:
3138 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3139 reg->exact, reg->exact_end, p, end, range);
3140 break;
3141
3142 case ONIG_OPTIMIZE_EXACT_BM:
3143 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3144 break;
3145
3146 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3147 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3148 break;
3149
3150 case ONIG_OPTIMIZE_MAP:
3151 p = map_search(reg->enc, reg->map, p, range);
3152 break;
3153 }
3154
3155 if (p && p < range) {
3156 if (p - reg->dmin < s) {
3157 retry_gate:
3158 pprev = p;
3159 p += enclen(reg->enc, p);
3160 goto retry;
3161 }
3162
3163 if (reg->sub_anchor) {
3164 UChar* prev;
3165
3166 switch (reg->sub_anchor) {
3167 case ANCHOR_BEGIN_LINE:
3168 if (!ON_STR_BEGIN(p)) {
3169 prev = onigenc_get_prev_char_head(reg->enc,
3170 (pprev ? pprev : str), p);
3171 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3172 goto retry_gate;
3173 }
3174 break;
3175
3176 case ANCHOR_END_LINE:
3177 if (ON_STR_END(p)) {
3178 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3179 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3180 (pprev ? pprev : str), p);
3181 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3182 goto retry_gate;
3183 #endif
3184 }
3185 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3186 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3187 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3188 #endif
3189 )
3190 goto retry_gate;
3191 break;
3192 }
3193 }
3194
3195 if (reg->dmax == 0) {
3196 *low = p;
3197 if (low_prev) {
3198 if (*low > s)
3199 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3200 else
3201 *low_prev = onigenc_get_prev_char_head(reg->enc,
3202 (pprev ? pprev : str), p);
3203 }
3204 }
3205 else {
3206 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3207 *low = p - reg->dmax;
3208 if (p - str < reg->dmax) {
3209 *low = (UChar* )str;
3210 if (low_prev)
3211 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
3212 }
3213 else {
3214 if (*low > s) {
3215 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3216 *low, (const UChar** )low_prev);
3217 if (low_prev && IS_NULL(*low_prev))
3218 *low_prev = onigenc_get_prev_char_head(reg->enc,
3219 (pprev ? pprev : s), *low);
3220 }
3221 else {
3222 if (low_prev)
3223 *low_prev = onigenc_get_prev_char_head(reg->enc,
3224 (pprev ? pprev : str), *low);
3225 }
3226 }
3227 }
3228 }
3229 /* no needs to adjust *high, *high is used as range check only */
3230 *high = p - reg->dmin;
3231
3232 #ifdef ONIG_DEBUG_SEARCH
3233 fprintf(stderr,
3234 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3235 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3236 #endif
3237 return 1; /* success */
3238 }
3239
3240 return 0; /* fail */
3241 }
3242
3243 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3244 int** skip));
3245
3246 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3247
3248 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3249 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3250 UChar* s, const UChar* range, UChar* adjrange,
3251 UChar** low, UChar** high)
3252 {
3253 int r;
3254 UChar *p;
3255
3256 range += reg->dmin;
3257 p = s;
3258
3259 retry:
3260 switch (reg->optimize) {
3261 case ONIG_OPTIMIZE_EXACT:
3262 exact_method:
3263 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3264 range, adjrange, end, p);
3265 break;
3266
3267 case ONIG_OPTIMIZE_EXACT_IC:
3268 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3269 reg->exact, reg->exact_end,
3270 range, adjrange, end, p);
3271 break;
3272
3273 case ONIG_OPTIMIZE_EXACT_BM:
3274 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3275 if (IS_NULL(reg->int_map_backward)) {
3276 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3277 goto exact_method;
3278
3279 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3280 &(reg->int_map_backward));
3281 if (r) return r;
3282 }
3283 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3284 end, p);
3285 break;
3286
3287 case ONIG_OPTIMIZE_MAP:
3288 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3289 break;
3290 }
3291
3292 if (p) {
3293 if (reg->sub_anchor) {
3294 UChar* prev;
3295
3296 switch (reg->sub_anchor) {
3297 case ANCHOR_BEGIN_LINE:
3298 if (!ON_STR_BEGIN(p)) {
3299 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3300 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3301 p = prev;
3302 goto retry;
3303 }
3304 }
3305 break;
3306
3307 case ANCHOR_END_LINE:
3308 if (ON_STR_END(p)) {
3309 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3310 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3311 if (IS_NULL(prev)) goto fail;
3312 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3313 p = prev;
3314 goto retry;
3315 }
3316 #endif
3317 }
3318 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3319 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3320 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3321 #endif
3322 ) {
3323 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3324 if (IS_NULL(p)) goto fail;
3325 goto retry;
3326 }
3327 break;
3328 }
3329 }
3330
3331 /* no needs to adjust *high, *high is used as range check only */
3332 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3333 *low = p - reg->dmax;
3334 *high = p - reg->dmin;
3335 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3336 }
3337
3338 #ifdef ONIG_DEBUG_SEARCH
3339 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3340 (int )(*low - str), (int )(*high - str));
3341 #endif
3342 return 1; /* success */
3343 }
3344
3345 fail:
3346 #ifdef ONIG_DEBUG_SEARCH
3347 fprintf(stderr, "backward_search_range: fail.\n");
3348 #endif
3349 return 0; /* fail */
3350 }
3351
3352
3353 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3354 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3355 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3356 {
3357 int r;
3358 UChar *s, *prev;
3359 OnigMatchArg msa;
3360 const UChar *orig_start = start;
3361 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3362 const UChar *orig_range = range;
3363 #endif
3364
3365 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3366 start:
3367 THREAD_ATOMIC_START;
3368 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3369 ONIG_STATE_INC(reg);
3370 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3371 onig_chain_reduce(reg);
3372 ONIG_STATE_INC(reg);
3373 }
3374 }
3375 else {
3376 int n;
3377
3378 THREAD_ATOMIC_END;
3379 n = 0;
3380 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3381 if (++n > THREAD_PASS_LIMIT_COUNT)
3382 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3383 THREAD_PASS;
3384 }
3385 goto start;
3386 }
3387 THREAD_ATOMIC_END;
3388 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3389
3390 #ifdef ONIG_DEBUG_SEARCH
3391 fprintf(stderr,
3392 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3393 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3394 #endif
3395
3396 if (region
3397 #ifdef USE_POSIX_API_REGION_OPTION
3398 && !IS_POSIX_REGION(option)
3399 #endif
3400 ) {
3401 r = onig_region_resize_clear(region, reg->num_mem + 1);
3402 if (r) goto finish_no_msa;
3403 }
3404
3405 if (start > end || start < str) goto mismatch_no_msa;
3406
3407
3408 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3409 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3410 #define MATCH_AND_RETURN_CHECK(upper_range) \
3411 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3412 if (r != ONIG_MISMATCH) {\
3413 if (r >= 0) {\
3414 if (! IS_FIND_LONGEST(reg->options)) {\
3415 goto match;\
3416 }\
3417 }\
3418 else goto finish; /* error */ \
3419 }
3420 #else
3421 #define MATCH_AND_RETURN_CHECK(upper_range) \
3422 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3423 if (r != ONIG_MISMATCH) {\
3424 if (r >= 0) {\
3425 goto match;\
3426 }\
3427 else goto finish; /* error */ \
3428 }
3429 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3430 #else
3431 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3432 #define MATCH_AND_RETURN_CHECK(none) \
3433 r = match_at(reg, str, end, s, prev, &msa);\
3434 if (r != ONIG_MISMATCH) {\
3435 if (r >= 0) {\
3436 if (! IS_FIND_LONGEST(reg->options)) {\
3437 goto match;\
3438 }\
3439 }\
3440 else goto finish; /* error */ \
3441 }
3442 #else
3443 #define MATCH_AND_RETURN_CHECK(none) \
3444 r = match_at(reg, str, end, s, prev, &msa);\
3445 if (r != ONIG_MISMATCH) {\
3446 if (r >= 0) {\
3447 goto match;\
3448 }\
3449 else goto finish; /* error */ \
3450 }
3451 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3452 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3453
3454
3455 /* anchor optimize: resume search range */
3456 if (reg->anchor != 0 && str < end) {
3457 UChar *min_semi_end, *max_semi_end;
3458
3459 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3460 /* search start-position only */
3461 begin_position:
3462 if (range > start)
3463 range = start + 1;
3464 else
3465 range = start;
3466 }
3467 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3468 /* search str-position only */
3469 if (range > start) {
3470 if (start != str) goto mismatch_no_msa;
3471 range = str + 1;
3472 }
3473 else {
3474 if (range <= str) {
3475 start = str;
3476 range = str;
3477 }
3478 else
3479 goto mismatch_no_msa;
3480 }
3481 }
3482 else if (reg->anchor & ANCHOR_END_BUF) {
3483 min_semi_end = max_semi_end = (UChar* )end;
3484
3485 end_buf:
3486 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3487 goto mismatch_no_msa;
3488
3489 if (range > start) {
3490 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3491 start = min_semi_end - reg->anchor_dmax;
3492 if (start < end)
3493 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3494 else { /* match with empty at end */
3495 start = onigenc_get_prev_char_head(reg->enc, str, end);
3496 }
3497 }
3498 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3499 range = max_semi_end - reg->anchor_dmin + 1;
3500 }
3501
3502 if (start >= range) goto mismatch_no_msa;
3503 }
3504 else {
3505 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3506 range = min_semi_end - reg->anchor_dmax;
3507 }
3508 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3509 start = max_semi_end - reg->anchor_dmin;
3510 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3511 }
3512 if (range > start) goto mismatch_no_msa;
3513 }
3514 }
3515 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3516 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3517
3518 max_semi_end = (UChar* )end;
3519 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3520 min_semi_end = pre_end;
3521
3522 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3523 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3524 if (IS_NOT_NULL(pre_end) &&
3525 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3526 min_semi_end = pre_end;
3527 }
3528 #endif
3529 if (min_semi_end > str && start <= min_semi_end) {
3530 goto end_buf;
3531 }
3532 }
3533 else {
3534 min_semi_end = (UChar* )end;
3535 goto end_buf;
3536 }
3537 }
3538 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3539 goto begin_position;
3540 }
3541 }
3542 else if (str == end) { /* empty string */
3543 static const UChar* address_for_empty_string = (UChar* )"";
3544
3545 #ifdef ONIG_DEBUG_SEARCH
3546 fprintf(stderr, "onig_search: empty string.\n");
3547 #endif
3548
3549 if (reg->threshold_len == 0) {
3550 start = end = str = address_for_empty_string;
3551 s = (UChar* )start;
3552 prev = (UChar* )NULL;
3553
3554 MATCH_ARG_INIT(msa, option, region, start);
3555 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3556 msa.state_check_buff = (void* )0;
3557 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3558 #endif
3559 MATCH_AND_RETURN_CHECK(end);
3560 goto mismatch;
3561 }
3562 goto mismatch_no_msa;
3563 }
3564
3565 #ifdef ONIG_DEBUG_SEARCH
3566 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3567 (int )(end - str), (int )(start - str), (int )(range - str));
3568 #endif
3569
3570 MATCH_ARG_INIT(msa, option, region, orig_start);
3571 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3572 {
3573 int offset = (MIN(start, range) - str);
3574 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3575 }
3576 #endif
3577
3578 s = (UChar* )start;
3579 if (range > start) { /* forward search */
3580 if (s > str)
3581 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3582 else
3583 prev = (UChar* )NULL;
3584
3585 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3586 UChar *sch_range, *low, *high, *low_prev;
3587
3588 sch_range = (UChar* )range;
3589 if (reg->dmax != 0) {
3590 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3591 sch_range = (UChar* )end;
3592 else {
3593 sch_range += reg->dmax;
3594 if (sch_range > end) sch_range = (UChar* )end;
3595 }
3596 }
3597
3598 if ((end - start) < reg->threshold_len)
3599 goto mismatch;
3600
3601 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3602 do {
3603 if (! forward_search_range(reg, str, end, s, sch_range,
3604 &low, &high, &low_prev)) goto mismatch;
3605 if (s < low) {
3606 s = low;
3607 prev = low_prev;
3608 }
3609 while (s <= high) {
3610 MATCH_AND_RETURN_CHECK(orig_range);
3611 prev = s;
3612 s += enclen(reg->enc, s);
3613 }
3614 } while (s < range);
3615 goto mismatch;
3616 }
3617 else { /* check only. */
3618 if (! forward_search_range(reg, str, end, s, sch_range,
3619 &low, &high, (UChar** )NULL)) goto mismatch;
3620
3621 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3622 do {
3623 MATCH_AND_RETURN_CHECK(orig_range);
3624 prev = s;
3625 s += enclen(reg->enc, s);
3626
3627 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3628 prev = s;
3629 s += enclen(reg->enc, s);
3630 }
3631 } while (s < range);
3632 goto mismatch;
3633 }
3634 }
3635 }
3636
3637 do {
3638 MATCH_AND_RETURN_CHECK(orig_range);
3639 prev = s;
3640 s += enclen(reg->enc, s);
3641 } while (s < range);
3642
3643 if (s == range) { /* because empty match with /$/. */
3644 MATCH_AND_RETURN_CHECK(orig_range);
3645 }
3646 }
3647 else { /* backward search */
3648 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3649 if (orig_start < end)
3650 orig_start += enclen(reg->enc, orig_start); /* is upper range */
3651 #endif
3652
3653 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3654 UChar *low, *high, *adjrange, *sch_start;
3655
3656 if (range < end)
3657 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3658 else
3659 adjrange = (UChar* )end;
3660
3661 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3662 (end - range) >= reg->threshold_len) {
3663 do {
3664 sch_start = s + reg->dmax;
3665 if (sch_start > end) sch_start = (UChar* )end;
3666 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3667 &low, &high) <= 0)
3668 goto mismatch;
3669
3670 if (s > high)
3671 s = high;
3672
3673 while (s >= low) {
3674 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3675 MATCH_AND_RETURN_CHECK(orig_start);
3676 s = prev;
3677 }
3678 } while (s >= range);
3679 goto mismatch;
3680 }
3681 else { /* check only. */
3682 if ((end - range) < reg->threshold_len) goto mismatch;
3683
3684 sch_start = s;
3685 if (reg->dmax != 0) {
3686 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3687 sch_start = (UChar* )end;
3688 else {
3689 sch_start += reg->dmax;
3690 if (sch_start > end) sch_start = (UChar* )end;
3691 else
3692 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3693 start, sch_start);
3694 }
3695 }
3696 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3697 &low, &high) <= 0) goto mismatch;
3698 }
3699 }
3700
3701 do {
3702 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3703 MATCH_AND_RETURN_CHECK(orig_start);
3704 s = prev;
3705 } while (s >= range);
3706 }
3707
3708 mismatch:
3709 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3710 if (IS_FIND_LONGEST(reg->options)) {
3711 if (msa.best_len >= 0) {
3712 s = msa.best_s;
3713 goto match;
3714 }
3715 }
3716 #endif
3717 r = ONIG_MISMATCH;
3718
3719 finish:
3720 MATCH_ARG_FREE(msa);
3721 ONIG_STATE_DEC_THREAD(reg);
3722
3723 /* If result is mismatch and no FIND_NOT_EMPTY option,
3724 then the region is not setted in match_at(). */
3725 if (IS_FIND_NOT_EMPTY(reg->options) && region
3726 #ifdef USE_POSIX_API_REGION_OPTION
3727 && !IS_POSIX_REGION(option)
3728 #endif
3729 ) {
3730 onig_region_clear(region);
3731 }
3732
3733 #ifdef ONIG_DEBUG
3734 if (r != ONIG_MISMATCH)
3735 fprintf(stderr, "onig_search: error %d\n", r);
3736 #endif
3737 return r;
3738
3739 mismatch_no_msa:
3740 r = ONIG_MISMATCH;
3741 finish_no_msa:
3742 ONIG_STATE_DEC_THREAD(reg);
3743 #ifdef ONIG_DEBUG
3744 if (r != ONIG_MISMATCH)
3745 fprintf(stderr, "onig_search: error %d\n", r);
3746 #endif
3747 return r;
3748
3749 match:
3750 ONIG_STATE_DEC_THREAD(reg);
3751 MATCH_ARG_FREE(msa);
3752 return s - str;
3753 }
3754
3755 extern OnigEncoding
onig_get_encoding(regex_t * reg)3756 onig_get_encoding(regex_t* reg)
3757 {
3758 return reg->enc;
3759 }
3760
3761 extern OnigOptionType
onig_get_options(regex_t * reg)3762 onig_get_options(regex_t* reg)
3763 {
3764 return reg->options;
3765 }
3766
3767 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)3768 onig_get_case_fold_flag(regex_t* reg)
3769 {
3770 return reg->case_fold_flag;
3771 }
3772
3773 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3774 onig_get_syntax(regex_t* reg)
3775 {
3776 return reg->syntax;
3777 }
3778
3779 extern int
onig_number_of_captures(regex_t * reg)3780 onig_number_of_captures(regex_t* reg)
3781 {
3782 return reg->num_mem;
3783 }
3784
3785 extern int
onig_number_of_capture_histories(regex_t * reg)3786 onig_number_of_capture_histories(regex_t* reg)
3787 {
3788 #ifdef USE_CAPTURE_HISTORY
3789 int i, n;
3790
3791 n = 0;
3792 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3793 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3794 n++;
3795 }
3796 return n;
3797 #else
3798 return 0;
3799 #endif
3800 }
3801
3802 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3803 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3804 {
3805 *to = *from;
3806 }
3807
3808