1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
33
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
39
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
42
43 static void
history_tree_clear(OnigCaptureTreeNode * node)44 history_tree_clear(OnigCaptureTreeNode* node)
45 {
46 int i;
47
48 if (IS_NOT_NULL(node)) {
49 for (i = 0; i < node->num_childs; i++) {
50 if (IS_NOT_NULL(node->childs[i])) {
51 history_tree_free(node->childs[i]);
52 }
53 }
54 for (i = 0; i < node->allocated; i++) {
55 node->childs[i] = (OnigCaptureTreeNode* )0;
56 }
57 node->num_childs = 0;
58 node->beg = ONIG_REGION_NOTPOS;
59 node->end = ONIG_REGION_NOTPOS;
60 node->group = -1;
61 }
62 }
63
64 static void
history_tree_free(OnigCaptureTreeNode * node)65 history_tree_free(OnigCaptureTreeNode* node)
66 {
67 history_tree_clear(node);
68 xfree(node);
69 }
70
71 static void
history_root_free(OnigRegion * r)72 history_root_free(OnigRegion* r)
73 {
74 if (IS_NOT_NULL(r->history_root)) {
75 history_tree_free(r->history_root);
76 r->history_root = (OnigCaptureTreeNode* )0;
77 }
78 }
79
80 static OnigCaptureTreeNode*
history_node_new(void)81 history_node_new(void)
82 {
83 OnigCaptureTreeNode* node;
84
85 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86 CHECK_NULL_RETURN(node);
87 node->childs = (OnigCaptureTreeNode** )0;
88 node->allocated = 0;
89 node->num_childs = 0;
90 node->group = -1;
91 node->beg = ONIG_REGION_NOTPOS;
92 node->end = ONIG_REGION_NOTPOS;
93
94 return node;
95 }
96
97 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
99 {
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
101
102 if (parent->num_childs >= parent->allocated) {
103 int n, i;
104
105 if (IS_NULL(parent->childs)) {
106 n = HISTORY_TREE_INIT_ALLOC_SIZE;
107 parent->childs =
108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
109 }
110 else {
111 n = parent->allocated * 2;
112 parent->childs =
113 (OnigCaptureTreeNode** )xrealloc(parent->childs,
114 sizeof(OnigCaptureTreeNode*) * n);
115 }
116 CHECK_NULL_RETURN_MEMERR(parent->childs);
117 for (i = parent->allocated; i < n; i++) {
118 parent->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 parent->allocated = n;
121 }
122
123 parent->childs[parent->num_childs] = child;
124 parent->num_childs++;
125 return 0;
126 }
127
128 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)129 history_tree_clone(OnigCaptureTreeNode* node)
130 {
131 int i;
132 OnigCaptureTreeNode *clone, *child;
133
134 clone = history_node_new();
135 CHECK_NULL_RETURN(clone);
136
137 clone->beg = node->beg;
138 clone->end = node->end;
139 for (i = 0; i < node->num_childs; i++) {
140 child = history_tree_clone(node->childs[i]);
141 if (IS_NULL(child)) {
142 history_tree_free(clone);
143 return (OnigCaptureTreeNode* )0;
144 }
145 history_tree_add_child(clone, child);
146 }
147
148 return clone;
149 }
150
151 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)152 onig_get_capture_tree(OnigRegion* region)
153 {
154 return region->history_root;
155 }
156 #endif /* USE_CAPTURE_HISTORY */
157
158 extern void
onig_region_clear(OnigRegion * region)159 onig_region_clear(OnigRegion* region)
160 {
161 int i;
162
163 for (i = 0; i < region->num_regs; i++) {
164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
165 }
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region);
168 #endif
169 }
170
171 extern int
onig_region_resize(OnigRegion * region,int n)172 onig_region_resize(OnigRegion* region, int n)
173 {
174 region->num_regs = n;
175
176 if (n < ONIG_NREGION)
177 n = ONIG_NREGION;
178
179 if (region->allocated == 0) {
180 region->beg = (int* )xmalloc(n * sizeof(int));
181 region->end = (int* )xmalloc(n * sizeof(int));
182
183 if (region->beg == 0 || region->end == 0)
184 return ONIGERR_MEMORY;
185
186 region->allocated = n;
187 }
188 else if (region->allocated < n) {
189 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
190 region->end = (int* )xrealloc(region->end, n * sizeof(int));
191
192 if (region->beg == 0 || region->end == 0)
193 return ONIGERR_MEMORY;
194
195 region->allocated = n;
196 }
197
198 return 0;
199 }
200
201 static int
onig_region_resize_clear(OnigRegion * region,int n)202 onig_region_resize_clear(OnigRegion* region, int n)
203 {
204 int r;
205
206 r = onig_region_resize(region, n);
207 if (r != 0) return r;
208 onig_region_clear(region);
209 return 0;
210 }
211
212 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)213 onig_region_set(OnigRegion* region, int at, int beg, int end)
214 {
215 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
216
217 if (at >= region->allocated) {
218 int r = onig_region_resize(region, at + 1);
219 if (r < 0) return r;
220 }
221
222 region->beg[at] = beg;
223 region->end[at] = end;
224 return 0;
225 }
226
227 extern void
onig_region_init(OnigRegion * region)228 onig_region_init(OnigRegion* region)
229 {
230 region->num_regs = 0;
231 region->allocated = 0;
232 region->beg = (int* )0;
233 region->end = (int* )0;
234 region->history_root = (OnigCaptureTreeNode* )0;
235 }
236
237 extern OnigRegion*
onig_region_new(void)238 onig_region_new(void)
239 {
240 OnigRegion* r;
241
242 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
243 onig_region_init(r);
244 return r;
245 }
246
247 extern void
onig_region_free(OnigRegion * r,int free_self)248 onig_region_free(OnigRegion* r, int free_self)
249 {
250 if (r) {
251 if (r->allocated > 0) {
252 if (r->beg) xfree(r->beg);
253 if (r->end) xfree(r->end);
254 r->allocated = 0;
255 }
256 #ifdef USE_CAPTURE_HISTORY
257 history_root_free(r);
258 #endif
259 if (free_self) xfree(r);
260 }
261 }
262
263 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)264 onig_region_copy(OnigRegion* to, OnigRegion* from)
265 {
266 #define RREGC_SIZE (sizeof(int) * from->num_regs)
267 int i;
268
269 if (to == from) return;
270
271 if (to->allocated == 0) {
272 if (from->num_regs > 0) {
273 to->beg = (int* )xmalloc(RREGC_SIZE);
274 to->end = (int* )xmalloc(RREGC_SIZE);
275 to->allocated = from->num_regs;
276 }
277 }
278 else if (to->allocated < from->num_regs) {
279 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
280 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
281 to->allocated = from->num_regs;
282 }
283
284 for (i = 0; i < from->num_regs; i++) {
285 to->beg[i] = from->beg[i];
286 to->end[i] = from->end[i];
287 }
288 to->num_regs = from->num_regs;
289
290 #ifdef USE_CAPTURE_HISTORY
291 history_root_free(to);
292
293 if (IS_NOT_NULL(from->history_root)) {
294 to->history_root = history_tree_clone(from->history_root);
295 }
296 #endif
297 }
298
299
300 /** stack **/
301 #define INVALID_STACK_INDEX -1
302
303 /* stack type */
304 /* used by normal-POP */
305 #define STK_ALT 0x0001
306 #define STK_LOOK_BEHIND_NOT 0x0002
307 #define STK_POS_NOT 0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START 0x0100
310 #define STK_MEM_END 0x8200
311 #define STK_REPEAT_INC 0x0300
312 #define STK_STATE_CHECK_MARK 0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START 0x3000
315 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
316 #define STK_MEM_END_MARK 0x8400
317 #define STK_POS 0x0500 /* used when POP-POS */
318 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
319 #define STK_REPEAT 0x0700
320 #define STK_CALL_FRAME 0x0800
321 #define STK_RETURN 0x0900
322 #define STK_VOID 0x0a00 /* for fill a blank */
323
324 /* stack type check mask */
325 #define STK_MASK_POP_USED 0x00ff
326 #define STK_MASK_TO_VOID_TARGET 0x10ff
327 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
328
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
331 (msa).stack_p = (void* )0;\
332 (msa).options = (arg_option);\
333 (msa).region = (arg_region);\
334 (msa).start = (arg_start);\
335 (msa).best_len = ONIG_MISMATCH;\
336 } while(0)
337 #else
338 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
339 (msa).stack_p = (void* )0;\
340 (msa).options = (arg_option);\
341 (msa).region = (arg_region);\
342 (msa).start = (arg_start);\
343 } while(0)
344 #endif
345
346 #ifdef USE_COMBINATION_EXPLOSION_CHECK
347
348 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
349
350 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
351 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
352 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
353 offset = ((offset) * (state_num)) >> 3;\
354 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
355 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
356 (msa).state_check_buff = (void* )xmalloc(size);\
357 else \
358 (msa).state_check_buff = (void* )xalloca(size);\
359 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
360 (size_t )(size - (offset))); \
361 (msa).state_check_buff_size = size;\
362 }\
363 else {\
364 (msa).state_check_buff = (void* )0;\
365 (msa).state_check_buff_size = 0;\
366 }\
367 }\
368 else {\
369 (msa).state_check_buff = (void* )0;\
370 (msa).state_check_buff_size = 0;\
371 }\
372 } while(0)
373
374 #define MATCH_ARG_FREE(msa) do {\
375 if ((msa).stack_p) xfree((msa).stack_p);\
376 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
377 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
378 }\
379 } while(0)
380 #else
381 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
382 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
383 #endif
384
385
386
387 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
388 if (msa->stack_p) {\
389 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
390 stk_alloc = (OnigStackType* )(msa->stack_p);\
391 stk_base = stk_alloc;\
392 stk = stk_base;\
393 stk_end = stk_base + msa->stack_n;\
394 }\
395 else {\
396 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
397 + sizeof(OnigStackType) * (stack_num));\
398 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
399 stk_base = stk_alloc;\
400 stk = stk_base;\
401 stk_end = stk_base + (stack_num);\
402 }\
403 } while(0)
404
405 #define STACK_SAVE do{\
406 if (stk_base != stk_alloc) {\
407 msa->stack_p = stk_base;\
408 msa->stack_n = stk_end - stk_base;\
409 };\
410 } while(0)
411
412 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
413
414 extern unsigned int
onig_get_match_stack_limit_size(void)415 onig_get_match_stack_limit_size(void)
416 {
417 return MatchStackLimitSize;
418 }
419
420 extern int
onig_set_match_stack_limit_size(unsigned int size)421 onig_set_match_stack_limit_size(unsigned int size)
422 {
423 MatchStackLimitSize = size;
424 return 0;
425 }
426
427 static int
stack_double(OnigStackType ** arg_stk_base,OnigStackType ** arg_stk_end,OnigStackType ** arg_stk,OnigStackType * stk_alloc,OnigMatchArg * msa)428 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
429 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
430 {
431 unsigned int n;
432 OnigStackType *x, *stk_base, *stk_end, *stk;
433
434 stk_base = *arg_stk_base;
435 stk_end = *arg_stk_end;
436 stk = *arg_stk;
437
438 n = stk_end - stk_base;
439 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
440 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
441 if (IS_NULL(x)) {
442 STACK_SAVE;
443 return ONIGERR_MEMORY;
444 }
445 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
446 n *= 2;
447 }
448 else {
449 n *= 2;
450 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
451 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
452 return ONIGERR_MATCH_STACK_LIMIT_OVER;
453 else
454 n = MatchStackLimitSize;
455 }
456 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
457 if (IS_NULL(x)) {
458 STACK_SAVE;
459 return ONIGERR_MEMORY;
460 }
461 }
462 *arg_stk = x + (stk - stk_base);
463 *arg_stk_base = x;
464 *arg_stk_end = x + n;
465 return 0;
466 }
467
468 #define STACK_ENSURE(n) do {\
469 if (stk_end - stk < (n)) {\
470 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
471 if (r != 0) { STACK_SAVE; return r; } \
472 }\
473 } while(0)
474
475 #define STACK_AT(index) (stk_base + (index))
476 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
477
478 #define STACK_PUSH_TYPE(stack_type) do {\
479 STACK_ENSURE(1);\
480 stk->type = (stack_type);\
481 STACK_INC;\
482 } while(0)
483
484 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
485
486 #ifdef USE_COMBINATION_EXPLOSION_CHECK
487 #define STATE_CHECK_POS(s,snum) \
488 (((s) - str) * num_comb_exp_check + ((snum) - 1))
489 #define STATE_CHECK_VAL(v,snum) do {\
490 if (state_check_buff != NULL) {\
491 int x = STATE_CHECK_POS(s,snum);\
492 (v) = state_check_buff[x/8] & (1<<(x%8));\
493 }\
494 else (v) = 0;\
495 } while(0)
496
497
498 #define ELSE_IF_STATE_CHECK_MARK(stk) \
499 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
500 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
501 state_check_buff[x/8] |= (1<<(x%8)); \
502 }
503
504 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
505 STACK_ENSURE(1);\
506 stk->type = (stack_type);\
507 stk->u.state.pcode = (pat);\
508 stk->u.state.pstr = (s);\
509 stk->u.state.pstr_prev = (sprev);\
510 stk->u.state.state_check = 0;\
511 STACK_INC;\
512 } while(0)
513
514 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
515 stk->type = (stack_type);\
516 stk->u.state.pcode = (pat);\
517 stk->u.state.state_check = 0;\
518 STACK_INC;\
519 } while(0)
520
521 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
522 STACK_ENSURE(1);\
523 stk->type = STK_ALT;\
524 stk->u.state.pcode = (pat);\
525 stk->u.state.pstr = (s);\
526 stk->u.state.pstr_prev = (sprev);\
527 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
528 STACK_INC;\
529 } while(0)
530
531 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
532 if (state_check_buff != NULL) {\
533 STACK_ENSURE(1);\
534 stk->type = STK_STATE_CHECK_MARK;\
535 stk->u.state.pstr = (s);\
536 stk->u.state.state_check = (snum);\
537 STACK_INC;\
538 }\
539 } while(0)
540
541 #else /* USE_COMBINATION_EXPLOSION_CHECK */
542
543 #define ELSE_IF_STATE_CHECK_MARK(stk)
544
545 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
546 STACK_ENSURE(1);\
547 stk->type = (stack_type);\
548 stk->u.state.pcode = (pat);\
549 stk->u.state.pstr = (s);\
550 stk->u.state.pstr_prev = (sprev);\
551 STACK_INC;\
552 } while(0)
553
554 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
555 stk->type = (stack_type);\
556 stk->u.state.pcode = (pat);\
557 STACK_INC;\
558 } while(0)
559 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
560
561 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
562 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
563 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
564 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
565 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
566 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
567
568 #define STACK_PUSH_REPEAT(id, pat) do {\
569 STACK_ENSURE(1);\
570 stk->type = STK_REPEAT;\
571 stk->u.repeat.num = (id);\
572 stk->u.repeat.pcode = (pat);\
573 stk->u.repeat.count = 0;\
574 STACK_INC;\
575 } while(0)
576
577 #define STACK_PUSH_REPEAT_INC(sindex) do {\
578 STACK_ENSURE(1);\
579 stk->type = STK_REPEAT_INC;\
580 stk->u.repeat_inc.si = (sindex);\
581 STACK_INC;\
582 } while(0)
583
584 #define STACK_PUSH_MEM_START(mnum, s) do {\
585 STACK_ENSURE(1);\
586 stk->type = STK_MEM_START;\
587 stk->u.mem.num = (mnum);\
588 stk->u.mem.pstr = (s);\
589 stk->u.mem.start = mem_start_stk[mnum];\
590 stk->u.mem.end = mem_end_stk[mnum];\
591 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
592 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
593 STACK_INC;\
594 } while(0)
595
596 #define STACK_PUSH_MEM_END(mnum, s) do {\
597 STACK_ENSURE(1);\
598 stk->type = STK_MEM_END;\
599 stk->u.mem.num = (mnum);\
600 stk->u.mem.pstr = (s);\
601 stk->u.mem.start = mem_start_stk[mnum];\
602 stk->u.mem.end = mem_end_stk[mnum];\
603 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
604 STACK_INC;\
605 } while(0)
606
607 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
608 STACK_ENSURE(1);\
609 stk->type = STK_MEM_END_MARK;\
610 stk->u.mem.num = (mnum);\
611 STACK_INC;\
612 } while(0)
613
614 #define STACK_GET_MEM_START(mnum, k) do {\
615 int level = 0;\
616 k = stk;\
617 while (k > stk_base) {\
618 k--;\
619 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
620 && k->u.mem.num == (mnum)) {\
621 level++;\
622 }\
623 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
624 if (level == 0) break;\
625 level--;\
626 }\
627 }\
628 } while(0)
629
630 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
631 int level = 0;\
632 while (k < stk) {\
633 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
634 if (level == 0) (start) = k->u.mem.pstr;\
635 level++;\
636 }\
637 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
638 level--;\
639 if (level == 0) {\
640 (end) = k->u.mem.pstr;\
641 break;\
642 }\
643 }\
644 k++;\
645 }\
646 } while(0)
647
648 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
649 STACK_ENSURE(1);\
650 stk->type = STK_NULL_CHECK_START;\
651 stk->u.null_check.num = (cnum);\
652 stk->u.null_check.pstr = (s);\
653 STACK_INC;\
654 } while(0)
655
656 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
657 STACK_ENSURE(1);\
658 stk->type = STK_NULL_CHECK_END;\
659 stk->u.null_check.num = (cnum);\
660 STACK_INC;\
661 } while(0)
662
663 #define STACK_PUSH_CALL_FRAME(pat) do {\
664 STACK_ENSURE(1);\
665 stk->type = STK_CALL_FRAME;\
666 stk->u.call_frame.ret_addr = (pat);\
667 STACK_INC;\
668 } while(0)
669
670 #define STACK_PUSH_RETURN do {\
671 STACK_ENSURE(1);\
672 stk->type = STK_RETURN;\
673 STACK_INC;\
674 } while(0)
675
676
677 #ifdef ONIG_DEBUG
678 #define STACK_BASE_CHECK(p, at) \
679 if ((p) < stk_base) {\
680 fprintf(stderr, "at %s\n", at);\
681 goto stack_error;\
682 }
683 #else
684 #define STACK_BASE_CHECK(p, at)
685 #endif
686
687 #define STACK_POP_ONE do {\
688 stk--;\
689 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
690 } while(0)
691
692 #define STACK_POP do {\
693 switch (pop_level) {\
694 case STACK_POP_LEVEL_FREE:\
695 while (1) {\
696 stk--;\
697 STACK_BASE_CHECK(stk, "STACK_POP"); \
698 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
699 ELSE_IF_STATE_CHECK_MARK(stk);\
700 }\
701 break;\
702 case STACK_POP_LEVEL_MEM_START:\
703 while (1) {\
704 stk--;\
705 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
706 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
707 else if (stk->type == STK_MEM_START) {\
708 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
709 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
710 }\
711 ELSE_IF_STATE_CHECK_MARK(stk);\
712 }\
713 break;\
714 default:\
715 while (1) {\
716 stk--;\
717 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
718 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
719 else if (stk->type == STK_MEM_START) {\
720 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
721 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
722 }\
723 else if (stk->type == STK_REPEAT_INC) {\
724 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
725 }\
726 else if (stk->type == STK_MEM_END) {\
727 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
728 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
729 }\
730 ELSE_IF_STATE_CHECK_MARK(stk);\
731 }\
732 break;\
733 }\
734 } while(0)
735
736 #define STACK_POP_TIL_POS_NOT do {\
737 while (1) {\
738 stk--;\
739 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
740 if (stk->type == STK_POS_NOT) break;\
741 else if (stk->type == STK_MEM_START) {\
742 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
743 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
744 }\
745 else if (stk->type == STK_REPEAT_INC) {\
746 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
747 }\
748 else if (stk->type == STK_MEM_END) {\
749 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
750 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
751 }\
752 ELSE_IF_STATE_CHECK_MARK(stk);\
753 }\
754 } while(0)
755
756 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
757 while (1) {\
758 stk--;\
759 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
760 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
761 else if (stk->type == STK_MEM_START) {\
762 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
763 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
764 }\
765 else if (stk->type == STK_REPEAT_INC) {\
766 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
767 }\
768 else if (stk->type == STK_MEM_END) {\
769 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
770 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
771 }\
772 ELSE_IF_STATE_CHECK_MARK(stk);\
773 }\
774 } while(0)
775
776 #define STACK_POS_END(k) do {\
777 k = stk;\
778 while (1) {\
779 k--;\
780 STACK_BASE_CHECK(k, "STACK_POS_END"); \
781 if (IS_TO_VOID_TARGET(k)) {\
782 k->type = STK_VOID;\
783 }\
784 else if (k->type == STK_POS) {\
785 k->type = STK_VOID;\
786 break;\
787 }\
788 }\
789 } while(0)
790
791 #define STACK_STOP_BT_END do {\
792 OnigStackType *k = stk;\
793 while (1) {\
794 k--;\
795 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
796 if (IS_TO_VOID_TARGET(k)) {\
797 k->type = STK_VOID;\
798 }\
799 else if (k->type == STK_STOP_BT) {\
800 k->type = STK_VOID;\
801 break;\
802 }\
803 }\
804 } while(0)
805
806 #define STACK_NULL_CHECK(isnull,id,s) do {\
807 OnigStackType* k = stk;\
808 while (1) {\
809 k--;\
810 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
811 if (k->type == STK_NULL_CHECK_START) {\
812 if (k->u.null_check.num == (id)) {\
813 (isnull) = (k->u.null_check.pstr == (s));\
814 break;\
815 }\
816 }\
817 }\
818 } while(0)
819
820 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
821 int level = 0;\
822 OnigStackType* k = stk;\
823 while (1) {\
824 k--;\
825 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
826 if (k->type == STK_NULL_CHECK_START) {\
827 if (k->u.null_check.num == (id)) {\
828 if (level == 0) {\
829 (isnull) = (k->u.null_check.pstr == (s));\
830 break;\
831 }\
832 else level--;\
833 }\
834 }\
835 else if (k->type == STK_NULL_CHECK_END) {\
836 level++;\
837 }\
838 }\
839 } while(0)
840
841 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
842 OnigStackType* k = stk;\
843 while (1) {\
844 k--;\
845 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
846 if (k->type == STK_NULL_CHECK_START) {\
847 if (k->u.null_check.num == (id)) {\
848 if (k->u.null_check.pstr != (s)) {\
849 (isnull) = 0;\
850 break;\
851 }\
852 else {\
853 UChar* endp;\
854 (isnull) = 1;\
855 while (k < stk) {\
856 if (k->type == STK_MEM_START) {\
857 if (k->u.mem.end == INVALID_STACK_INDEX) {\
858 (isnull) = 0; break;\
859 }\
860 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
861 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
862 else\
863 endp = (UChar* )k->u.mem.end;\
864 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
865 (isnull) = 0; break;\
866 }\
867 else if (endp != s) {\
868 (isnull) = -1; /* empty, but position changed */ \
869 }\
870 }\
871 k++;\
872 }\
873 break;\
874 }\
875 }\
876 }\
877 }\
878 } while(0)
879
880 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
881 int level = 0;\
882 OnigStackType* k = stk;\
883 while (1) {\
884 k--;\
885 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
886 if (k->type == STK_NULL_CHECK_START) {\
887 if (k->u.null_check.num == (id)) {\
888 if (level == 0) {\
889 if (k->u.null_check.pstr != (s)) {\
890 (isnull) = 0;\
891 break;\
892 }\
893 else {\
894 UChar* endp;\
895 (isnull) = 1;\
896 while (k < stk) {\
897 if (k->type == STK_MEM_START) {\
898 if (k->u.mem.end == INVALID_STACK_INDEX) {\
899 (isnull) = 0; break;\
900 }\
901 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
902 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
903 else\
904 endp = (UChar* )k->u.mem.end;\
905 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
906 (isnull) = 0; break;\
907 }\
908 else if (endp != s) {\
909 (isnull) = -1; /* empty, but position changed */ \
910 }\
911 }\
912 k++;\
913 }\
914 break;\
915 }\
916 }\
917 else {\
918 level--;\
919 }\
920 }\
921 }\
922 else if (k->type == STK_NULL_CHECK_END) {\
923 if (k->u.null_check.num == (id)) level++;\
924 }\
925 }\
926 } while(0)
927
928 #define STACK_GET_REPEAT(id, k) do {\
929 int level = 0;\
930 k = stk;\
931 while (1) {\
932 k--;\
933 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
934 if (k->type == STK_REPEAT) {\
935 if (level == 0) {\
936 if (k->u.repeat.num == (id)) {\
937 break;\
938 }\
939 }\
940 }\
941 else if (k->type == STK_CALL_FRAME) level--;\
942 else if (k->type == STK_RETURN) level++;\
943 }\
944 } while(0)
945
946 #define STACK_RETURN(addr) do {\
947 int level = 0;\
948 OnigStackType* k = stk;\
949 while (1) {\
950 k--;\
951 STACK_BASE_CHECK(k, "STACK_RETURN"); \
952 if (k->type == STK_CALL_FRAME) {\
953 if (level == 0) {\
954 (addr) = k->u.call_frame.ret_addr;\
955 break;\
956 }\
957 else level--;\
958 }\
959 else if (k->type == STK_RETURN)\
960 level++;\
961 }\
962 } while(0)
963
964
965 #define STRING_CMP(s1,s2,len) do {\
966 while (len-- > 0) {\
967 if (*s1++ != *s2++) goto fail;\
968 }\
969 } while(0)
970
971 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
972 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
973 goto fail; \
974 } while(0)
975
string_cmp_ic(OnigEncoding enc,int case_fold_flag,UChar * s1,UChar ** ps2,int mblen)976 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
977 UChar* s1, UChar** ps2, int mblen)
978 {
979 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
980 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
981 UChar *p1, *p2, *end1, *s2, *end2;
982 int len1, len2;
983
984 s2 = *ps2;
985 end1 = s1 + mblen;
986 end2 = s2 + mblen;
987 while (s1 < end1) {
988 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
989 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
990 if (len1 != len2) return 0;
991 p1 = buf1;
992 p2 = buf2;
993 while (len1-- > 0) {
994 if (*p1 != *p2) return 0;
995 p1++;
996 p2++;
997 }
998 }
999
1000 *ps2 = s2;
1001 return 1;
1002 }
1003
1004 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1005 is_fail = 0;\
1006 while (len-- > 0) {\
1007 if (*s1++ != *s2++) {\
1008 is_fail = 1; break;\
1009 }\
1010 }\
1011 } while(0)
1012
1013 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1014 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1015 is_fail = 1; \
1016 else \
1017 is_fail = 0; \
1018 } while(0)
1019
1020
1021 #define IS_EMPTY_STR (str == end)
1022 #define ON_STR_BEGIN(s) ((s) == str)
1023 #define ON_STR_END(s) ((s) == end)
1024 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1025 #define DATA_ENSURE_CHECK1 (s < right_range)
1026 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1027 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1028 #else
1029 #define DATA_ENSURE_CHECK1 (s < end)
1030 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1031 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1032 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1033
1034
1035 #ifdef USE_CAPTURE_HISTORY
1036 static int
make_capture_history_tree(OnigCaptureTreeNode * node,OnigStackType ** kp,OnigStackType * stk_top,UChar * str,regex_t * reg)1037 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1038 OnigStackType* stk_top, UChar* str, regex_t* reg)
1039 {
1040 int n, r;
1041 OnigCaptureTreeNode* child;
1042 OnigStackType* k = *kp;
1043
1044 while (k < stk_top) {
1045 if (k->type == STK_MEM_START) {
1046 n = k->u.mem.num;
1047 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1048 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1049 child = history_node_new();
1050 CHECK_NULL_RETURN_MEMERR(child);
1051 child->group = n;
1052 child->beg = (int )(k->u.mem.pstr - str);
1053 r = history_tree_add_child(node, child);
1054 if (r != 0) return r;
1055 *kp = (k + 1);
1056 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1057 if (r != 0) return r;
1058
1059 k = *kp;
1060 child->end = (int )(k->u.mem.pstr - str);
1061 }
1062 }
1063 else if (k->type == STK_MEM_END) {
1064 if (k->u.mem.num == node->group) {
1065 node->end = (int )(k->u.mem.pstr - str);
1066 *kp = k;
1067 return 0;
1068 }
1069 }
1070 k++;
1071 }
1072
1073 return 1; /* 1: root node ending. */
1074 }
1075 #endif
1076
1077 #ifdef USE_BACKREF_WITH_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1078 static int mem_is_in_memp(int mem, int num, UChar* memp)
1079 {
1080 int i;
1081 MemNumType m;
1082
1083 for (i = 0; i < num; i++) {
1084 GET_MEMNUM_INC(m, memp);
1085 if (mem == (int )m) return 1;
1086 }
1087 return 0;
1088 }
1089
backref_match_at_nested_level(regex_t * reg,OnigStackType * top,OnigStackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1090 static int backref_match_at_nested_level(regex_t* reg
1091 , OnigStackType* top, OnigStackType* stk_base
1092 , int ignore_case, int case_fold_flag
1093 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1094 {
1095 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1096 int level;
1097 OnigStackType* k;
1098
1099 level = 0;
1100 k = top;
1101 k--;
1102 while (k >= stk_base) {
1103 if (k->type == STK_CALL_FRAME) {
1104 level--;
1105 }
1106 else if (k->type == STK_RETURN) {
1107 level++;
1108 }
1109 else if (level == nest) {
1110 if (k->type == STK_MEM_START) {
1111 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1112 pstart = k->u.mem.pstr;
1113 if (pend != NULL_UCHARP) {
1114 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1115 p = pstart;
1116 ss = *s;
1117
1118 if (ignore_case != 0) {
1119 if (string_cmp_ic(reg->enc, case_fold_flag,
1120 pstart, &ss, (int )(pend - pstart)) == 0)
1121 return 0; /* or goto next_mem; */
1122 }
1123 else {
1124 while (p < pend) {
1125 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1126 }
1127 }
1128
1129 *s = ss;
1130 return 1;
1131 }
1132 }
1133 }
1134 else if (k->type == STK_MEM_END) {
1135 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1136 pend = k->u.mem.pstr;
1137 }
1138 }
1139 }
1140 k--;
1141 }
1142
1143 return 0;
1144 }
1145 #endif /* USE_BACKREF_WITH_LEVEL */
1146
1147
1148 #ifdef ONIG_DEBUG_STATISTICS
1149
1150 #define USE_TIMEOFDAY
1151
1152 #ifdef USE_TIMEOFDAY
1153 #ifdef HAVE_SYS_TIME_H
1154 #include <sys/time.h>
1155 #endif
1156 #ifdef HAVE_UNISTD_H
1157 #include <unistd.h>
1158 #endif
1159 static struct timeval ts, te;
1160 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1161 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1162 (((te).tv_sec - (ts).tv_sec)*1000000))
1163 #else
1164 #ifdef HAVE_SYS_TIMES_H
1165 #include <sys/times.h>
1166 #endif
1167 static struct tms ts, te;
1168 #define GETTIME(t) times(&(t))
1169 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1170 #endif
1171
1172 static int OpCounter[256];
1173 static int OpPrevCounter[256];
1174 static unsigned long OpTime[256];
1175 static int OpCurr = OP_FINISH;
1176 static int OpPrevTarget = OP_FAIL;
1177 static int MaxStackDepth = 0;
1178
1179 #define MOP_IN(opcode) do {\
1180 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1181 OpCurr = opcode;\
1182 OpCounter[opcode]++;\
1183 GETTIME(ts);\
1184 } while(0)
1185
1186 #define MOP_OUT do {\
1187 GETTIME(te);\
1188 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1189 } while(0)
1190
1191 extern void
onig_statistics_init(void)1192 onig_statistics_init(void)
1193 {
1194 int i;
1195 for (i = 0; i < 256; i++) {
1196 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1197 }
1198 MaxStackDepth = 0;
1199 }
1200
1201 extern void
onig_print_statistics(FILE * f)1202 onig_print_statistics(FILE* f)
1203 {
1204 int i;
1205 fprintf(f, " count prev time\n");
1206 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1207 fprintf(f, "%8d: %8d: %10ld: %s\n",
1208 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1209 }
1210 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1211 }
1212
1213 #define STACK_INC do {\
1214 stk++;\
1215 if (stk - stk_base > MaxStackDepth) \
1216 MaxStackDepth = stk - stk_base;\
1217 } while(0)
1218
1219 #else
1220 #define STACK_INC stk++
1221
1222 #define MOP_IN(opcode)
1223 #define MOP_OUT
1224 #endif
1225
1226
1227 /* matching region of POSIX API */
1228 typedef int regoff_t;
1229
1230 typedef struct {
1231 regoff_t rm_so;
1232 regoff_t rm_eo;
1233 } posix_regmatch_t;
1234
1235 /* match data(str - end) from position (sstart). */
1236 /* if sstart == str then set sprev to NULL. */
1237 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * right_range,const UChar * sstart,UChar * sprev,OnigMatchArg * msa)1238 match_at(regex_t* reg, const UChar* str, const UChar* end,
1239 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1240 const UChar* right_range,
1241 #endif
1242 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1243 {
1244 static UChar FinishCode[] = { OP_FINISH };
1245
1246 int i, n, num_mem, best_len, pop_level;
1247 LengthType tlen, tlen2;
1248 MemNumType mem;
1249 RelAddrType addr;
1250 OnigOptionType option = reg->options;
1251 OnigEncoding encode = reg->enc;
1252 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1253 UChar *s, *q, *sbegin;
1254 UChar *p = reg->p;
1255 char *alloca_base;
1256 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1257 OnigStackType *stkp; /* used as any purpose. */
1258 OnigStackIndex si;
1259 OnigStackIndex *repeat_stk;
1260 OnigStackIndex *mem_start_stk, *mem_end_stk;
1261 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1262 int scv;
1263 unsigned char* state_check_buff = msa->state_check_buff;
1264 int num_comb_exp_check = reg->num_comb_exp_check;
1265 #endif
1266 n = reg->num_repeat + reg->num_mem * 2;
1267
1268 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1269 pop_level = reg->stack_pop_level;
1270 num_mem = reg->num_mem;
1271 repeat_stk = (OnigStackIndex* )alloca_base;
1272
1273 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1274 mem_end_stk = mem_start_stk + num_mem;
1275 mem_start_stk--; /* for index start from 1,
1276 mem_start_stk[1]..mem_start_stk[num_mem] */
1277 mem_end_stk--; /* for index start from 1,
1278 mem_end_stk[1]..mem_end_stk[num_mem] */
1279 for (i = 1; i <= num_mem; i++) {
1280 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1281 }
1282
1283 #ifdef ONIG_DEBUG_MATCH
1284 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1285 (int )str, (int )end, (int )sstart, (int )sprev);
1286 fprintf(stderr, "size: %d, start offset: %d\n",
1287 (int )(end - str), (int )(sstart - str));
1288 #endif
1289
1290 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1291 best_len = ONIG_MISMATCH;
1292 s = (UChar* )sstart;
1293 while (1) {
1294 #ifdef ONIG_DEBUG_MATCH
1295 {
1296 UChar *q, *bp, buf[50];
1297 int len;
1298 fprintf(stderr, "%4d> \"", (int )(s - str));
1299 bp = buf;
1300 for (i = 0, q = s; i < 7 && q < end; i++) {
1301 len = enclen(encode, q);
1302 while (len-- > 0) *bp++ = *q++;
1303 }
1304 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1305 else { xmemcpy(bp, "\"", 1); bp += 1; }
1306 *bp = 0;
1307 fputs((char* )buf, stderr);
1308 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1309 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1310 fprintf(stderr, "\n");
1311 }
1312 #endif
1313
1314 sbegin = s;
1315 switch (*p++) {
1316 case OP_END: MOP_IN(OP_END);
1317 n = s - sstart;
1318 if (n > best_len) {
1319 OnigRegion* region;
1320 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1321 if (IS_FIND_LONGEST(option)) {
1322 if (n > msa->best_len) {
1323 msa->best_len = n;
1324 msa->best_s = (UChar* )sstart;
1325 }
1326 else
1327 goto end_best_len;
1328 }
1329 #endif
1330 best_len = n;
1331 region = msa->region;
1332 if (region) {
1333 #ifdef USE_POSIX_API_REGION_OPTION
1334 if (IS_POSIX_REGION(msa->options)) {
1335 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1336
1337 rmt[0].rm_so = sstart - str;
1338 rmt[0].rm_eo = s - str;
1339 for (i = 1; i <= num_mem; i++) {
1340 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1341 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1342 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1343 else
1344 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1345
1346 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1347 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1348 : (UChar* )((void* )mem_end_stk[i])) - str;
1349 }
1350 else {
1351 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1352 }
1353 }
1354 }
1355 else {
1356 #endif /* USE_POSIX_API_REGION_OPTION */
1357 region->beg[0] = sstart - str;
1358 region->end[0] = s - str;
1359 for (i = 1; i <= num_mem; i++) {
1360 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1361 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1362 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1363 else
1364 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1365
1366 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1367 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1368 : (UChar* )((void* )mem_end_stk[i])) - str;
1369 }
1370 else {
1371 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1372 }
1373 }
1374
1375 #ifdef USE_CAPTURE_HISTORY
1376 if (reg->capture_history != 0) {
1377 int r;
1378 OnigCaptureTreeNode* node;
1379
1380 if (IS_NULL(region->history_root)) {
1381 region->history_root = node = history_node_new();
1382 CHECK_NULL_RETURN_MEMERR(node);
1383 }
1384 else {
1385 node = region->history_root;
1386 history_tree_clear(node);
1387 }
1388
1389 node->group = 0;
1390 node->beg = sstart - str;
1391 node->end = s - str;
1392
1393 stkp = stk_base;
1394 r = make_capture_history_tree(region->history_root, &stkp,
1395 stk, (UChar* )str, reg);
1396 if (r < 0) {
1397 best_len = r; /* error code */
1398 goto finish;
1399 }
1400 }
1401 #endif /* USE_CAPTURE_HISTORY */
1402 #ifdef USE_POSIX_API_REGION_OPTION
1403 } /* else IS_POSIX_REGION() */
1404 #endif
1405 } /* if (region) */
1406 } /* n > best_len */
1407
1408 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1409 end_best_len:
1410 #endif
1411 MOP_OUT;
1412
1413 if (IS_FIND_CONDITION(option)) {
1414 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1415 best_len = ONIG_MISMATCH;
1416 goto fail; /* for retry */
1417 }
1418 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1419 goto fail; /* for retry */
1420 }
1421 }
1422
1423 /* default behavior: return first-matching result. */
1424 goto finish;
1425 break;
1426
1427 case OP_EXACT1: MOP_IN(OP_EXACT1);
1428 #if 0
1429 DATA_ENSURE(1);
1430 if (*p != *s) goto fail;
1431 p++; s++;
1432 #endif
1433 if (*p != *s++) goto fail;
1434 DATA_ENSURE(0);
1435 p++;
1436 MOP_OUT;
1437 break;
1438
1439 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1440 {
1441 int len;
1442 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1443
1444 DATA_ENSURE(1);
1445 len = ONIGENC_MBC_CASE_FOLD(encode,
1446 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1447 case_fold_flag,
1448 &s, end, lowbuf);
1449 DATA_ENSURE(0);
1450 q = lowbuf;
1451 while (len-- > 0) {
1452 if (*p != *q) {
1453 goto fail;
1454 }
1455 p++; q++;
1456 }
1457 }
1458 MOP_OUT;
1459 break;
1460
1461 case OP_EXACT2: MOP_IN(OP_EXACT2);
1462 DATA_ENSURE(2);
1463 if (*p != *s) goto fail;
1464 p++; s++;
1465 if (*p != *s) goto fail;
1466 sprev = s;
1467 p++; s++;
1468 MOP_OUT;
1469 continue;
1470 break;
1471
1472 case OP_EXACT3: MOP_IN(OP_EXACT3);
1473 DATA_ENSURE(3);
1474 if (*p != *s) goto fail;
1475 p++; s++;
1476 if (*p != *s) goto fail;
1477 p++; s++;
1478 if (*p != *s) goto fail;
1479 sprev = s;
1480 p++; s++;
1481 MOP_OUT;
1482 continue;
1483 break;
1484
1485 case OP_EXACT4: MOP_IN(OP_EXACT4);
1486 DATA_ENSURE(4);
1487 if (*p != *s) goto fail;
1488 p++; s++;
1489 if (*p != *s) goto fail;
1490 p++; s++;
1491 if (*p != *s) goto fail;
1492 p++; s++;
1493 if (*p != *s) goto fail;
1494 sprev = s;
1495 p++; s++;
1496 MOP_OUT;
1497 continue;
1498 break;
1499
1500 case OP_EXACT5: MOP_IN(OP_EXACT5);
1501 DATA_ENSURE(5);
1502 if (*p != *s) goto fail;
1503 p++; s++;
1504 if (*p != *s) goto fail;
1505 p++; s++;
1506 if (*p != *s) goto fail;
1507 p++; s++;
1508 if (*p != *s) goto fail;
1509 p++; s++;
1510 if (*p != *s) goto fail;
1511 sprev = s;
1512 p++; s++;
1513 MOP_OUT;
1514 continue;
1515 break;
1516
1517 case OP_EXACTN: MOP_IN(OP_EXACTN);
1518 GET_LENGTH_INC(tlen, p);
1519 DATA_ENSURE(tlen);
1520 while (tlen-- > 0) {
1521 if (*p++ != *s++) goto fail;
1522 }
1523 sprev = s - 1;
1524 MOP_OUT;
1525 continue;
1526 break;
1527
1528 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1529 {
1530 int len;
1531 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1532
1533 GET_LENGTH_INC(tlen, p);
1534 endp = p + tlen;
1535
1536 while (p < endp) {
1537 sprev = s;
1538 DATA_ENSURE(1);
1539 len = ONIGENC_MBC_CASE_FOLD(encode,
1540 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1541 case_fold_flag,
1542 &s, end, lowbuf);
1543 DATA_ENSURE(0);
1544 q = lowbuf;
1545 while (len-- > 0) {
1546 if (*p != *q) goto fail;
1547 p++; q++;
1548 }
1549 }
1550 }
1551
1552 MOP_OUT;
1553 continue;
1554 break;
1555
1556 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1557 DATA_ENSURE(2);
1558 if (*p != *s) goto fail;
1559 p++; s++;
1560 if (*p != *s) goto fail;
1561 p++; s++;
1562 MOP_OUT;
1563 break;
1564
1565 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1566 DATA_ENSURE(4);
1567 if (*p != *s) goto fail;
1568 p++; s++;
1569 if (*p != *s) goto fail;
1570 p++; s++;
1571 sprev = s;
1572 if (*p != *s) goto fail;
1573 p++; s++;
1574 if (*p != *s) goto fail;
1575 p++; s++;
1576 MOP_OUT;
1577 continue;
1578 break;
1579
1580 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1581 DATA_ENSURE(6);
1582 if (*p != *s) goto fail;
1583 p++; s++;
1584 if (*p != *s) goto fail;
1585 p++; s++;
1586 if (*p != *s) goto fail;
1587 p++; s++;
1588 if (*p != *s) goto fail;
1589 p++; s++;
1590 sprev = s;
1591 if (*p != *s) goto fail;
1592 p++; s++;
1593 if (*p != *s) goto fail;
1594 p++; s++;
1595 MOP_OUT;
1596 continue;
1597 break;
1598
1599 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1600 GET_LENGTH_INC(tlen, p);
1601 DATA_ENSURE(tlen * 2);
1602 while (tlen-- > 0) {
1603 if (*p != *s) goto fail;
1604 p++; s++;
1605 if (*p != *s) goto fail;
1606 p++; s++;
1607 }
1608 sprev = s - 2;
1609 MOP_OUT;
1610 continue;
1611 break;
1612
1613 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1614 GET_LENGTH_INC(tlen, p);
1615 DATA_ENSURE(tlen * 3);
1616 while (tlen-- > 0) {
1617 if (*p != *s) goto fail;
1618 p++; s++;
1619 if (*p != *s) goto fail;
1620 p++; s++;
1621 if (*p != *s) goto fail;
1622 p++; s++;
1623 }
1624 sprev = s - 3;
1625 MOP_OUT;
1626 continue;
1627 break;
1628
1629 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1630 GET_LENGTH_INC(tlen, p); /* mb-len */
1631 GET_LENGTH_INC(tlen2, p); /* string len */
1632 tlen2 *= tlen;
1633 DATA_ENSURE(tlen2);
1634 while (tlen2-- > 0) {
1635 if (*p != *s) goto fail;
1636 p++; s++;
1637 }
1638 sprev = s - tlen;
1639 MOP_OUT;
1640 continue;
1641 break;
1642
1643 case OP_CCLASS: MOP_IN(OP_CCLASS);
1644 DATA_ENSURE(1);
1645 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1646 p += SIZE_BITSET;
1647 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1648 MOP_OUT;
1649 break;
1650
1651 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1652 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1653
1654 cclass_mb:
1655 GET_LENGTH_INC(tlen, p);
1656 {
1657 OnigCodePoint code;
1658 UChar *ss;
1659 int mb_len;
1660
1661 DATA_ENSURE(1);
1662 mb_len = enclen(encode, s);
1663 DATA_ENSURE(mb_len);
1664 ss = s;
1665 s += mb_len;
1666 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1667
1668 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1669 if (! onig_is_in_code_range(p, code)) goto fail;
1670 #else
1671 q = p;
1672 ALIGNMENT_RIGHT(q);
1673 if (! onig_is_in_code_range(q, code)) goto fail;
1674 #endif
1675 }
1676 p += tlen;
1677 MOP_OUT;
1678 break;
1679
1680 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1681 DATA_ENSURE(1);
1682 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1683 p += SIZE_BITSET;
1684 goto cclass_mb;
1685 }
1686 else {
1687 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1688 goto fail;
1689
1690 p += SIZE_BITSET;
1691 GET_LENGTH_INC(tlen, p);
1692 p += tlen;
1693 s++;
1694 }
1695 MOP_OUT;
1696 break;
1697
1698 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1699 DATA_ENSURE(1);
1700 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1701 p += SIZE_BITSET;
1702 s += enclen(encode, s);
1703 MOP_OUT;
1704 break;
1705
1706 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1707 DATA_ENSURE(1);
1708 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1709 s++;
1710 GET_LENGTH_INC(tlen, p);
1711 p += tlen;
1712 goto cc_mb_not_success;
1713 }
1714
1715 cclass_mb_not:
1716 GET_LENGTH_INC(tlen, p);
1717 {
1718 OnigCodePoint code;
1719 UChar *ss;
1720 int mb_len = enclen(encode, s);
1721
1722 if (! DATA_ENSURE_CHECK(mb_len)) {
1723 DATA_ENSURE(1);
1724 s = (UChar* )end;
1725 p += tlen;
1726 goto cc_mb_not_success;
1727 }
1728
1729 ss = s;
1730 s += mb_len;
1731 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1732
1733 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1734 if (onig_is_in_code_range(p, code)) goto fail;
1735 #else
1736 q = p;
1737 ALIGNMENT_RIGHT(q);
1738 if (onig_is_in_code_range(q, code)) goto fail;
1739 #endif
1740 }
1741 p += tlen;
1742
1743 cc_mb_not_success:
1744 MOP_OUT;
1745 break;
1746
1747 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1748 DATA_ENSURE(1);
1749 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1750 p += SIZE_BITSET;
1751 goto cclass_mb_not;
1752 }
1753 else {
1754 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1755 goto fail;
1756
1757 p += SIZE_BITSET;
1758 GET_LENGTH_INC(tlen, p);
1759 p += tlen;
1760 s++;
1761 }
1762 MOP_OUT;
1763 break;
1764
1765 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1766 {
1767 OnigCodePoint code;
1768 void *node;
1769 int mb_len;
1770 UChar *ss;
1771
1772 DATA_ENSURE(1);
1773 GET_POINTER_INC(node, p);
1774 mb_len = enclen(encode, s);
1775 ss = s;
1776 s += mb_len;
1777 DATA_ENSURE(0);
1778 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1779 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1780 }
1781 MOP_OUT;
1782 break;
1783
1784 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1785 DATA_ENSURE(1);
1786 n = enclen(encode, s);
1787 DATA_ENSURE(n);
1788 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1789 s += n;
1790 MOP_OUT;
1791 break;
1792
1793 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1794 DATA_ENSURE(1);
1795 n = enclen(encode, s);
1796 DATA_ENSURE(n);
1797 s += n;
1798 MOP_OUT;
1799 break;
1800
1801 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1802 while (DATA_ENSURE_CHECK1) {
1803 STACK_PUSH_ALT(p, s, sprev);
1804 n = enclen(encode, s);
1805 DATA_ENSURE(n);
1806 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1807 sprev = s;
1808 s += n;
1809 }
1810 MOP_OUT;
1811 break;
1812
1813 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1814 while (DATA_ENSURE_CHECK1) {
1815 STACK_PUSH_ALT(p, s, sprev);
1816 n = enclen(encode, s);
1817 if (n > 1) {
1818 DATA_ENSURE(n);
1819 sprev = s;
1820 s += n;
1821 }
1822 else {
1823 sprev = s;
1824 s++;
1825 }
1826 }
1827 MOP_OUT;
1828 break;
1829
1830 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1831 while (DATA_ENSURE_CHECK1) {
1832 if (*p == *s) {
1833 STACK_PUSH_ALT(p + 1, s, sprev);
1834 }
1835 n = enclen(encode, s);
1836 DATA_ENSURE(n);
1837 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1838 sprev = s;
1839 s += n;
1840 }
1841 p++;
1842 MOP_OUT;
1843 break;
1844
1845 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1846 while (DATA_ENSURE_CHECK1) {
1847 if (*p == *s) {
1848 STACK_PUSH_ALT(p + 1, s, sprev);
1849 }
1850 n = enclen(encode, s);
1851 if (n > 1) {
1852 DATA_ENSURE(n);
1853 sprev = s;
1854 s += n;
1855 }
1856 else {
1857 sprev = s;
1858 s++;
1859 }
1860 }
1861 p++;
1862 MOP_OUT;
1863 break;
1864
1865 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1866 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1867 GET_STATE_CHECK_NUM_INC(mem, p);
1868 while (DATA_ENSURE_CHECK1) {
1869 STATE_CHECK_VAL(scv, mem);
1870 if (scv) goto fail;
1871
1872 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1873 n = enclen(encode, s);
1874 DATA_ENSURE(n);
1875 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1876 sprev = s;
1877 s += n;
1878 }
1879 MOP_OUT;
1880 break;
1881
1882 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1883 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1884
1885 GET_STATE_CHECK_NUM_INC(mem, p);
1886 while (DATA_ENSURE_CHECK1) {
1887 STATE_CHECK_VAL(scv, mem);
1888 if (scv) goto fail;
1889
1890 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1891 n = enclen(encode, s);
1892 if (n > 1) {
1893 DATA_ENSURE(n);
1894 sprev = s;
1895 s += n;
1896 }
1897 else {
1898 sprev = s;
1899 s++;
1900 }
1901 }
1902 MOP_OUT;
1903 break;
1904 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1905
1906 case OP_WORD: MOP_IN(OP_WORD);
1907 DATA_ENSURE(1);
1908 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1909 goto fail;
1910
1911 s += enclen(encode, s);
1912 MOP_OUT;
1913 break;
1914
1915 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1916 DATA_ENSURE(1);
1917 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1918 goto fail;
1919
1920 s += enclen(encode, s);
1921 MOP_OUT;
1922 break;
1923
1924 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1925 if (ON_STR_BEGIN(s)) {
1926 DATA_ENSURE(1);
1927 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1928 goto fail;
1929 }
1930 else if (ON_STR_END(s)) {
1931 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1932 goto fail;
1933 }
1934 else {
1935 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1936 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1937 goto fail;
1938 }
1939 MOP_OUT;
1940 continue;
1941 break;
1942
1943 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1944 if (ON_STR_BEGIN(s)) {
1945 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1946 goto fail;
1947 }
1948 else if (ON_STR_END(s)) {
1949 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1950 goto fail;
1951 }
1952 else {
1953 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1954 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1955 goto fail;
1956 }
1957 MOP_OUT;
1958 continue;
1959 break;
1960
1961 #ifdef USE_WORD_BEGIN_END
1962 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1963 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1964 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1965 MOP_OUT;
1966 continue;
1967 }
1968 }
1969 goto fail;
1970 break;
1971
1972 case OP_WORD_END: MOP_IN(OP_WORD_END);
1973 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1974 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1975 MOP_OUT;
1976 continue;
1977 }
1978 }
1979 goto fail;
1980 break;
1981 #endif
1982
1983 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1984 if (! ON_STR_BEGIN(s)) goto fail;
1985
1986 MOP_OUT;
1987 continue;
1988 break;
1989
1990 case OP_END_BUF: MOP_IN(OP_END_BUF);
1991 if (! ON_STR_END(s)) goto fail;
1992
1993 MOP_OUT;
1994 continue;
1995 break;
1996
1997 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
1998 if (ON_STR_BEGIN(s)) {
1999 if (IS_NOTBOL(msa->options)) goto fail;
2000 MOP_OUT;
2001 continue;
2002 }
2003 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2004 MOP_OUT;
2005 continue;
2006 }
2007 goto fail;
2008 break;
2009
2010 case OP_END_LINE: MOP_IN(OP_END_LINE);
2011 if (ON_STR_END(s)) {
2012 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2013 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2014 #endif
2015 if (IS_NOTEOL(msa->options)) goto fail;
2016 MOP_OUT;
2017 continue;
2018 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2019 }
2020 #endif
2021 }
2022 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2023 MOP_OUT;
2024 continue;
2025 }
2026 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2027 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2028 MOP_OUT;
2029 continue;
2030 }
2031 #endif
2032 goto fail;
2033 break;
2034
2035 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2036 if (ON_STR_END(s)) {
2037 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2038 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2039 #endif
2040 if (IS_NOTEOL(msa->options)) goto fail;
2041 MOP_OUT;
2042 continue;
2043 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2044 }
2045 #endif
2046 }
2047 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2048 ON_STR_END(s + enclen(encode, s))) {
2049 MOP_OUT;
2050 continue;
2051 }
2052 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2053 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2054 UChar* ss = s + enclen(encode, s);
2055 ss += enclen(encode, ss);
2056 if (ON_STR_END(ss)) {
2057 MOP_OUT;
2058 continue;
2059 }
2060 }
2061 #endif
2062 goto fail;
2063 break;
2064
2065 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2066 if (s != msa->start)
2067 goto fail;
2068
2069 MOP_OUT;
2070 continue;
2071 break;
2072
2073 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2074 GET_MEMNUM_INC(mem, p);
2075 STACK_PUSH_MEM_START(mem, s);
2076 MOP_OUT;
2077 continue;
2078 break;
2079
2080 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2081 GET_MEMNUM_INC(mem, p);
2082 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2083 MOP_OUT;
2084 continue;
2085 break;
2086
2087 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2088 GET_MEMNUM_INC(mem, p);
2089 STACK_PUSH_MEM_END(mem, s);
2090 MOP_OUT;
2091 continue;
2092 break;
2093
2094 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2095 GET_MEMNUM_INC(mem, p);
2096 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2097 MOP_OUT;
2098 continue;
2099 break;
2100
2101 #ifdef USE_SUBEXP_CALL
2102 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2103 GET_MEMNUM_INC(mem, p);
2104 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2105 STACK_PUSH_MEM_END(mem, s);
2106 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2107 MOP_OUT;
2108 continue;
2109 break;
2110
2111 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2112 GET_MEMNUM_INC(mem, p);
2113 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2114 STACK_GET_MEM_START(mem, stkp);
2115
2116 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2117 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2118 else
2119 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2120
2121 STACK_PUSH_MEM_END_MARK(mem);
2122 MOP_OUT;
2123 continue;
2124 break;
2125 #endif
2126
2127 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2128 mem = 1;
2129 goto backref;
2130 break;
2131
2132 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2133 mem = 2;
2134 goto backref;
2135 break;
2136
2137 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2138 GET_MEMNUM_INC(mem, p);
2139 backref:
2140 {
2141 int len;
2142 UChar *pstart, *pend;
2143
2144 /* if you want to remove following line,
2145 you should check in parse and compile time. */
2146 if (mem > num_mem) goto fail;
2147 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2148 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2149
2150 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2151 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2152 else
2153 pstart = (UChar* )((void* )mem_start_stk[mem]);
2154
2155 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2156 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2157 : (UChar* )((void* )mem_end_stk[mem]));
2158 n = pend - pstart;
2159 DATA_ENSURE(n);
2160 sprev = s;
2161 STRING_CMP(pstart, s, n);
2162 while (sprev + (len = enclen(encode, sprev)) < s)
2163 sprev += len;
2164
2165 MOP_OUT;
2166 continue;
2167 }
2168 break;
2169
2170 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2171 GET_MEMNUM_INC(mem, p);
2172 {
2173 int len;
2174 UChar *pstart, *pend;
2175
2176 /* if you want to remove following line,
2177 you should check in parse and compile time. */
2178 if (mem > num_mem) goto fail;
2179 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2180 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2181
2182 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2183 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2184 else
2185 pstart = (UChar* )((void* )mem_start_stk[mem]);
2186
2187 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2188 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2189 : (UChar* )((void* )mem_end_stk[mem]));
2190 n = pend - pstart;
2191 DATA_ENSURE(n);
2192 sprev = s;
2193 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2194 while (sprev + (len = enclen(encode, sprev)) < s)
2195 sprev += len;
2196
2197 MOP_OUT;
2198 continue;
2199 }
2200 break;
2201
2202 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2203 {
2204 int len, is_fail;
2205 UChar *pstart, *pend, *swork;
2206
2207 GET_LENGTH_INC(tlen, p);
2208 for (i = 0; i < tlen; i++) {
2209 GET_MEMNUM_INC(mem, p);
2210
2211 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2212 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2213
2214 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2215 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2216 else
2217 pstart = (UChar* )((void* )mem_start_stk[mem]);
2218
2219 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2220 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2221 : (UChar* )((void* )mem_end_stk[mem]));
2222 n = pend - pstart;
2223 DATA_ENSURE(n);
2224 sprev = s;
2225 swork = s;
2226 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2227 if (is_fail) continue;
2228 s = swork;
2229 while (sprev + (len = enclen(encode, sprev)) < s)
2230 sprev += len;
2231
2232 p += (SIZE_MEMNUM * (tlen - i - 1));
2233 break; /* success */
2234 }
2235 if (i == tlen) goto fail;
2236 MOP_OUT;
2237 continue;
2238 }
2239 break;
2240
2241 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2242 {
2243 int len, is_fail;
2244 UChar *pstart, *pend, *swork;
2245
2246 GET_LENGTH_INC(tlen, p);
2247 for (i = 0; i < tlen; i++) {
2248 GET_MEMNUM_INC(mem, p);
2249
2250 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2251 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2252
2253 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2254 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2255 else
2256 pstart = (UChar* )((void* )mem_start_stk[mem]);
2257
2258 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2259 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2260 : (UChar* )((void* )mem_end_stk[mem]));
2261 n = pend - pstart;
2262 DATA_ENSURE(n);
2263 sprev = s;
2264 swork = s;
2265 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2266 if (is_fail) continue;
2267 s = swork;
2268 while (sprev + (len = enclen(encode, sprev)) < s)
2269 sprev += len;
2270
2271 p += (SIZE_MEMNUM * (tlen - i - 1));
2272 break; /* success */
2273 }
2274 if (i == tlen) goto fail;
2275 MOP_OUT;
2276 continue;
2277 }
2278 break;
2279
2280 #ifdef USE_BACKREF_WITH_LEVEL
2281 case OP_BACKREF_WITH_LEVEL:
2282 {
2283 int len;
2284 OnigOptionType ic;
2285 LengthType level;
2286
2287 GET_OPTION_INC(ic, p);
2288 GET_LENGTH_INC(level, p);
2289 GET_LENGTH_INC(tlen, p);
2290
2291 sprev = s;
2292 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2293 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2294 while (sprev + (len = enclen(encode, sprev)) < s)
2295 sprev += len;
2296
2297 p += (SIZE_MEMNUM * tlen);
2298 }
2299 else
2300 goto fail;
2301
2302 MOP_OUT;
2303 continue;
2304 }
2305
2306 break;
2307 #endif
2308
2309 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2310 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2311 GET_OPTION_INC(option, p);
2312 STACK_PUSH_ALT(p, s, sprev);
2313 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2314 MOP_OUT;
2315 continue;
2316 break;
2317
2318 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2319 GET_OPTION_INC(option, p);
2320 MOP_OUT;
2321 continue;
2322 break;
2323 #endif
2324
2325 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2326 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2327 STACK_PUSH_NULL_CHECK_START(mem, s);
2328 MOP_OUT;
2329 continue;
2330 break;
2331
2332 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2333 {
2334 int isnull;
2335
2336 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2337 STACK_NULL_CHECK(isnull, mem, s);
2338 if (isnull) {
2339 #ifdef ONIG_DEBUG_MATCH
2340 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2341 (int )mem, (int )s);
2342 #endif
2343 null_check_found:
2344 /* empty loop founded, skip next instruction */
2345 switch (*p++) {
2346 case OP_JUMP:
2347 case OP_PUSH:
2348 p += SIZE_RELADDR;
2349 break;
2350 case OP_REPEAT_INC:
2351 case OP_REPEAT_INC_NG:
2352 case OP_REPEAT_INC_SG:
2353 case OP_REPEAT_INC_NG_SG:
2354 p += SIZE_MEMNUM;
2355 break;
2356 default:
2357 goto unexpected_bytecode_error;
2358 break;
2359 }
2360 }
2361 }
2362 MOP_OUT;
2363 continue;
2364 break;
2365
2366 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2367 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2368 {
2369 int isnull;
2370
2371 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2372 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2373 if (isnull) {
2374 #ifdef ONIG_DEBUG_MATCH
2375 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2376 (int )mem, (int )s);
2377 #endif
2378 if (isnull == -1) goto fail;
2379 goto null_check_found;
2380 }
2381 }
2382 MOP_OUT;
2383 continue;
2384 break;
2385 #endif
2386
2387 #ifdef USE_SUBEXP_CALL
2388 case OP_NULL_CHECK_END_MEMST_PUSH:
2389 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2390 {
2391 int isnull;
2392
2393 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2394 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2395 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2396 #else
2397 STACK_NULL_CHECK_REC(isnull, mem, s);
2398 #endif
2399 if (isnull) {
2400 #ifdef ONIG_DEBUG_MATCH
2401 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2402 (int )mem, (int )s);
2403 #endif
2404 if (isnull == -1) goto fail;
2405 goto null_check_found;
2406 }
2407 else {
2408 STACK_PUSH_NULL_CHECK_END(mem);
2409 }
2410 }
2411 MOP_OUT;
2412 continue;
2413 break;
2414 #endif
2415
2416 case OP_JUMP: MOP_IN(OP_JUMP);
2417 GET_RELADDR_INC(addr, p);
2418 p += addr;
2419 MOP_OUT;
2420 CHECK_INTERRUPT_IN_MATCH_AT;
2421 continue;
2422 break;
2423
2424 case OP_PUSH: MOP_IN(OP_PUSH);
2425 GET_RELADDR_INC(addr, p);
2426 STACK_PUSH_ALT(p + addr, s, sprev);
2427 MOP_OUT;
2428 continue;
2429 break;
2430
2431 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2432 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2433 GET_STATE_CHECK_NUM_INC(mem, p);
2434 STATE_CHECK_VAL(scv, mem);
2435 if (scv) goto fail;
2436
2437 GET_RELADDR_INC(addr, p);
2438 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2439 MOP_OUT;
2440 continue;
2441 break;
2442
2443 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2444 GET_STATE_CHECK_NUM_INC(mem, p);
2445 GET_RELADDR_INC(addr, p);
2446 STATE_CHECK_VAL(scv, mem);
2447 if (scv) {
2448 p += addr;
2449 }
2450 else {
2451 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2452 }
2453 MOP_OUT;
2454 continue;
2455 break;
2456
2457 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2458 GET_STATE_CHECK_NUM_INC(mem, p);
2459 STATE_CHECK_VAL(scv, mem);
2460 if (scv) goto fail;
2461
2462 STACK_PUSH_STATE_CHECK(s, mem);
2463 MOP_OUT;
2464 continue;
2465 break;
2466 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2467
2468 case OP_POP: MOP_IN(OP_POP);
2469 STACK_POP_ONE;
2470 MOP_OUT;
2471 continue;
2472 break;
2473
2474 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2475 GET_RELADDR_INC(addr, p);
2476 if (*p == *s && DATA_ENSURE_CHECK1) {
2477 p++;
2478 STACK_PUSH_ALT(p + addr, s, sprev);
2479 MOP_OUT;
2480 continue;
2481 }
2482 p += (addr + 1);
2483 MOP_OUT;
2484 continue;
2485 break;
2486
2487 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2488 GET_RELADDR_INC(addr, p);
2489 if (*p == *s) {
2490 p++;
2491 STACK_PUSH_ALT(p + addr, s, sprev);
2492 MOP_OUT;
2493 continue;
2494 }
2495 p++;
2496 MOP_OUT;
2497 continue;
2498 break;
2499
2500 case OP_REPEAT: MOP_IN(OP_REPEAT);
2501 {
2502 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2503 GET_RELADDR_INC(addr, p);
2504
2505 STACK_ENSURE(1);
2506 repeat_stk[mem] = GET_STACK_INDEX(stk);
2507 STACK_PUSH_REPEAT(mem, p);
2508
2509 if (reg->repeat_range[mem].lower == 0) {
2510 STACK_PUSH_ALT(p + addr, s, sprev);
2511 }
2512 }
2513 MOP_OUT;
2514 continue;
2515 break;
2516
2517 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2518 {
2519 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2520 GET_RELADDR_INC(addr, p);
2521
2522 STACK_ENSURE(1);
2523 repeat_stk[mem] = GET_STACK_INDEX(stk);
2524 STACK_PUSH_REPEAT(mem, p);
2525
2526 if (reg->repeat_range[mem].lower == 0) {
2527 STACK_PUSH_ALT(p, s, sprev);
2528 p += addr;
2529 }
2530 }
2531 MOP_OUT;
2532 continue;
2533 break;
2534
2535 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2536 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2537 si = repeat_stk[mem];
2538 stkp = STACK_AT(si);
2539
2540 repeat_inc:
2541 stkp->u.repeat.count++;
2542 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2543 /* end of repeat. Nothing to do. */
2544 }
2545 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2546 STACK_PUSH_ALT(p, s, sprev);
2547 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2548 }
2549 else {
2550 p = stkp->u.repeat.pcode;
2551 }
2552 STACK_PUSH_REPEAT_INC(si);
2553 MOP_OUT;
2554 CHECK_INTERRUPT_IN_MATCH_AT;
2555 continue;
2556 break;
2557
2558 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2559 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2560 STACK_GET_REPEAT(mem, stkp);
2561 si = GET_STACK_INDEX(stkp);
2562 goto repeat_inc;
2563 break;
2564
2565 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2566 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2567 si = repeat_stk[mem];
2568 stkp = STACK_AT(si);
2569
2570 repeat_inc_ng:
2571 stkp->u.repeat.count++;
2572 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2573 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2574 UChar* pcode = stkp->u.repeat.pcode;
2575
2576 STACK_PUSH_REPEAT_INC(si);
2577 STACK_PUSH_ALT(pcode, s, sprev);
2578 }
2579 else {
2580 p = stkp->u.repeat.pcode;
2581 STACK_PUSH_REPEAT_INC(si);
2582 }
2583 }
2584 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2585 STACK_PUSH_REPEAT_INC(si);
2586 }
2587 MOP_OUT;
2588 CHECK_INTERRUPT_IN_MATCH_AT;
2589 continue;
2590 break;
2591
2592 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2593 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2594 STACK_GET_REPEAT(mem, stkp);
2595 si = GET_STACK_INDEX(stkp);
2596 goto repeat_inc_ng;
2597 break;
2598
2599 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2600 STACK_PUSH_POS(s, sprev);
2601 MOP_OUT;
2602 continue;
2603 break;
2604
2605 case OP_POP_POS: MOP_IN(OP_POP_POS);
2606 {
2607 STACK_POS_END(stkp);
2608 s = stkp->u.state.pstr;
2609 sprev = stkp->u.state.pstr_prev;
2610 }
2611 MOP_OUT;
2612 continue;
2613 break;
2614
2615 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2616 GET_RELADDR_INC(addr, p);
2617 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2618 MOP_OUT;
2619 continue;
2620 break;
2621
2622 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2623 STACK_POP_TIL_POS_NOT;
2624 goto fail;
2625 break;
2626
2627 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2628 STACK_PUSH_STOP_BT;
2629 MOP_OUT;
2630 continue;
2631 break;
2632
2633 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2634 STACK_STOP_BT_END;
2635 MOP_OUT;
2636 continue;
2637 break;
2638
2639 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2640 GET_LENGTH_INC(tlen, p);
2641 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2642 if (IS_NULL(s)) goto fail;
2643 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2644 MOP_OUT;
2645 continue;
2646 break;
2647
2648 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2649 GET_RELADDR_INC(addr, p);
2650 GET_LENGTH_INC(tlen, p);
2651 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2652 if (IS_NULL(q)) {
2653 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2654 If you want to change to fail, replace following line. */
2655 p += addr;
2656 /* goto fail; */
2657 }
2658 else {
2659 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2660 s = q;
2661 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2662 }
2663 MOP_OUT;
2664 continue;
2665 break;
2666
2667 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2668 STACK_POP_TIL_LOOK_BEHIND_NOT;
2669 goto fail;
2670 break;
2671
2672 #ifdef USE_SUBEXP_CALL
2673 case OP_CALL: MOP_IN(OP_CALL);
2674 GET_ABSADDR_INC(addr, p);
2675 STACK_PUSH_CALL_FRAME(p);
2676 p = reg->p + addr;
2677 MOP_OUT;
2678 continue;
2679 break;
2680
2681 case OP_RETURN: MOP_IN(OP_RETURN);
2682 STACK_RETURN(p);
2683 STACK_PUSH_RETURN;
2684 MOP_OUT;
2685 continue;
2686 break;
2687 #endif
2688
2689 case OP_FINISH:
2690 goto finish;
2691 break;
2692
2693 fail:
2694 MOP_OUT;
2695 /* fall */
2696 case OP_FAIL: MOP_IN(OP_FAIL);
2697 STACK_POP;
2698 p = stk->u.state.pcode;
2699 s = stk->u.state.pstr;
2700 sprev = stk->u.state.pstr_prev;
2701
2702 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2703 if (stk->u.state.state_check != 0) {
2704 stk->type = STK_STATE_CHECK_MARK;
2705 stk++;
2706 }
2707 #endif
2708
2709 MOP_OUT;
2710 continue;
2711 break;
2712
2713 default:
2714 goto bytecode_error;
2715
2716 } /* end of switch */
2717 sprev = sbegin;
2718 } /* end of while(1) */
2719
2720 finish:
2721 STACK_SAVE;
2722 return best_len;
2723
2724 #ifdef ONIG_DEBUG
2725 stack_error:
2726 STACK_SAVE;
2727 return ONIGERR_STACK_BUG;
2728 #endif
2729
2730 bytecode_error:
2731 STACK_SAVE;
2732 return ONIGERR_UNDEFINED_BYTECODE;
2733
2734 unexpected_bytecode_error:
2735 STACK_SAVE;
2736 return ONIGERR_UNEXPECTED_BYTECODE;
2737 }
2738
2739
2740 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2741 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2742 const UChar* text, const UChar* text_end, UChar* text_range)
2743 {
2744 UChar *t, *p, *s, *end;
2745
2746 end = (UChar* )text_end;
2747 end -= target_end - target - 1;
2748 if (end > text_range)
2749 end = text_range;
2750
2751 s = (UChar* )text;
2752
2753 while (s < end) {
2754 if (*s == *target) {
2755 p = s + 1;
2756 t = target + 1;
2757 while (t < target_end) {
2758 if (*t != *p++)
2759 break;
2760 t++;
2761 }
2762 if (t == target_end)
2763 return s;
2764 }
2765 s += enclen(enc, s);
2766 }
2767
2768 return (UChar* )NULL;
2769 }
2770
2771 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2772 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2773 const UChar* t, const UChar* tend,
2774 const UChar* p, const UChar* end)
2775 {
2776 int lowlen;
2777 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2778
2779 while (t < tend) {
2780 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2781 q = lowbuf;
2782 while (lowlen > 0) {
2783 if (*t++ != *q++) return 0;
2784 lowlen--;
2785 }
2786 }
2787
2788 return 1;
2789 }
2790
2791 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2792 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2793 UChar* target, UChar* target_end,
2794 const UChar* text, const UChar* text_end, UChar* text_range)
2795 {
2796 UChar *s, *end;
2797
2798 end = (UChar* )text_end;
2799 end -= target_end - target - 1;
2800 if (end > text_range)
2801 end = text_range;
2802
2803 s = (UChar* )text;
2804
2805 while (s < end) {
2806 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2807 s, text_end))
2808 return s;
2809
2810 s += enclen(enc, s);
2811 }
2812
2813 return (UChar* )NULL;
2814 }
2815
2816 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2817 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2818 const UChar* text, const UChar* adjust_text,
2819 const UChar* text_end, const UChar* text_start)
2820 {
2821 UChar *t, *p, *s;
2822
2823 s = (UChar* )text_end;
2824 s -= (target_end - target);
2825 if (s > text_start)
2826 s = (UChar* )text_start;
2827 else
2828 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2829
2830 while (s >= text) {
2831 if (*s == *target) {
2832 p = s + 1;
2833 t = target + 1;
2834 while (t < target_end) {
2835 if (*t != *p++)
2836 break;
2837 t++;
2838 }
2839 if (t == target_end)
2840 return s;
2841 }
2842 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2843 }
2844
2845 return (UChar* )NULL;
2846 }
2847
2848 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2849 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2850 UChar* target, UChar* target_end,
2851 const UChar* text, const UChar* adjust_text,
2852 const UChar* text_end, const UChar* text_start)
2853 {
2854 UChar *s;
2855
2856 s = (UChar* )text_end;
2857 s -= (target_end - target);
2858 if (s > text_start)
2859 s = (UChar* )text_start;
2860 else
2861 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2862
2863 while (s >= text) {
2864 if (str_lower_case_match(enc, case_fold_flag,
2865 target, target_end, s, text_end))
2866 return s;
2867
2868 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2869 }
2870
2871 return (UChar* )NULL;
2872 }
2873
2874 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2875 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2876 const UChar* text, const UChar* text_end,
2877 const UChar* text_range)
2878 {
2879 const UChar *s, *se, *t, *p, *end;
2880 const UChar *tail;
2881 int skip, tlen1;
2882
2883 #ifdef ONIG_DEBUG_SEARCH
2884 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2885 (int )text, (int )text_end, (int )text_range);
2886 #endif
2887
2888 tail = target_end - 1;
2889 tlen1 = tail - target;
2890 end = text_range;
2891 if (end + tlen1 > text_end)
2892 end = text_end - tlen1;
2893
2894 s = text;
2895
2896 if (IS_NULL(reg->int_map)) {
2897 while (s < end) {
2898 p = se = s + tlen1;
2899 t = tail;
2900 while (*p == *t) {
2901 if (t == target) return (UChar* )s;
2902 p--; t--;
2903 }
2904 skip = reg->map[*se];
2905 t = s;
2906 do {
2907 s += enclen(reg->enc, s);
2908 } while ((s - t) < skip && s < end);
2909 }
2910 }
2911 else {
2912 while (s < end) {
2913 p = se = s + tlen1;
2914 t = tail;
2915 while (*p == *t) {
2916 if (t == target) return (UChar* )s;
2917 p--; t--;
2918 }
2919 skip = reg->int_map[*se];
2920 t = s;
2921 do {
2922 s += enclen(reg->enc, s);
2923 } while ((s - t) < skip && s < end);
2924 }
2925 }
2926
2927 return (UChar* )NULL;
2928 }
2929
2930 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2931 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2932 const UChar* text, const UChar* text_end, const UChar* text_range)
2933 {
2934 const UChar *s, *t, *p, *end;
2935 const UChar *tail;
2936
2937 end = text_range + (target_end - target) - 1;
2938 if (end > text_end)
2939 end = text_end;
2940
2941 tail = target_end - 1;
2942 s = text + (target_end - target) - 1;
2943 if (IS_NULL(reg->int_map)) {
2944 while (s < end) {
2945 p = s;
2946 t = tail;
2947 while (*p == *t) {
2948 if (t == target) return (UChar* )p;
2949 p--; t--;
2950 }
2951 s += reg->map[*s];
2952 }
2953 }
2954 else { /* see int_map[] */
2955 while (s < end) {
2956 p = s;
2957 t = tail;
2958 while (*p == *t) {
2959 if (t == target) return (UChar* )p;
2960 p--; t--;
2961 }
2962 s += reg->int_map[*s];
2963 }
2964 }
2965 return (UChar* )NULL;
2966 }
2967
2968 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,int ** skip)2969 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2970 int** skip)
2971
2972 {
2973 int i, len;
2974
2975 if (IS_NULL(*skip)) {
2976 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2977 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2978 }
2979
2980 len = end - s;
2981 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
2982 (*skip)[i] = len;
2983
2984 for (i = len - 1; i > 0; i--)
2985 (*skip)[s[i]] = i;
2986
2987 return 0;
2988 }
2989
2990 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2991 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
2992 const UChar* text, const UChar* adjust_text,
2993 const UChar* text_end, const UChar* text_start)
2994 {
2995 const UChar *s, *t, *p;
2996
2997 s = text_end - (target_end - target);
2998 if (text_start < s)
2999 s = text_start;
3000 else
3001 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3002
3003 while (s >= text) {
3004 p = s;
3005 t = target;
3006 while (t < target_end && *p == *t) {
3007 p++; t++;
3008 }
3009 if (t == target_end)
3010 return (UChar* )s;
3011
3012 s -= reg->int_map_backward[*s];
3013 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3014 }
3015
3016 return (UChar* )NULL;
3017 }
3018
3019 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3020 map_search(OnigEncoding enc, UChar map[],
3021 const UChar* text, const UChar* text_range)
3022 {
3023 const UChar *s = text;
3024
3025 while (s < text_range) {
3026 if (map[*s]) return (UChar* )s;
3027
3028 s += enclen(enc, s);
3029 }
3030 return (UChar* )NULL;
3031 }
3032
3033 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3034 map_search_backward(OnigEncoding enc, UChar map[],
3035 const UChar* text, const UChar* adjust_text,
3036 const UChar* text_start)
3037 {
3038 const UChar *s = text_start;
3039
3040 while (s >= text) {
3041 if (map[*s]) return (UChar* )s;
3042
3043 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3044 }
3045 return (UChar* )NULL;
3046 }
3047
3048 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3049 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3050 OnigOptionType option)
3051 {
3052 int r;
3053 UChar *prev;
3054 OnigMatchArg msa;
3055
3056 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3057 start:
3058 THREAD_ATOMIC_START;
3059 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3060 ONIG_STATE_INC(reg);
3061 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3062 onig_chain_reduce(reg);
3063 ONIG_STATE_INC(reg);
3064 }
3065 }
3066 else {
3067 int n;
3068
3069 THREAD_ATOMIC_END;
3070 n = 0;
3071 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3072 if (++n > THREAD_PASS_LIMIT_COUNT)
3073 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3074 THREAD_PASS;
3075 }
3076 goto start;
3077 }
3078 THREAD_ATOMIC_END;
3079 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3080
3081 MATCH_ARG_INIT(msa, option, region, at);
3082 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3083 {
3084 int offset = at - str;
3085 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3086 }
3087 #endif
3088
3089 if (region
3090 #ifdef USE_POSIX_API_REGION_OPTION
3091 && !IS_POSIX_REGION(option)
3092 #endif
3093 ) {
3094 r = onig_region_resize_clear(region, reg->num_mem + 1);
3095 }
3096 else
3097 r = 0;
3098
3099 if (r == 0) {
3100 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3101 r = match_at(reg, str, end,
3102 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3103 end,
3104 #endif
3105 at, prev, &msa);
3106 }
3107
3108 MATCH_ARG_FREE(msa);
3109 ONIG_STATE_DEC_THREAD(reg);
3110 return r;
3111 }
3112
3113 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3114 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3115 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3116 {
3117 UChar *p, *pprev = (UChar* )NULL;
3118
3119 #ifdef ONIG_DEBUG_SEARCH
3120 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3121 (int )str, (int )end, (int )s, (int )range);
3122 #endif
3123
3124 p = s;
3125 if (reg->dmin > 0) {
3126 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3127 p += reg->dmin;
3128 }
3129 else {
3130 UChar *q = p + reg->dmin;
3131 while (p < q) p += enclen(reg->enc, p);
3132 }
3133 }
3134
3135 retry:
3136 switch (reg->optimize) {
3137 case ONIG_OPTIMIZE_EXACT:
3138 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3139 break;
3140 case ONIG_OPTIMIZE_EXACT_IC:
3141 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3142 reg->exact, reg->exact_end, p, end, range);
3143 break;
3144
3145 case ONIG_OPTIMIZE_EXACT_BM:
3146 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3147 break;
3148
3149 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3150 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3151 break;
3152
3153 case ONIG_OPTIMIZE_MAP:
3154 p = map_search(reg->enc, reg->map, p, range);
3155 break;
3156 }
3157
3158 if (p && p < range) {
3159 if (p - reg->dmin < s) {
3160 retry_gate:
3161 pprev = p;
3162 p += enclen(reg->enc, p);
3163 goto retry;
3164 }
3165
3166 if (reg->sub_anchor) {
3167 UChar* prev;
3168
3169 switch (reg->sub_anchor) {
3170 case ANCHOR_BEGIN_LINE:
3171 if (!ON_STR_BEGIN(p)) {
3172 prev = onigenc_get_prev_char_head(reg->enc,
3173 (pprev ? pprev : str), p);
3174 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3175 goto retry_gate;
3176 }
3177 break;
3178
3179 case ANCHOR_END_LINE:
3180 if (ON_STR_END(p)) {
3181 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3182 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3183 (pprev ? pprev : str), p);
3184 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3185 goto retry_gate;
3186 #endif
3187 }
3188 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3189 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3190 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3191 #endif
3192 )
3193 goto retry_gate;
3194 break;
3195 }
3196 }
3197
3198 if (reg->dmax == 0) {
3199 *low = p;
3200 if (low_prev) {
3201 if (*low > s)
3202 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3203 else
3204 *low_prev = onigenc_get_prev_char_head(reg->enc,
3205 (pprev ? pprev : str), p);
3206 }
3207 }
3208 else {
3209 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3210 *low = p - reg->dmax;
3211 if (*low > s) {
3212 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3213 *low, (const UChar** )low_prev);
3214 if (low_prev && IS_NULL(*low_prev))
3215 *low_prev = onigenc_get_prev_char_head(reg->enc,
3216 (pprev ? pprev : s), *low);
3217 }
3218 else {
3219 if (low_prev)
3220 *low_prev = onigenc_get_prev_char_head(reg->enc,
3221 (pprev ? pprev : str), *low);
3222 }
3223 }
3224 }
3225 /* no needs to adjust *high, *high is used as range check only */
3226 *high = p - reg->dmin;
3227
3228 #ifdef ONIG_DEBUG_SEARCH
3229 fprintf(stderr,
3230 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3231 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3232 #endif
3233 return 1; /* success */
3234 }
3235
3236 return 0; /* fail */
3237 }
3238
3239 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3240 int** skip));
3241
3242 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3243
3244 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3245 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3246 UChar* s, const UChar* range, UChar* adjrange,
3247 UChar** low, UChar** high)
3248 {
3249 int r;
3250 UChar *p;
3251
3252 range += reg->dmin;
3253 p = s;
3254
3255 retry:
3256 switch (reg->optimize) {
3257 case ONIG_OPTIMIZE_EXACT:
3258 exact_method:
3259 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3260 range, adjrange, end, p);
3261 break;
3262
3263 case ONIG_OPTIMIZE_EXACT_IC:
3264 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3265 reg->exact, reg->exact_end,
3266 range, adjrange, end, p);
3267 break;
3268
3269 case ONIG_OPTIMIZE_EXACT_BM:
3270 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3271 if (IS_NULL(reg->int_map_backward)) {
3272 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3273 goto exact_method;
3274
3275 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3276 &(reg->int_map_backward));
3277 if (r) return r;
3278 }
3279 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3280 end, p);
3281 break;
3282
3283 case ONIG_OPTIMIZE_MAP:
3284 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3285 break;
3286 }
3287
3288 if (p) {
3289 if (reg->sub_anchor) {
3290 UChar* prev;
3291
3292 switch (reg->sub_anchor) {
3293 case ANCHOR_BEGIN_LINE:
3294 if (!ON_STR_BEGIN(p)) {
3295 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3296 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3297 p = prev;
3298 goto retry;
3299 }
3300 }
3301 break;
3302
3303 case ANCHOR_END_LINE:
3304 if (ON_STR_END(p)) {
3305 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3306 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3307 if (IS_NULL(prev)) goto fail;
3308 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3309 p = prev;
3310 goto retry;
3311 }
3312 #endif
3313 }
3314 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3315 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3316 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3317 #endif
3318 ) {
3319 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3320 if (IS_NULL(p)) goto fail;
3321 goto retry;
3322 }
3323 break;
3324 }
3325 }
3326
3327 /* no needs to adjust *high, *high is used as range check only */
3328 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3329 *low = p - reg->dmax;
3330 *high = p - reg->dmin;
3331 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3332 }
3333
3334 #ifdef ONIG_DEBUG_SEARCH
3335 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3336 (int )(*low - str), (int )(*high - str));
3337 #endif
3338 return 1; /* success */
3339 }
3340
3341 fail:
3342 #ifdef ONIG_DEBUG_SEARCH
3343 fprintf(stderr, "backward_search_range: fail.\n");
3344 #endif
3345 return 0; /* fail */
3346 }
3347
3348
3349 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3350 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3351 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3352 {
3353 int r;
3354 UChar *s, *prev;
3355 OnigMatchArg msa;
3356 const UChar *orig_start = start;
3357 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3358 const UChar *orig_range = range;
3359 #endif
3360
3361 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3362 start:
3363 THREAD_ATOMIC_START;
3364 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3365 ONIG_STATE_INC(reg);
3366 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3367 onig_chain_reduce(reg);
3368 ONIG_STATE_INC(reg);
3369 }
3370 }
3371 else {
3372 int n;
3373
3374 THREAD_ATOMIC_END;
3375 n = 0;
3376 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3377 if (++n > THREAD_PASS_LIMIT_COUNT)
3378 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3379 THREAD_PASS;
3380 }
3381 goto start;
3382 }
3383 THREAD_ATOMIC_END;
3384 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3385
3386 #ifdef ONIG_DEBUG_SEARCH
3387 fprintf(stderr,
3388 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3389 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3390 #endif
3391
3392 if (region
3393 #ifdef USE_POSIX_API_REGION_OPTION
3394 && !IS_POSIX_REGION(option)
3395 #endif
3396 ) {
3397 r = onig_region_resize_clear(region, reg->num_mem + 1);
3398 if (r) goto finish_no_msa;
3399 }
3400
3401 if (start > end || start < str) goto mismatch_no_msa;
3402
3403
3404 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3405 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3406 #define MATCH_AND_RETURN_CHECK(upper_range) \
3407 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3408 if (r != ONIG_MISMATCH) {\
3409 if (r >= 0) {\
3410 if (! IS_FIND_LONGEST(reg->options)) {\
3411 goto match;\
3412 }\
3413 }\
3414 else goto finish; /* error */ \
3415 }
3416 #else
3417 #define MATCH_AND_RETURN_CHECK(upper_range) \
3418 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3419 if (r != ONIG_MISMATCH) {\
3420 if (r >= 0) {\
3421 goto match;\
3422 }\
3423 else goto finish; /* error */ \
3424 }
3425 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3426 #else
3427 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3428 #define MATCH_AND_RETURN_CHECK(none) \
3429 r = match_at(reg, str, end, s, prev, &msa);\
3430 if (r != ONIG_MISMATCH) {\
3431 if (r >= 0) {\
3432 if (! IS_FIND_LONGEST(reg->options)) {\
3433 goto match;\
3434 }\
3435 }\
3436 else goto finish; /* error */ \
3437 }
3438 #else
3439 #define MATCH_AND_RETURN_CHECK(none) \
3440 r = match_at(reg, str, end, s, prev, &msa);\
3441 if (r != ONIG_MISMATCH) {\
3442 if (r >= 0) {\
3443 goto match;\
3444 }\
3445 else goto finish; /* error */ \
3446 }
3447 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3448 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3449
3450
3451 /* anchor optimize: resume search range */
3452 if (reg->anchor != 0 && str < end) {
3453 UChar *min_semi_end, *max_semi_end;
3454
3455 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3456 /* search start-position only */
3457 begin_position:
3458 if (range > start)
3459 range = start + 1;
3460 else
3461 range = start;
3462 }
3463 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3464 /* search str-position only */
3465 if (range > start) {
3466 if (start != str) goto mismatch_no_msa;
3467 range = str + 1;
3468 }
3469 else {
3470 if (range <= str) {
3471 start = str;
3472 range = str;
3473 }
3474 else
3475 goto mismatch_no_msa;
3476 }
3477 }
3478 else if (reg->anchor & ANCHOR_END_BUF) {
3479 min_semi_end = max_semi_end = (UChar* )end;
3480
3481 end_buf:
3482 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3483 goto mismatch_no_msa;
3484
3485 if (range > start) {
3486 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3487 start = min_semi_end - reg->anchor_dmax;
3488 if (start < end)
3489 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3490 else { /* match with empty at end */
3491 start = onigenc_get_prev_char_head(reg->enc, str, end);
3492 }
3493 }
3494 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3495 range = max_semi_end - reg->anchor_dmin + 1;
3496 }
3497
3498 if (start >= range) goto mismatch_no_msa;
3499 }
3500 else {
3501 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3502 range = min_semi_end - reg->anchor_dmax;
3503 }
3504 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3505 start = max_semi_end - reg->anchor_dmin;
3506 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3507 }
3508 if (range > start) goto mismatch_no_msa;
3509 }
3510 }
3511 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3512 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3513
3514 max_semi_end = (UChar* )end;
3515 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3516 min_semi_end = pre_end;
3517
3518 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3519 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3520 if (IS_NOT_NULL(pre_end) &&
3521 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3522 min_semi_end = pre_end;
3523 }
3524 #endif
3525 if (min_semi_end > str && start <= min_semi_end) {
3526 goto end_buf;
3527 }
3528 }
3529 else {
3530 min_semi_end = (UChar* )end;
3531 goto end_buf;
3532 }
3533 }
3534 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3535 goto begin_position;
3536 }
3537 }
3538 else if (str == end) { /* empty string */
3539 static const UChar* address_for_empty_string = (UChar* )"";
3540
3541 #ifdef ONIG_DEBUG_SEARCH
3542 fprintf(stderr, "onig_search: empty string.\n");
3543 #endif
3544
3545 if (reg->threshold_len == 0) {
3546 start = end = str = address_for_empty_string;
3547 s = (UChar* )start;
3548 prev = (UChar* )NULL;
3549
3550 MATCH_ARG_INIT(msa, option, region, start);
3551 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3552 msa.state_check_buff = (void* )0;
3553 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3554 #endif
3555 MATCH_AND_RETURN_CHECK(end);
3556 goto mismatch;
3557 }
3558 goto mismatch_no_msa;
3559 }
3560
3561 #ifdef ONIG_DEBUG_SEARCH
3562 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3563 (int )(end - str), (int )(start - str), (int )(range - str));
3564 #endif
3565
3566 MATCH_ARG_INIT(msa, option, region, orig_start);
3567 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3568 {
3569 int offset = (MIN(start, range) - str);
3570 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3571 }
3572 #endif
3573
3574 s = (UChar* )start;
3575 if (range > start) { /* forward search */
3576 if (s > str)
3577 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3578 else
3579 prev = (UChar* )NULL;
3580
3581 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3582 UChar *sch_range, *low, *high, *low_prev;
3583
3584 sch_range = (UChar* )range;
3585 if (reg->dmax != 0) {
3586 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3587 sch_range = (UChar* )end;
3588 else {
3589 sch_range += reg->dmax;
3590 if (sch_range > end) sch_range = (UChar* )end;
3591 }
3592 }
3593
3594 if ((end - start) < reg->threshold_len)
3595 goto mismatch;
3596
3597 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3598 do {
3599 if (! forward_search_range(reg, str, end, s, sch_range,
3600 &low, &high, &low_prev)) goto mismatch;
3601 if (s < low) {
3602 s = low;
3603 prev = low_prev;
3604 }
3605 while (s <= high) {
3606 MATCH_AND_RETURN_CHECK(orig_range);
3607 prev = s;
3608 s += enclen(reg->enc, s);
3609 }
3610 } while (s < range);
3611 goto mismatch;
3612 }
3613 else { /* check only. */
3614 if (! forward_search_range(reg, str, end, s, sch_range,
3615 &low, &high, (UChar** )NULL)) goto mismatch;
3616
3617 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3618 do {
3619 MATCH_AND_RETURN_CHECK(orig_range);
3620 prev = s;
3621 s += enclen(reg->enc, s);
3622
3623 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3624 prev = s;
3625 s += enclen(reg->enc, s);
3626 }
3627 } while (s < range);
3628 goto mismatch;
3629 }
3630 }
3631 }
3632
3633 do {
3634 MATCH_AND_RETURN_CHECK(orig_range);
3635 prev = s;
3636 s += enclen(reg->enc, s);
3637 } while (s < range);
3638
3639 if (s == range) { /* because empty match with /$/. */
3640 MATCH_AND_RETURN_CHECK(orig_range);
3641 }
3642 }
3643 else { /* backward search */
3644 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3645 if (orig_start < end)
3646 orig_start += enclen(reg->enc, orig_start); /* is upper range */
3647 #endif
3648
3649 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3650 UChar *low, *high, *adjrange, *sch_start;
3651
3652 if (range < end)
3653 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3654 else
3655 adjrange = (UChar* )end;
3656
3657 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3658 (end - range) >= reg->threshold_len) {
3659 do {
3660 sch_start = s + reg->dmax;
3661 if (sch_start > end) sch_start = (UChar* )end;
3662 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3663 &low, &high) <= 0)
3664 goto mismatch;
3665
3666 if (s > high)
3667 s = high;
3668
3669 while (s >= low) {
3670 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3671 MATCH_AND_RETURN_CHECK(orig_start);
3672 s = prev;
3673 }
3674 } while (s >= range);
3675 goto mismatch;
3676 }
3677 else { /* check only. */
3678 if ((end - range) < reg->threshold_len) goto mismatch;
3679
3680 sch_start = s;
3681 if (reg->dmax != 0) {
3682 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3683 sch_start = (UChar* )end;
3684 else {
3685 sch_start += reg->dmax;
3686 if (sch_start > end) sch_start = (UChar* )end;
3687 else
3688 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3689 start, sch_start);
3690 }
3691 }
3692 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3693 &low, &high) <= 0) goto mismatch;
3694 }
3695 }
3696
3697 do {
3698 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3699 MATCH_AND_RETURN_CHECK(orig_start);
3700 s = prev;
3701 } while (s >= range);
3702 }
3703
3704 mismatch:
3705 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3706 if (IS_FIND_LONGEST(reg->options)) {
3707 if (msa.best_len >= 0) {
3708 s = msa.best_s;
3709 goto match;
3710 }
3711 }
3712 #endif
3713 r = ONIG_MISMATCH;
3714
3715 finish:
3716 MATCH_ARG_FREE(msa);
3717 ONIG_STATE_DEC_THREAD(reg);
3718
3719 /* If result is mismatch and no FIND_NOT_EMPTY option,
3720 then the region is not setted in match_at(). */
3721 if (IS_FIND_NOT_EMPTY(reg->options) && region
3722 #ifdef USE_POSIX_API_REGION_OPTION
3723 && !IS_POSIX_REGION(option)
3724 #endif
3725 ) {
3726 onig_region_clear(region);
3727 }
3728
3729 #ifdef ONIG_DEBUG
3730 if (r != ONIG_MISMATCH)
3731 fprintf(stderr, "onig_search: error %d\n", r);
3732 #endif
3733 return r;
3734
3735 mismatch_no_msa:
3736 r = ONIG_MISMATCH;
3737 finish_no_msa:
3738 ONIG_STATE_DEC_THREAD(reg);
3739 #ifdef ONIG_DEBUG
3740 if (r != ONIG_MISMATCH)
3741 fprintf(stderr, "onig_search: error %d\n", r);
3742 #endif
3743 return r;
3744
3745 match:
3746 ONIG_STATE_DEC_THREAD(reg);
3747 MATCH_ARG_FREE(msa);
3748 return s - str;
3749 }
3750
3751 extern OnigEncoding
onig_get_encoding(regex_t * reg)3752 onig_get_encoding(regex_t* reg)
3753 {
3754 return reg->enc;
3755 }
3756
3757 extern OnigOptionType
onig_get_options(regex_t * reg)3758 onig_get_options(regex_t* reg)
3759 {
3760 return reg->options;
3761 }
3762
3763 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)3764 onig_get_case_fold_flag(regex_t* reg)
3765 {
3766 return reg->case_fold_flag;
3767 }
3768
3769 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3770 onig_get_syntax(regex_t* reg)
3771 {
3772 return reg->syntax;
3773 }
3774
3775 extern int
onig_number_of_captures(regex_t * reg)3776 onig_number_of_captures(regex_t* reg)
3777 {
3778 return reg->num_mem;
3779 }
3780
3781 extern int
onig_number_of_capture_histories(regex_t * reg)3782 onig_number_of_capture_histories(regex_t* reg)
3783 {
3784 #ifdef USE_CAPTURE_HISTORY
3785 int i, n;
3786
3787 n = 0;
3788 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3789 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3790 n++;
3791 }
3792 return n;
3793 #else
3794 return 0;
3795 #endif
3796 }
3797
3798 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3799 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3800 {
3801 *to = *from;
3802 }
3803
3804