1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
33
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
38 #endif
39
40 #ifdef USE_CAPTURE_HISTORY
41 static void history_tree_free(OnigCaptureTreeNode* node);
42
43 static void
history_tree_clear(OnigCaptureTreeNode * node)44 history_tree_clear(OnigCaptureTreeNode* node)
45 {
46 int i;
47
48 if (IS_NOT_NULL(node)) {
49 for (i = 0; i < node->num_childs; i++) {
50 if (IS_NOT_NULL(node->childs[i])) {
51 history_tree_free(node->childs[i]);
52 }
53 }
54 for (i = 0; i < node->allocated; i++) {
55 node->childs[i] = (OnigCaptureTreeNode* )0;
56 }
57 node->num_childs = 0;
58 node->beg = ONIG_REGION_NOTPOS;
59 node->end = ONIG_REGION_NOTPOS;
60 node->group = -1;
61 }
62 }
63
64 static void
history_tree_free(OnigCaptureTreeNode * node)65 history_tree_free(OnigCaptureTreeNode* node)
66 {
67 history_tree_clear(node);
68 xfree(node);
69 }
70
71 static void
history_root_free(OnigRegion * r)72 history_root_free(OnigRegion* r)
73 {
74 if (IS_NOT_NULL(r->history_root)) {
75 history_tree_free(r->history_root);
76 r->history_root = (OnigCaptureTreeNode* )0;
77 }
78 }
79
80 static OnigCaptureTreeNode*
history_node_new(void)81 history_node_new(void)
82 {
83 OnigCaptureTreeNode* node;
84
85 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
86 CHECK_NULL_RETURN(node);
87 node->childs = (OnigCaptureTreeNode** )0;
88 node->allocated = 0;
89 node->num_childs = 0;
90 node->group = -1;
91 node->beg = ONIG_REGION_NOTPOS;
92 node->end = ONIG_REGION_NOTPOS;
93
94 return node;
95 }
96
97 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)98 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
99 {
100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
101
102 if (parent->num_childs >= parent->allocated) {
103 int n, i;
104
105 if (IS_NULL(parent->childs)) {
106 n = HISTORY_TREE_INIT_ALLOC_SIZE;
107 parent->childs =
108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
109 }
110 else {
111 n = parent->allocated * 2;
112 parent->childs =
113 (OnigCaptureTreeNode** )xrealloc(parent->childs,
114 sizeof(OnigCaptureTreeNode*) * n);
115 }
116 CHECK_NULL_RETURN_MEMERR(parent->childs);
117 for (i = parent->allocated; i < n; i++) {
118 parent->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 parent->allocated = n;
121 }
122
123 parent->childs[parent->num_childs] = child;
124 parent->num_childs++;
125 return 0;
126 }
127
128 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)129 history_tree_clone(OnigCaptureTreeNode* node)
130 {
131 int i;
132 OnigCaptureTreeNode *clone, *child;
133
134 clone = history_node_new();
135 CHECK_NULL_RETURN(clone);
136
137 clone->beg = node->beg;
138 clone->end = node->end;
139 for (i = 0; i < node->num_childs; i++) {
140 child = history_tree_clone(node->childs[i]);
141 if (IS_NULL(child)) {
142 history_tree_free(clone);
143 return (OnigCaptureTreeNode* )0;
144 }
145 history_tree_add_child(clone, child);
146 }
147
148 return clone;
149 }
150
151 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)152 onig_get_capture_tree(OnigRegion* region)
153 {
154 return region->history_root;
155 }
156 #endif /* USE_CAPTURE_HISTORY */
157
158 extern void
onig_region_clear(OnigRegion * region)159 onig_region_clear(OnigRegion* region)
160 {
161 int i;
162
163 for (i = 0; i < region->num_regs; i++) {
164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
165 }
166 #ifdef USE_CAPTURE_HISTORY
167 history_root_free(region);
168 #endif
169 }
170
171 extern int
onig_region_resize(OnigRegion * region,int n)172 onig_region_resize(OnigRegion* region, int n)
173 {
174 region->num_regs = n;
175
176 if (n < ONIG_NREGION)
177 n = ONIG_NREGION;
178
179 if (region->allocated == 0) {
180 region->beg = (int* )xmalloc(n * sizeof(int));
181 region->end = (int* )xmalloc(n * sizeof(int));
182
183 if (region->beg == 0 || region->end == 0)
184 return ONIGERR_MEMORY;
185
186 region->allocated = n;
187 }
188 else if (region->allocated < n) {
189 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
190 region->end = (int* )xrealloc(region->end, n * sizeof(int));
191
192 if (region->beg == 0 || region->end == 0)
193 return ONIGERR_MEMORY;
194
195 region->allocated = n;
196 }
197
198 return 0;
199 }
200
201 static int
onig_region_resize_clear(OnigRegion * region,int n)202 onig_region_resize_clear(OnigRegion* region, int n)
203 {
204 int r;
205
206 r = onig_region_resize(region, n);
207 if (r != 0) return r;
208 onig_region_clear(region);
209 return 0;
210 }
211
212 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)213 onig_region_set(OnigRegion* region, int at, int beg, int end)
214 {
215 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
216
217 if (at >= region->allocated) {
218 int r = onig_region_resize(region, at + 1);
219 if (r < 0) return r;
220 }
221
222 region->beg[at] = beg;
223 region->end[at] = end;
224 return 0;
225 }
226
227 extern void
onig_region_init(OnigRegion * region)228 onig_region_init(OnigRegion* region)
229 {
230 region->num_regs = 0;
231 region->allocated = 0;
232 region->beg = (int* )0;
233 region->end = (int* )0;
234 region->history_root = (OnigCaptureTreeNode* )0;
235 }
236
237 extern OnigRegion*
onig_region_new(void)238 onig_region_new(void)
239 {
240 OnigRegion* r;
241
242 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
243 onig_region_init(r);
244 return r;
245 }
246
247 extern void
onig_region_free(OnigRegion * r,int free_self)248 onig_region_free(OnigRegion* r, int free_self)
249 {
250 if (r) {
251 if (r->allocated > 0) {
252 if (r->beg) xfree(r->beg);
253 if (r->end) xfree(r->end);
254 r->allocated = 0;
255 }
256 #ifdef USE_CAPTURE_HISTORY
257 history_root_free(r);
258 #endif
259 if (free_self) xfree(r);
260 }
261 }
262
263 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)264 onig_region_copy(OnigRegion* to, OnigRegion* from)
265 {
266 #define RREGC_SIZE (sizeof(int) * from->num_regs)
267 int i;
268
269 if (to == from) return;
270
271 if (to->allocated == 0) {
272 if (from->num_regs > 0) {
273 to->beg = (int* )xmalloc(RREGC_SIZE);
274 to->end = (int* )xmalloc(RREGC_SIZE);
275 to->allocated = from->num_regs;
276 }
277 }
278 else if (to->allocated < from->num_regs) {
279 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
280 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
281 to->allocated = from->num_regs;
282 }
283
284 for (i = 0; i < from->num_regs; i++) {
285 to->beg[i] = from->beg[i];
286 to->end[i] = from->end[i];
287 }
288 to->num_regs = from->num_regs;
289
290 #ifdef USE_CAPTURE_HISTORY
291 history_root_free(to);
292
293 if (IS_NOT_NULL(from->history_root)) {
294 to->history_root = history_tree_clone(from->history_root);
295 }
296 #endif
297 }
298
299
300 /** stack **/
301 #define INVALID_STACK_INDEX -1
302
303 /* stack type */
304 /* used by normal-POP */
305 #define STK_ALT 0x0001
306 #define STK_LOOK_BEHIND_NOT 0x0002
307 #define STK_POS_NOT 0x0003
308 /* handled by normal-POP */
309 #define STK_MEM_START 0x0100
310 #define STK_MEM_END 0x8200
311 #define STK_REPEAT_INC 0x0300
312 #define STK_STATE_CHECK_MARK 0x1000
313 /* avoided by normal-POP */
314 #define STK_NULL_CHECK_START 0x3000
315 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
316 #define STK_MEM_END_MARK 0x8400
317 #define STK_POS 0x0500 /* used when POP-POS */
318 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
319 #define STK_REPEAT 0x0700
320 #define STK_CALL_FRAME 0x0800
321 #define STK_RETURN 0x0900
322 #define STK_VOID 0x0a00 /* for fill a blank */
323
324 /* stack type check mask */
325 #define STK_MASK_POP_USED 0x00ff
326 #define STK_MASK_TO_VOID_TARGET 0x10ff
327 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
328
329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
330 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
331 (msa).stack_p = (void* )0;\
332 (msa).options = (arg_option);\
333 (msa).region = (arg_region);\
334 (msa).start = (arg_start);\
335 (msa).best_len = ONIG_MISMATCH;\
336 (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
337 } while(0)
338 #else
339 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
340 (msa).stack_p = (void* )0;\
341 (msa).options = (arg_option);\
342 (msa).region = (arg_region);\
343 (msa).start = (arg_start);\
344 (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
345 } while(0)
346 #endif
347
348 #ifdef USE_COMBINATION_EXPLOSION_CHECK
349
350 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
351
352 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
353 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
354 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
355 offset = ((offset) * (state_num)) >> 3;\
356 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
357 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
358 (msa).state_check_buff = (void* )xmalloc(size);\
359 else \
360 (msa).state_check_buff = (void* )xalloca(size);\
361 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
362 (size_t )(size - (offset))); \
363 (msa).state_check_buff_size = size;\
364 }\
365 else {\
366 (msa).state_check_buff = (void* )0;\
367 (msa).state_check_buff_size = 0;\
368 }\
369 }\
370 else {\
371 (msa).state_check_buff = (void* )0;\
372 (msa).state_check_buff_size = 0;\
373 }\
374 } while(0)
375
376 #define MATCH_ARG_FREE(msa) do {\
377 if ((msa).stack_p) xfree((msa).stack_p);\
378 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
379 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
380 }\
381 } while(0)
382 #else
383 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
384 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
385 #endif
386
387
388 #define ALLOCA_PTR_NUM_LIMIT 50
389
390 #define STACK_INIT(stack_num) do {\
391 if (msa->stack_p) {\
392 is_alloca = 0;\
393 alloc_base = msa->stack_p;\
394 stk_base = (OnigStackType* )(alloc_base\
395 + (sizeof(OnigStackIndex) * msa->ptr_num));\
396 stk = stk_base;\
397 stk_end = stk_base + msa->stack_n;\
398 }\
399 else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
400 is_alloca = 0;\
401 alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\
402 + sizeof(OnigStackType) * (stack_num));\
403 stk_base = (OnigStackType* )(alloc_base\
404 + (sizeof(OnigStackIndex) * msa->ptr_num));\
405 stk = stk_base;\
406 stk_end = stk_base + (stack_num);\
407 }\
408 else {\
409 is_alloca = 1;\
410 alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\
411 + sizeof(OnigStackType) * (stack_num));\
412 stk_base = (OnigStackType* )(alloc_base\
413 + (sizeof(OnigStackIndex) * msa->ptr_num));\
414 stk = stk_base;\
415 stk_end = stk_base + (stack_num);\
416 }\
417 } while(0);
418
419
420 #define STACK_SAVE do{\
421 msa->stack_n = stk_end - stk_base;\
422 if (is_alloca != 0) {\
423 size_t size = sizeof(OnigStackIndex) * msa->ptr_num \
424 + sizeof(OnigStackType) * msa->stack_n;\
425 msa->stack_p = xmalloc(size);\
426 xmemcpy(msa->stack_p, alloc_base, size);\
427 }\
428 else {\
429 msa->stack_p = alloc_base;\
430 };\
431 } while(0)
432
433 #define UPDATE_FOR_STACK_REALLOC do{\
434 repeat_stk = (OnigStackIndex* )alloc_base;\
435 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\
436 mem_end_stk = mem_start_stk + num_mem;\
437 mem_start_stk--; /* for index start from 1 */\
438 mem_end_stk--; /* for index start from 1 */\
439 } while(0)
440
441 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
442
443 extern unsigned int
onig_get_match_stack_limit_size(void)444 onig_get_match_stack_limit_size(void)
445 {
446 return MatchStackLimitSize;
447 }
448
449 extern int
onig_set_match_stack_limit_size(unsigned int size)450 onig_set_match_stack_limit_size(unsigned int size)
451 {
452 MatchStackLimitSize = size;
453 return 0;
454 }
455
456 static int
stack_double(int is_alloca,char ** arg_alloc_base,OnigStackType ** arg_stk_base,OnigStackType ** arg_stk_end,OnigStackType ** arg_stk,OnigMatchArg * msa)457 stack_double(int is_alloca, char** arg_alloc_base,
458 OnigStackType** arg_stk_base,
459 OnigStackType** arg_stk_end, OnigStackType** arg_stk,
460 OnigMatchArg* msa)
461 {
462 unsigned int n;
463 int used;
464 size_t size;
465 size_t new_size;
466 char* alloc_base;
467 char* new_alloc_base;
468 OnigStackType *stk_base, *stk_end, *stk;
469
470 alloc_base = *arg_alloc_base;
471 stk_base = *arg_stk_base;
472 stk_end = *arg_stk_end;
473 stk = *arg_stk;
474
475 n = stk_end - stk_base;
476 size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
477 n *= 2;
478 new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
479 if (is_alloca != 0) {
480 new_alloc_base = (char* )xmalloc(new_size);
481 if (IS_NULL(new_alloc_base)) {
482 STACK_SAVE;
483 return ONIGERR_MEMORY;
484 }
485 xmemcpy(new_alloc_base, alloc_base, size);
486 }
487 else {
488 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
489 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
490 return ONIGERR_MATCH_STACK_LIMIT_OVER;
491 else
492 n = MatchStackLimitSize;
493 }
494 new_alloc_base = (char* )xrealloc(alloc_base, new_size);
495 if (IS_NULL(new_alloc_base)) {
496 STACK_SAVE;
497 return ONIGERR_MEMORY;
498 }
499 }
500
501 alloc_base = new_alloc_base;
502 used = stk - stk_base;
503 *arg_alloc_base = alloc_base;
504 *arg_stk_base = (OnigStackType* )(alloc_base
505 + (sizeof(OnigStackIndex) * msa->ptr_num));
506 *arg_stk = *arg_stk_base + used;
507 *arg_stk_end = *arg_stk_base + n;
508 return 0;
509 }
510
511 #define STACK_ENSURE(n) do {\
512 if (stk_end - stk < (n)) {\
513 int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\
514 msa);\
515 if (r != 0) { STACK_SAVE; return r; } \
516 is_alloca = 0;\
517 UPDATE_FOR_STACK_REALLOC;\
518 }\
519 } while(0)
520
521 #define STACK_AT(index) (stk_base + (index))
522 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
523
524 #define STACK_PUSH_TYPE(stack_type) do {\
525 STACK_ENSURE(1);\
526 stk->type = (stack_type);\
527 STACK_INC;\
528 } while(0)
529
530 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
531
532 #ifdef USE_COMBINATION_EXPLOSION_CHECK
533 #define STATE_CHECK_POS(s,snum) \
534 (((s) - str) * num_comb_exp_check + ((snum) - 1))
535 #define STATE_CHECK_VAL(v,snum) do {\
536 if (state_check_buff != NULL) {\
537 int x = STATE_CHECK_POS(s,snum);\
538 (v) = state_check_buff[x/8] & (1<<(x%8));\
539 }\
540 else (v) = 0;\
541 } while(0)
542
543
544 #define ELSE_IF_STATE_CHECK_MARK(stk) \
545 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
546 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
547 state_check_buff[x/8] |= (1<<(x%8)); \
548 }
549
550 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
551 STACK_ENSURE(1);\
552 stk->type = (stack_type);\
553 stk->u.state.pcode = (pat);\
554 stk->u.state.pstr = (s);\
555 stk->u.state.pstr_prev = (sprev);\
556 stk->u.state.state_check = 0;\
557 STACK_INC;\
558 } while(0)
559
560 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
561 stk->type = (stack_type);\
562 stk->u.state.pcode = (pat);\
563 stk->u.state.state_check = 0;\
564 STACK_INC;\
565 } while(0)
566
567 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
568 STACK_ENSURE(1);\
569 stk->type = STK_ALT;\
570 stk->u.state.pcode = (pat);\
571 stk->u.state.pstr = (s);\
572 stk->u.state.pstr_prev = (sprev);\
573 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
574 STACK_INC;\
575 } while(0)
576
577 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
578 if (state_check_buff != NULL) {\
579 STACK_ENSURE(1);\
580 stk->type = STK_STATE_CHECK_MARK;\
581 stk->u.state.pstr = (s);\
582 stk->u.state.state_check = (snum);\
583 STACK_INC;\
584 }\
585 } while(0)
586
587 #else /* USE_COMBINATION_EXPLOSION_CHECK */
588
589 #define ELSE_IF_STATE_CHECK_MARK(stk)
590
591 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
592 STACK_ENSURE(1);\
593 stk->type = (stack_type);\
594 stk->u.state.pcode = (pat);\
595 stk->u.state.pstr = (s);\
596 stk->u.state.pstr_prev = (sprev);\
597 STACK_INC;\
598 } while(0)
599
600 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
601 stk->type = (stack_type);\
602 stk->u.state.pcode = (pat);\
603 STACK_INC;\
604 } while(0)
605 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
606
607 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
608 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
609 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
610 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
611 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
612 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
613
614 #define STACK_PUSH_REPEAT(id, pat) do {\
615 STACK_ENSURE(1);\
616 stk->type = STK_REPEAT;\
617 stk->u.repeat.num = (id);\
618 stk->u.repeat.pcode = (pat);\
619 stk->u.repeat.count = 0;\
620 STACK_INC;\
621 } while(0)
622
623 #define STACK_PUSH_REPEAT_INC(sindex) do {\
624 STACK_ENSURE(1);\
625 stk->type = STK_REPEAT_INC;\
626 stk->u.repeat_inc.si = (sindex);\
627 STACK_INC;\
628 } while(0)
629
630 #define STACK_PUSH_MEM_START(mnum, s) do {\
631 STACK_ENSURE(1);\
632 stk->type = STK_MEM_START;\
633 stk->u.mem.num = (mnum);\
634 stk->u.mem.pstr = (s);\
635 stk->u.mem.start = mem_start_stk[mnum];\
636 stk->u.mem.end = mem_end_stk[mnum];\
637 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
638 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
639 STACK_INC;\
640 } while(0)
641
642 #define STACK_PUSH_MEM_END(mnum, s) do {\
643 STACK_ENSURE(1);\
644 stk->type = STK_MEM_END;\
645 stk->u.mem.num = (mnum);\
646 stk->u.mem.pstr = (s);\
647 stk->u.mem.start = mem_start_stk[mnum];\
648 stk->u.mem.end = mem_end_stk[mnum];\
649 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
650 STACK_INC;\
651 } while(0)
652
653 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
654 STACK_ENSURE(1);\
655 stk->type = STK_MEM_END_MARK;\
656 stk->u.mem.num = (mnum);\
657 STACK_INC;\
658 } while(0)
659
660 #define STACK_GET_MEM_START(mnum, k) do {\
661 int level = 0;\
662 k = stk;\
663 while (k > stk_base) {\
664 k--;\
665 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
666 && k->u.mem.num == (mnum)) {\
667 level++;\
668 }\
669 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
670 if (level == 0) break;\
671 level--;\
672 }\
673 }\
674 } while(0)
675
676 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
677 int level = 0;\
678 while (k < stk) {\
679 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
680 if (level == 0) (start) = k->u.mem.pstr;\
681 level++;\
682 }\
683 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
684 level--;\
685 if (level == 0) {\
686 (end) = k->u.mem.pstr;\
687 break;\
688 }\
689 }\
690 k++;\
691 }\
692 } while(0)
693
694 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
695 STACK_ENSURE(1);\
696 stk->type = STK_NULL_CHECK_START;\
697 stk->u.null_check.num = (cnum);\
698 stk->u.null_check.pstr = (s);\
699 STACK_INC;\
700 } while(0)
701
702 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
703 STACK_ENSURE(1);\
704 stk->type = STK_NULL_CHECK_END;\
705 stk->u.null_check.num = (cnum);\
706 STACK_INC;\
707 } while(0)
708
709 #define STACK_PUSH_CALL_FRAME(pat) do {\
710 STACK_ENSURE(1);\
711 stk->type = STK_CALL_FRAME;\
712 stk->u.call_frame.ret_addr = (pat);\
713 STACK_INC;\
714 } while(0)
715
716 #define STACK_PUSH_RETURN do {\
717 STACK_ENSURE(1);\
718 stk->type = STK_RETURN;\
719 STACK_INC;\
720 } while(0)
721
722
723 #ifdef ONIG_DEBUG
724 #define STACK_BASE_CHECK(p, at) \
725 if ((p) < stk_base) {\
726 fprintf(stderr, "at %s\n", at);\
727 goto stack_error;\
728 }
729 #else
730 #define STACK_BASE_CHECK(p, at)
731 #endif
732
733 #define STACK_POP_ONE do {\
734 stk--;\
735 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
736 } while(0)
737
738 #define STACK_POP do {\
739 switch (pop_level) {\
740 case STACK_POP_LEVEL_FREE:\
741 while (1) {\
742 stk--;\
743 STACK_BASE_CHECK(stk, "STACK_POP"); \
744 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
745 ELSE_IF_STATE_CHECK_MARK(stk);\
746 }\
747 break;\
748 case STACK_POP_LEVEL_MEM_START:\
749 while (1) {\
750 stk--;\
751 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
752 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
753 else if (stk->type == STK_MEM_START) {\
754 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
755 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
756 }\
757 ELSE_IF_STATE_CHECK_MARK(stk);\
758 }\
759 break;\
760 default:\
761 while (1) {\
762 stk--;\
763 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
764 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
765 else if (stk->type == STK_MEM_START) {\
766 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
767 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
768 }\
769 else if (stk->type == STK_REPEAT_INC) {\
770 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
771 }\
772 else if (stk->type == STK_MEM_END) {\
773 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
774 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
775 }\
776 ELSE_IF_STATE_CHECK_MARK(stk);\
777 }\
778 break;\
779 }\
780 } while(0)
781
782 #define STACK_POP_TIL_POS_NOT do {\
783 while (1) {\
784 stk--;\
785 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
786 if (stk->type == STK_POS_NOT) break;\
787 else if (stk->type == STK_MEM_START) {\
788 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
789 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
790 }\
791 else if (stk->type == STK_REPEAT_INC) {\
792 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
793 }\
794 else if (stk->type == STK_MEM_END) {\
795 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
796 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
797 }\
798 ELSE_IF_STATE_CHECK_MARK(stk);\
799 }\
800 } while(0)
801
802 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
803 while (1) {\
804 stk--;\
805 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
806 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
807 else if (stk->type == STK_MEM_START) {\
808 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
809 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
810 }\
811 else if (stk->type == STK_REPEAT_INC) {\
812 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
813 }\
814 else if (stk->type == STK_MEM_END) {\
815 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
816 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
817 }\
818 ELSE_IF_STATE_CHECK_MARK(stk);\
819 }\
820 } while(0)
821
822 #define STACK_POS_END(k) do {\
823 k = stk;\
824 while (1) {\
825 k--;\
826 STACK_BASE_CHECK(k, "STACK_POS_END"); \
827 if (IS_TO_VOID_TARGET(k)) {\
828 k->type = STK_VOID;\
829 }\
830 else if (k->type == STK_POS) {\
831 k->type = STK_VOID;\
832 break;\
833 }\
834 }\
835 } while(0)
836
837 #define STACK_STOP_BT_END do {\
838 OnigStackType *k = stk;\
839 while (1) {\
840 k--;\
841 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
842 if (IS_TO_VOID_TARGET(k)) {\
843 k->type = STK_VOID;\
844 }\
845 else if (k->type == STK_STOP_BT) {\
846 k->type = STK_VOID;\
847 break;\
848 }\
849 }\
850 } while(0)
851
852 #define STACK_NULL_CHECK(isnull,id,s) do {\
853 OnigStackType* k = stk;\
854 while (1) {\
855 k--;\
856 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
857 if (k->type == STK_NULL_CHECK_START) {\
858 if (k->u.null_check.num == (id)) {\
859 (isnull) = (k->u.null_check.pstr == (s));\
860 break;\
861 }\
862 }\
863 }\
864 } while(0)
865
866 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
867 int level = 0;\
868 OnigStackType* k = stk;\
869 while (1) {\
870 k--;\
871 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
872 if (k->type == STK_NULL_CHECK_START) {\
873 if (k->u.null_check.num == (id)) {\
874 if (level == 0) {\
875 (isnull) = (k->u.null_check.pstr == (s));\
876 break;\
877 }\
878 else level--;\
879 }\
880 }\
881 else if (k->type == STK_NULL_CHECK_END) {\
882 level++;\
883 }\
884 }\
885 } while(0)
886
887 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
888 OnigStackType* k = stk;\
889 while (1) {\
890 k--;\
891 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
892 if (k->type == STK_NULL_CHECK_START) {\
893 if (k->u.null_check.num == (id)) {\
894 if (k->u.null_check.pstr != (s)) {\
895 (isnull) = 0;\
896 break;\
897 }\
898 else {\
899 UChar* endp;\
900 (isnull) = 1;\
901 while (k < stk) {\
902 if (k->type == STK_MEM_START) {\
903 if (k->u.mem.end == INVALID_STACK_INDEX) {\
904 (isnull) = 0; break;\
905 }\
906 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
907 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
908 else\
909 endp = (UChar* )k->u.mem.end;\
910 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
911 (isnull) = 0; break;\
912 }\
913 else if (endp != s) {\
914 (isnull) = -1; /* empty, but position changed */ \
915 }\
916 }\
917 k++;\
918 }\
919 break;\
920 }\
921 }\
922 }\
923 }\
924 } while(0)
925
926 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
927 int level = 0;\
928 OnigStackType* k = stk;\
929 while (1) {\
930 k--;\
931 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
932 if (k->type == STK_NULL_CHECK_START) {\
933 if (k->u.null_check.num == (id)) {\
934 if (level == 0) {\
935 if (k->u.null_check.pstr != (s)) {\
936 (isnull) = 0;\
937 break;\
938 }\
939 else {\
940 UChar* endp;\
941 (isnull) = 1;\
942 while (k < stk) {\
943 if (k->type == STK_MEM_START) {\
944 if (k->u.mem.end == INVALID_STACK_INDEX) {\
945 (isnull) = 0; break;\
946 }\
947 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
948 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
949 else\
950 endp = (UChar* )k->u.mem.end;\
951 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
952 (isnull) = 0; break;\
953 }\
954 else if (endp != s) {\
955 (isnull) = -1; /* empty, but position changed */ \
956 }\
957 }\
958 k++;\
959 }\
960 break;\
961 }\
962 }\
963 else {\
964 level--;\
965 }\
966 }\
967 }\
968 else if (k->type == STK_NULL_CHECK_END) {\
969 if (k->u.null_check.num == (id)) level++;\
970 }\
971 }\
972 } while(0)
973
974 #define STACK_GET_REPEAT(id, k) do {\
975 int level = 0;\
976 k = stk;\
977 while (1) {\
978 k--;\
979 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
980 if (k->type == STK_REPEAT) {\
981 if (level == 0) {\
982 if (k->u.repeat.num == (id)) {\
983 break;\
984 }\
985 }\
986 }\
987 else if (k->type == STK_CALL_FRAME) level--;\
988 else if (k->type == STK_RETURN) level++;\
989 }\
990 } while(0)
991
992 #define STACK_RETURN(addr) do {\
993 int level = 0;\
994 OnigStackType* k = stk;\
995 while (1) {\
996 k--;\
997 STACK_BASE_CHECK(k, "STACK_RETURN"); \
998 if (k->type == STK_CALL_FRAME) {\
999 if (level == 0) {\
1000 (addr) = k->u.call_frame.ret_addr;\
1001 break;\
1002 }\
1003 else level--;\
1004 }\
1005 else if (k->type == STK_RETURN)\
1006 level++;\
1007 }\
1008 } while(0)
1009
1010
1011 #define STRING_CMP(s1,s2,len) do {\
1012 while (len-- > 0) {\
1013 if (*s1++ != *s2++) goto fail;\
1014 }\
1015 } while(0)
1016
1017 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
1018 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1019 goto fail; \
1020 } while(0)
1021
string_cmp_ic(OnigEncoding enc,int case_fold_flag,UChar * s1,UChar ** ps2,int mblen)1022 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1023 UChar* s1, UChar** ps2, int mblen)
1024 {
1025 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1026 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1027 UChar *p1, *p2, *end1, *s2, *end2;
1028 int len1, len2;
1029
1030 s2 = *ps2;
1031 end1 = s1 + mblen;
1032 end2 = s2 + mblen;
1033 while (s1 < end1) {
1034 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
1035 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
1036 if (len1 != len2) return 0;
1037 p1 = buf1;
1038 p2 = buf2;
1039 while (len1-- > 0) {
1040 if (*p1 != *p2) return 0;
1041 p1++;
1042 p2++;
1043 }
1044 }
1045
1046 *ps2 = s2;
1047 return 1;
1048 }
1049
1050 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1051 is_fail = 0;\
1052 while (len-- > 0) {\
1053 if (*s1++ != *s2++) {\
1054 is_fail = 1; break;\
1055 }\
1056 }\
1057 } while(0)
1058
1059 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1060 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1061 is_fail = 1; \
1062 else \
1063 is_fail = 0; \
1064 } while(0)
1065
1066
1067 #define IS_EMPTY_STR (str == end)
1068 #define ON_STR_BEGIN(s) ((s) == str)
1069 #define ON_STR_END(s) ((s) == end)
1070 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1071 #define DATA_ENSURE_CHECK1 (s < right_range)
1072 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1073 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1074 #else
1075 #define DATA_ENSURE_CHECK1 (s < end)
1076 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1077 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1078 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1079
1080
1081 #ifdef USE_CAPTURE_HISTORY
1082 static int
make_capture_history_tree(OnigCaptureTreeNode * node,OnigStackType ** kp,OnigStackType * stk_top,UChar * str,regex_t * reg)1083 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1084 OnigStackType* stk_top, UChar* str, regex_t* reg)
1085 {
1086 int n, r;
1087 OnigCaptureTreeNode* child;
1088 OnigStackType* k = *kp;
1089
1090 while (k < stk_top) {
1091 if (k->type == STK_MEM_START) {
1092 n = k->u.mem.num;
1093 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1094 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1095 child = history_node_new();
1096 CHECK_NULL_RETURN_MEMERR(child);
1097 child->group = n;
1098 child->beg = (int )(k->u.mem.pstr - str);
1099 r = history_tree_add_child(node, child);
1100 if (r != 0) return r;
1101 *kp = (k + 1);
1102 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1103 if (r != 0) return r;
1104
1105 k = *kp;
1106 child->end = (int )(k->u.mem.pstr - str);
1107 }
1108 }
1109 else if (k->type == STK_MEM_END) {
1110 if (k->u.mem.num == node->group) {
1111 node->end = (int )(k->u.mem.pstr - str);
1112 *kp = k;
1113 return 0;
1114 }
1115 }
1116 k++;
1117 }
1118
1119 return 1; /* 1: root node ending. */
1120 }
1121 #endif
1122
1123 #ifdef USE_BACKREF_WITH_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1124 static int mem_is_in_memp(int mem, int num, UChar* memp)
1125 {
1126 int i;
1127 MemNumType m;
1128
1129 for (i = 0; i < num; i++) {
1130 GET_MEMNUM_INC(m, memp);
1131 if (mem == (int )m) return 1;
1132 }
1133 return 0;
1134 }
1135
backref_match_at_nested_level(regex_t * reg,OnigStackType * top,OnigStackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1136 static int backref_match_at_nested_level(regex_t* reg
1137 , OnigStackType* top, OnigStackType* stk_base
1138 , int ignore_case, int case_fold_flag
1139 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1140 {
1141 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1142 int level;
1143 OnigStackType* k;
1144
1145 level = 0;
1146 k = top;
1147 k--;
1148 while (k >= stk_base) {
1149 if (k->type == STK_CALL_FRAME) {
1150 level--;
1151 }
1152 else if (k->type == STK_RETURN) {
1153 level++;
1154 }
1155 else if (level == nest) {
1156 if (k->type == STK_MEM_START) {
1157 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1158 pstart = k->u.mem.pstr;
1159 if (pend != NULL_UCHARP) {
1160 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1161 p = pstart;
1162 ss = *s;
1163
1164 if (ignore_case != 0) {
1165 if (string_cmp_ic(reg->enc, case_fold_flag,
1166 pstart, &ss, (int )(pend - pstart)) == 0)
1167 return 0; /* or goto next_mem; */
1168 }
1169 else {
1170 while (p < pend) {
1171 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1172 }
1173 }
1174
1175 *s = ss;
1176 return 1;
1177 }
1178 }
1179 }
1180 else if (k->type == STK_MEM_END) {
1181 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1182 pend = k->u.mem.pstr;
1183 }
1184 }
1185 }
1186 k--;
1187 }
1188
1189 return 0;
1190 }
1191 #endif /* USE_BACKREF_WITH_LEVEL */
1192
1193
1194 #ifdef ONIG_DEBUG_STATISTICS
1195
1196 #define USE_TIMEOFDAY
1197
1198 #ifdef USE_TIMEOFDAY
1199 #ifdef HAVE_SYS_TIME_H
1200 #include <sys/time.h>
1201 #endif
1202 #ifdef HAVE_UNISTD_H
1203 #include <unistd.h>
1204 #endif
1205 static struct timeval ts, te;
1206 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1207 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1208 (((te).tv_sec - (ts).tv_sec)*1000000))
1209 #else
1210 #ifdef HAVE_SYS_TIMES_H
1211 #include <sys/times.h>
1212 #endif
1213 static struct tms ts, te;
1214 #define GETTIME(t) times(&(t))
1215 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1216 #endif
1217
1218 static int OpCounter[256];
1219 static int OpPrevCounter[256];
1220 static unsigned long OpTime[256];
1221 static int OpCurr = OP_FINISH;
1222 static int OpPrevTarget = OP_FAIL;
1223 static int MaxStackDepth = 0;
1224
1225 #define MOP_IN(opcode) do {\
1226 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1227 OpCurr = opcode;\
1228 OpCounter[opcode]++;\
1229 GETTIME(ts);\
1230 } while(0)
1231
1232 #define MOP_OUT do {\
1233 GETTIME(te);\
1234 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1235 } while(0)
1236
1237 extern void
onig_statistics_init(void)1238 onig_statistics_init(void)
1239 {
1240 int i;
1241 for (i = 0; i < 256; i++) {
1242 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1243 }
1244 MaxStackDepth = 0;
1245 }
1246
1247 extern int
onig_print_statistics(FILE * f)1248 onig_print_statistics(FILE* f)
1249 {
1250 int r;
1251 int i;
1252
1253 r = fprintf(f, " count prev time\n");
1254 if (r < 0) return -1;
1255
1256 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1257 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
1258 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1259 if (r < 0) return -1;
1260 }
1261 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1262 if (r < 0) return -1;
1263
1264 return 0;
1265 }
1266
1267 #define STACK_INC do {\
1268 stk++;\
1269 if (stk - stk_base > MaxStackDepth) \
1270 MaxStackDepth = stk - stk_base;\
1271 } while(0)
1272
1273 #else
1274 #define STACK_INC stk++
1275
1276 #define MOP_IN(opcode)
1277 #define MOP_OUT
1278 #endif
1279
1280
1281 /* matching region of POSIX API */
1282 typedef int regoff_t;
1283
1284 typedef struct {
1285 regoff_t rm_so;
1286 regoff_t rm_eo;
1287 } posix_regmatch_t;
1288
1289 /* match data(str - end) from position (sstart). */
1290 /* if sstart == str then set sprev to NULL. */
1291 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * right_range,const UChar * sstart,UChar * sprev,OnigMatchArg * msa)1292 match_at(regex_t* reg, const UChar* str, const UChar* end,
1293 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1294 const UChar* right_range,
1295 #endif
1296 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1297 {
1298 static UChar FinishCode[] = { OP_FINISH };
1299
1300 int i, n, num_mem, best_len, pop_level;
1301 LengthType tlen, tlen2;
1302 MemNumType mem;
1303 RelAddrType addr;
1304 UChar *s, *q, *sbegin;
1305 int is_alloca;
1306 char *alloc_base;
1307 OnigStackType *stk_base, *stk, *stk_end;
1308 OnigStackType *stkp; /* used as any purpose. */
1309 OnigStackIndex si;
1310 OnigStackIndex *repeat_stk;
1311 OnigStackIndex *mem_start_stk, *mem_end_stk;
1312 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1313 int scv;
1314 unsigned char* state_check_buff = msa->state_check_buff;
1315 int num_comb_exp_check = reg->num_comb_exp_check;
1316 #endif
1317 UChar *p = reg->p;
1318 OnigOptionType option = reg->options;
1319 OnigEncoding encode = reg->enc;
1320 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1321
1322 //n = reg->num_repeat + reg->num_mem * 2;
1323 pop_level = reg->stack_pop_level;
1324 num_mem = reg->num_mem;
1325 STACK_INIT(INIT_MATCH_STACK_SIZE);
1326 UPDATE_FOR_STACK_REALLOC;
1327 for (i = 1; i <= num_mem; i++) {
1328 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1329 }
1330
1331 #ifdef ONIG_DEBUG_MATCH
1332 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1333 (int )str, (int )end, (int )sstart, (int )sprev);
1334 fprintf(stderr, "size: %d, start offset: %d\n",
1335 (int )(end - str), (int )(sstart - str));
1336 #endif
1337
1338 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1339 best_len = ONIG_MISMATCH;
1340 s = (UChar* )sstart;
1341 while (1) {
1342 #ifdef ONIG_DEBUG_MATCH
1343 {
1344 UChar *q, *bp, buf[50];
1345 int len;
1346 fprintf(stderr, "%4d> \"", (int )(s - str));
1347 bp = buf;
1348 for (i = 0, q = s; i < 7 && q < end; i++) {
1349 len = enclen(encode, q);
1350 while (len-- > 0) *bp++ = *q++;
1351 }
1352 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1353 else { xmemcpy(bp, "\"", 1); bp += 1; }
1354 *bp = 0;
1355 fputs((char* )buf, stderr);
1356 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1357 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1358 fprintf(stderr, "\n");
1359 }
1360 #endif
1361
1362 sbegin = s;
1363 switch (*p++) {
1364 case OP_END: MOP_IN(OP_END);
1365 n = s - sstart;
1366 if (n > best_len) {
1367 OnigRegion* region;
1368 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1369 if (IS_FIND_LONGEST(option)) {
1370 if (n > msa->best_len) {
1371 msa->best_len = n;
1372 msa->best_s = (UChar* )sstart;
1373 }
1374 else
1375 goto end_best_len;
1376 }
1377 #endif
1378 best_len = n;
1379 region = msa->region;
1380 if (region) {
1381 #ifdef USE_POSIX_API_REGION_OPTION
1382 if (IS_POSIX_REGION(msa->options)) {
1383 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1384
1385 rmt[0].rm_so = sstart - str;
1386 rmt[0].rm_eo = s - str;
1387 for (i = 1; i <= num_mem; i++) {
1388 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1389 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1390 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1391 else
1392 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1393
1394 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1395 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1396 : (UChar* )((void* )mem_end_stk[i])) - str;
1397 }
1398 else {
1399 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1400 }
1401 }
1402 }
1403 else {
1404 #endif /* USE_POSIX_API_REGION_OPTION */
1405 region->beg[0] = sstart - str;
1406 region->end[0] = s - str;
1407 for (i = 1; i <= num_mem; i++) {
1408 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1409 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1410 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1411 else
1412 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1413
1414 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1415 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1416 : (UChar* )((void* )mem_end_stk[i])) - str;
1417 }
1418 else {
1419 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1420 }
1421 }
1422
1423 #ifdef USE_CAPTURE_HISTORY
1424 if (reg->capture_history != 0) {
1425 int r;
1426 OnigCaptureTreeNode* node;
1427
1428 if (IS_NULL(region->history_root)) {
1429 region->history_root = node = history_node_new();
1430 CHECK_NULL_RETURN_MEMERR(node);
1431 }
1432 else {
1433 node = region->history_root;
1434 history_tree_clear(node);
1435 }
1436
1437 node->group = 0;
1438 node->beg = sstart - str;
1439 node->end = s - str;
1440
1441 stkp = stk_base;
1442 r = make_capture_history_tree(region->history_root, &stkp,
1443 stk, (UChar* )str, reg);
1444 if (r < 0) {
1445 best_len = r; /* error code */
1446 goto finish;
1447 }
1448 }
1449 #endif /* USE_CAPTURE_HISTORY */
1450 #ifdef USE_POSIX_API_REGION_OPTION
1451 } /* else IS_POSIX_REGION() */
1452 #endif
1453 } /* if (region) */
1454 } /* n > best_len */
1455
1456 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1457 end_best_len:
1458 #endif
1459 MOP_OUT;
1460
1461 if (IS_FIND_CONDITION(option)) {
1462 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1463 best_len = ONIG_MISMATCH;
1464 goto fail; /* for retry */
1465 }
1466 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1467 goto fail; /* for retry */
1468 }
1469 }
1470
1471 /* default behavior: return first-matching result. */
1472 goto finish;
1473 break;
1474
1475 case OP_EXACT1: MOP_IN(OP_EXACT1);
1476 DATA_ENSURE(1);
1477 if (*p != *s) goto fail;
1478 p++; s++;
1479 MOP_OUT;
1480 break;
1481
1482 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1483 {
1484 int len;
1485 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1486
1487 DATA_ENSURE(1);
1488 len = ONIGENC_MBC_CASE_FOLD(encode,
1489 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1490 case_fold_flag,
1491 &s, end, lowbuf);
1492 DATA_ENSURE(0);
1493 q = lowbuf;
1494 while (len-- > 0) {
1495 if (*p != *q) {
1496 goto fail;
1497 }
1498 p++; q++;
1499 }
1500 }
1501 MOP_OUT;
1502 break;
1503
1504 case OP_EXACT2: MOP_IN(OP_EXACT2);
1505 DATA_ENSURE(2);
1506 if (*p != *s) goto fail;
1507 p++; s++;
1508 if (*p != *s) goto fail;
1509 sprev = s;
1510 p++; s++;
1511 MOP_OUT;
1512 continue;
1513 break;
1514
1515 case OP_EXACT3: MOP_IN(OP_EXACT3);
1516 DATA_ENSURE(3);
1517 if (*p != *s) goto fail;
1518 p++; s++;
1519 if (*p != *s) goto fail;
1520 p++; s++;
1521 if (*p != *s) goto fail;
1522 sprev = s;
1523 p++; s++;
1524 MOP_OUT;
1525 continue;
1526 break;
1527
1528 case OP_EXACT4: MOP_IN(OP_EXACT4);
1529 DATA_ENSURE(4);
1530 if (*p != *s) goto fail;
1531 p++; s++;
1532 if (*p != *s) goto fail;
1533 p++; s++;
1534 if (*p != *s) goto fail;
1535 p++; s++;
1536 if (*p != *s) goto fail;
1537 sprev = s;
1538 p++; s++;
1539 MOP_OUT;
1540 continue;
1541 break;
1542
1543 case OP_EXACT5: MOP_IN(OP_EXACT5);
1544 DATA_ENSURE(5);
1545 if (*p != *s) goto fail;
1546 p++; s++;
1547 if (*p != *s) goto fail;
1548 p++; s++;
1549 if (*p != *s) goto fail;
1550 p++; s++;
1551 if (*p != *s) goto fail;
1552 p++; s++;
1553 if (*p != *s) goto fail;
1554 sprev = s;
1555 p++; s++;
1556 MOP_OUT;
1557 continue;
1558 break;
1559
1560 case OP_EXACTN: MOP_IN(OP_EXACTN);
1561 GET_LENGTH_INC(tlen, p);
1562 DATA_ENSURE(tlen);
1563 while (tlen-- > 0) {
1564 if (*p++ != *s++) goto fail;
1565 }
1566 sprev = s - 1;
1567 MOP_OUT;
1568 continue;
1569 break;
1570
1571 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1572 {
1573 int len;
1574 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1575
1576 GET_LENGTH_INC(tlen, p);
1577 endp = p + tlen;
1578
1579 while (p < endp) {
1580 sprev = s;
1581 DATA_ENSURE(1);
1582 len = ONIGENC_MBC_CASE_FOLD(encode,
1583 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1584 case_fold_flag,
1585 &s, end, lowbuf);
1586 DATA_ENSURE(0);
1587 q = lowbuf;
1588 while (len-- > 0) {
1589 if (*p != *q) goto fail;
1590 p++; q++;
1591 }
1592 }
1593 }
1594
1595 MOP_OUT;
1596 continue;
1597 break;
1598
1599 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1600 DATA_ENSURE(2);
1601 if (*p != *s) goto fail;
1602 p++; s++;
1603 if (*p != *s) goto fail;
1604 p++; s++;
1605 MOP_OUT;
1606 break;
1607
1608 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1609 DATA_ENSURE(4);
1610 if (*p != *s) goto fail;
1611 p++; s++;
1612 if (*p != *s) goto fail;
1613 p++; s++;
1614 sprev = s;
1615 if (*p != *s) goto fail;
1616 p++; s++;
1617 if (*p != *s) goto fail;
1618 p++; s++;
1619 MOP_OUT;
1620 continue;
1621 break;
1622
1623 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1624 DATA_ENSURE(6);
1625 if (*p != *s) goto fail;
1626 p++; s++;
1627 if (*p != *s) goto fail;
1628 p++; s++;
1629 if (*p != *s) goto fail;
1630 p++; s++;
1631 if (*p != *s) goto fail;
1632 p++; s++;
1633 sprev = s;
1634 if (*p != *s) goto fail;
1635 p++; s++;
1636 if (*p != *s) goto fail;
1637 p++; s++;
1638 MOP_OUT;
1639 continue;
1640 break;
1641
1642 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1643 GET_LENGTH_INC(tlen, p);
1644 DATA_ENSURE(tlen * 2);
1645 while (tlen-- > 0) {
1646 if (*p != *s) goto fail;
1647 p++; s++;
1648 if (*p != *s) goto fail;
1649 p++; s++;
1650 }
1651 sprev = s - 2;
1652 MOP_OUT;
1653 continue;
1654 break;
1655
1656 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1657 GET_LENGTH_INC(tlen, p);
1658 DATA_ENSURE(tlen * 3);
1659 while (tlen-- > 0) {
1660 if (*p != *s) goto fail;
1661 p++; s++;
1662 if (*p != *s) goto fail;
1663 p++; s++;
1664 if (*p != *s) goto fail;
1665 p++; s++;
1666 }
1667 sprev = s - 3;
1668 MOP_OUT;
1669 continue;
1670 break;
1671
1672 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1673 GET_LENGTH_INC(tlen, p); /* mb-len */
1674 GET_LENGTH_INC(tlen2, p); /* string len */
1675 tlen2 *= tlen;
1676 DATA_ENSURE(tlen2);
1677 while (tlen2-- > 0) {
1678 if (*p != *s) goto fail;
1679 p++; s++;
1680 }
1681 sprev = s - tlen;
1682 MOP_OUT;
1683 continue;
1684 break;
1685
1686 case OP_CCLASS: MOP_IN(OP_CCLASS);
1687 DATA_ENSURE(1);
1688 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1689 p += SIZE_BITSET;
1690 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1691 MOP_OUT;
1692 break;
1693
1694 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1695 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1696
1697 cclass_mb:
1698 GET_LENGTH_INC(tlen, p);
1699 {
1700 OnigCodePoint code;
1701 UChar *ss;
1702 int mb_len;
1703
1704 DATA_ENSURE(1);
1705 mb_len = enclen(encode, s);
1706 DATA_ENSURE(mb_len);
1707 ss = s;
1708 s += mb_len;
1709 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1710
1711 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1712 if (! onig_is_in_code_range(p, code)) goto fail;
1713 #else
1714 q = p;
1715 ALIGNMENT_RIGHT(q);
1716 if (! onig_is_in_code_range(q, code)) goto fail;
1717 #endif
1718 }
1719 p += tlen;
1720 MOP_OUT;
1721 break;
1722
1723 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1724 DATA_ENSURE(1);
1725 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1726 p += SIZE_BITSET;
1727 goto cclass_mb;
1728 }
1729 else {
1730 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1731 goto fail;
1732
1733 p += SIZE_BITSET;
1734 GET_LENGTH_INC(tlen, p);
1735 p += tlen;
1736 s++;
1737 }
1738 MOP_OUT;
1739 break;
1740
1741 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1742 DATA_ENSURE(1);
1743 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1744 p += SIZE_BITSET;
1745 s += enclen(encode, s);
1746 MOP_OUT;
1747 break;
1748
1749 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1750 DATA_ENSURE(1);
1751 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1752 s++;
1753 GET_LENGTH_INC(tlen, p);
1754 p += tlen;
1755 goto cc_mb_not_success;
1756 }
1757
1758 cclass_mb_not:
1759 GET_LENGTH_INC(tlen, p);
1760 {
1761 OnigCodePoint code;
1762 UChar *ss;
1763 int mb_len = enclen(encode, s);
1764
1765 if (! DATA_ENSURE_CHECK(mb_len)) {
1766 DATA_ENSURE(1);
1767 s = (UChar* )end;
1768 p += tlen;
1769 goto cc_mb_not_success;
1770 }
1771
1772 ss = s;
1773 s += mb_len;
1774 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1775
1776 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1777 if (onig_is_in_code_range(p, code)) goto fail;
1778 #else
1779 q = p;
1780 ALIGNMENT_RIGHT(q);
1781 if (onig_is_in_code_range(q, code)) goto fail;
1782 #endif
1783 }
1784 p += tlen;
1785
1786 cc_mb_not_success:
1787 MOP_OUT;
1788 break;
1789
1790 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1791 DATA_ENSURE(1);
1792 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1793 p += SIZE_BITSET;
1794 goto cclass_mb_not;
1795 }
1796 else {
1797 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1798 goto fail;
1799
1800 p += SIZE_BITSET;
1801 GET_LENGTH_INC(tlen, p);
1802 p += tlen;
1803 s++;
1804 }
1805 MOP_OUT;
1806 break;
1807
1808 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1809 {
1810 OnigCodePoint code;
1811 void *node;
1812 int mb_len;
1813 UChar *ss;
1814
1815 DATA_ENSURE(1);
1816 GET_POINTER_INC(node, p);
1817 mb_len = enclen(encode, s);
1818 ss = s;
1819 s += mb_len;
1820 DATA_ENSURE(0);
1821 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1822 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1823 }
1824 MOP_OUT;
1825 break;
1826
1827 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1828 DATA_ENSURE(1);
1829 n = enclen(encode, s);
1830 DATA_ENSURE(n);
1831 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1832 s += n;
1833 MOP_OUT;
1834 break;
1835
1836 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1837 DATA_ENSURE(1);
1838 n = enclen(encode, s);
1839 DATA_ENSURE(n);
1840 s += n;
1841 MOP_OUT;
1842 break;
1843
1844 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1845 while (DATA_ENSURE_CHECK1) {
1846 STACK_PUSH_ALT(p, s, sprev);
1847 n = enclen(encode, s);
1848 DATA_ENSURE(n);
1849 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1850 sprev = s;
1851 s += n;
1852 }
1853 MOP_OUT;
1854 break;
1855
1856 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1857 while (DATA_ENSURE_CHECK1) {
1858 STACK_PUSH_ALT(p, s, sprev);
1859 n = enclen(encode, s);
1860 if (n > 1) {
1861 DATA_ENSURE(n);
1862 sprev = s;
1863 s += n;
1864 }
1865 else {
1866 sprev = s;
1867 s++;
1868 }
1869 }
1870 MOP_OUT;
1871 break;
1872
1873 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1874 while (DATA_ENSURE_CHECK1) {
1875 if (*p == *s) {
1876 STACK_PUSH_ALT(p + 1, s, sprev);
1877 }
1878 n = enclen(encode, s);
1879 DATA_ENSURE(n);
1880 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1881 sprev = s;
1882 s += n;
1883 }
1884 p++;
1885 MOP_OUT;
1886 break;
1887
1888 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1889 while (DATA_ENSURE_CHECK1) {
1890 if (*p == *s) {
1891 STACK_PUSH_ALT(p + 1, s, sprev);
1892 }
1893 n = enclen(encode, s);
1894 if (n > 1) {
1895 DATA_ENSURE(n);
1896 sprev = s;
1897 s += n;
1898 }
1899 else {
1900 sprev = s;
1901 s++;
1902 }
1903 }
1904 p++;
1905 MOP_OUT;
1906 break;
1907
1908 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1909 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1910 GET_STATE_CHECK_NUM_INC(mem, p);
1911 while (DATA_ENSURE_CHECK1) {
1912 STATE_CHECK_VAL(scv, mem);
1913 if (scv) goto fail;
1914
1915 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1916 n = enclen(encode, s);
1917 DATA_ENSURE(n);
1918 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1919 sprev = s;
1920 s += n;
1921 }
1922 MOP_OUT;
1923 break;
1924
1925 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1926 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1927
1928 GET_STATE_CHECK_NUM_INC(mem, p);
1929 while (DATA_ENSURE_CHECK1) {
1930 STATE_CHECK_VAL(scv, mem);
1931 if (scv) goto fail;
1932
1933 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1934 n = enclen(encode, s);
1935 if (n > 1) {
1936 DATA_ENSURE(n);
1937 sprev = s;
1938 s += n;
1939 }
1940 else {
1941 sprev = s;
1942 s++;
1943 }
1944 }
1945 MOP_OUT;
1946 break;
1947 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1948
1949 case OP_WORD: MOP_IN(OP_WORD);
1950 DATA_ENSURE(1);
1951 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1952 goto fail;
1953
1954 s += enclen(encode, s);
1955 MOP_OUT;
1956 break;
1957
1958 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1959 DATA_ENSURE(1);
1960 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1961 goto fail;
1962
1963 s += enclen(encode, s);
1964 MOP_OUT;
1965 break;
1966
1967 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1968 if (ON_STR_BEGIN(s)) {
1969 DATA_ENSURE(1);
1970 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1971 goto fail;
1972 }
1973 else if (ON_STR_END(s)) {
1974 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1975 goto fail;
1976 }
1977 else {
1978 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1979 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1980 goto fail;
1981 }
1982 MOP_OUT;
1983 continue;
1984 break;
1985
1986 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1987 if (ON_STR_BEGIN(s)) {
1988 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1989 goto fail;
1990 }
1991 else if (ON_STR_END(s)) {
1992 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1993 goto fail;
1994 }
1995 else {
1996 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1997 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1998 goto fail;
1999 }
2000 MOP_OUT;
2001 continue;
2002 break;
2003
2004 #ifdef USE_WORD_BEGIN_END
2005 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
2006 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2007 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2008 MOP_OUT;
2009 continue;
2010 }
2011 }
2012 goto fail;
2013 break;
2014
2015 case OP_WORD_END: MOP_IN(OP_WORD_END);
2016 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2017 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2018 MOP_OUT;
2019 continue;
2020 }
2021 }
2022 goto fail;
2023 break;
2024 #endif
2025
2026 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
2027 if (! ON_STR_BEGIN(s)) goto fail;
2028
2029 MOP_OUT;
2030 continue;
2031 break;
2032
2033 case OP_END_BUF: MOP_IN(OP_END_BUF);
2034 if (! ON_STR_END(s)) goto fail;
2035
2036 MOP_OUT;
2037 continue;
2038 break;
2039
2040 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
2041 if (ON_STR_BEGIN(s)) {
2042 if (IS_NOTBOL(msa->options)) goto fail;
2043 MOP_OUT;
2044 continue;
2045 }
2046 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2047 MOP_OUT;
2048 continue;
2049 }
2050 goto fail;
2051 break;
2052
2053 case OP_END_LINE: MOP_IN(OP_END_LINE);
2054 if (ON_STR_END(s)) {
2055 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2056 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2057 #endif
2058 if (IS_NOTEOL(msa->options)) goto fail;
2059 MOP_OUT;
2060 continue;
2061 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2062 }
2063 #endif
2064 }
2065 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2066 MOP_OUT;
2067 continue;
2068 }
2069 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2070 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2071 MOP_OUT;
2072 continue;
2073 }
2074 #endif
2075 goto fail;
2076 break;
2077
2078 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2079 if (ON_STR_END(s)) {
2080 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2081 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2082 #endif
2083 if (IS_NOTEOL(msa->options)) goto fail;
2084 MOP_OUT;
2085 continue;
2086 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2087 }
2088 #endif
2089 }
2090 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2091 ON_STR_END(s + enclen(encode, s))) {
2092 MOP_OUT;
2093 continue;
2094 }
2095 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2096 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2097 UChar* ss = s + enclen(encode, s);
2098 ss += enclen(encode, ss);
2099 if (ON_STR_END(ss)) {
2100 MOP_OUT;
2101 continue;
2102 }
2103 }
2104 #endif
2105 goto fail;
2106 break;
2107
2108 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2109 if (s != msa->start)
2110 goto fail;
2111
2112 MOP_OUT;
2113 continue;
2114 break;
2115
2116 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2117 GET_MEMNUM_INC(mem, p);
2118 STACK_PUSH_MEM_START(mem, s);
2119 MOP_OUT;
2120 continue;
2121 break;
2122
2123 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2124 GET_MEMNUM_INC(mem, p);
2125 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2126 MOP_OUT;
2127 continue;
2128 break;
2129
2130 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2131 GET_MEMNUM_INC(mem, p);
2132 STACK_PUSH_MEM_END(mem, s);
2133 MOP_OUT;
2134 continue;
2135 break;
2136
2137 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2138 GET_MEMNUM_INC(mem, p);
2139 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2140 MOP_OUT;
2141 continue;
2142 break;
2143
2144 #ifdef USE_SUBEXP_CALL
2145 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2146 GET_MEMNUM_INC(mem, p);
2147 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2148 STACK_PUSH_MEM_END(mem, s);
2149 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2150 MOP_OUT;
2151 continue;
2152 break;
2153
2154 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2155 GET_MEMNUM_INC(mem, p);
2156 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2157 STACK_GET_MEM_START(mem, stkp);
2158
2159 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2160 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2161 else
2162 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2163
2164 STACK_PUSH_MEM_END_MARK(mem);
2165 MOP_OUT;
2166 continue;
2167 break;
2168 #endif
2169
2170 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2171 mem = 1;
2172 goto backref;
2173 break;
2174
2175 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2176 mem = 2;
2177 goto backref;
2178 break;
2179
2180 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2181 GET_MEMNUM_INC(mem, p);
2182 backref:
2183 {
2184 int len;
2185 UChar *pstart, *pend;
2186
2187 /* if you want to remove following line,
2188 you should check in parse and compile time. */
2189 if (mem > num_mem) goto fail;
2190 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2191 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2192
2193 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2194 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2195 else
2196 pstart = (UChar* )((void* )mem_start_stk[mem]);
2197
2198 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2199 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2200 : (UChar* )((void* )mem_end_stk[mem]));
2201 n = pend - pstart;
2202 DATA_ENSURE(n);
2203 sprev = s;
2204 STRING_CMP(pstart, s, n);
2205 while (sprev + (len = enclen(encode, sprev)) < s)
2206 sprev += len;
2207
2208 MOP_OUT;
2209 continue;
2210 }
2211 break;
2212
2213 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2214 GET_MEMNUM_INC(mem, p);
2215 {
2216 int len;
2217 UChar *pstart, *pend;
2218
2219 /* if you want to remove following line,
2220 you should check in parse and compile time. */
2221 if (mem > num_mem) goto fail;
2222 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2223 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2224
2225 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2226 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2227 else
2228 pstart = (UChar* )((void* )mem_start_stk[mem]);
2229
2230 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2231 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2232 : (UChar* )((void* )mem_end_stk[mem]));
2233 n = pend - pstart;
2234 DATA_ENSURE(n);
2235 sprev = s;
2236 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2237 while (sprev + (len = enclen(encode, sprev)) < s)
2238 sprev += len;
2239
2240 MOP_OUT;
2241 continue;
2242 }
2243 break;
2244
2245 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2246 {
2247 int len, is_fail;
2248 UChar *pstart, *pend, *swork;
2249
2250 GET_LENGTH_INC(tlen, p);
2251 for (i = 0; i < tlen; i++) {
2252 GET_MEMNUM_INC(mem, p);
2253
2254 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2255 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2256
2257 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2258 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2259 else
2260 pstart = (UChar* )((void* )mem_start_stk[mem]);
2261
2262 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2263 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2264 : (UChar* )((void* )mem_end_stk[mem]));
2265 n = pend - pstart;
2266 DATA_ENSURE(n);
2267 sprev = s;
2268 swork = s;
2269 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2270 if (is_fail) continue;
2271 s = swork;
2272 while (sprev + (len = enclen(encode, sprev)) < s)
2273 sprev += len;
2274
2275 p += (SIZE_MEMNUM * (tlen - i - 1));
2276 break; /* success */
2277 }
2278 if (i == tlen) goto fail;
2279 MOP_OUT;
2280 continue;
2281 }
2282 break;
2283
2284 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2285 {
2286 int len, is_fail;
2287 UChar *pstart, *pend, *swork;
2288
2289 GET_LENGTH_INC(tlen, p);
2290 for (i = 0; i < tlen; i++) {
2291 GET_MEMNUM_INC(mem, p);
2292
2293 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2294 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2295
2296 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2297 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2298 else
2299 pstart = (UChar* )((void* )mem_start_stk[mem]);
2300
2301 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2302 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2303 : (UChar* )((void* )mem_end_stk[mem]));
2304 n = pend - pstart;
2305 DATA_ENSURE(n);
2306 sprev = s;
2307 swork = s;
2308 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2309 if (is_fail) continue;
2310 s = swork;
2311 while (sprev + (len = enclen(encode, sprev)) < s)
2312 sprev += len;
2313
2314 p += (SIZE_MEMNUM * (tlen - i - 1));
2315 break; /* success */
2316 }
2317 if (i == tlen) goto fail;
2318 MOP_OUT;
2319 continue;
2320 }
2321 break;
2322
2323 #ifdef USE_BACKREF_WITH_LEVEL
2324 case OP_BACKREF_WITH_LEVEL:
2325 {
2326 int len;
2327 OnigOptionType ic;
2328 LengthType level;
2329
2330 GET_OPTION_INC(ic, p);
2331 GET_LENGTH_INC(level, p);
2332 GET_LENGTH_INC(tlen, p);
2333
2334 sprev = s;
2335 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2336 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2337 while (sprev + (len = enclen(encode, sprev)) < s)
2338 sprev += len;
2339
2340 p += (SIZE_MEMNUM * tlen);
2341 }
2342 else
2343 goto fail;
2344
2345 MOP_OUT;
2346 continue;
2347 }
2348 break;
2349 #endif
2350
2351 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2352 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2353 GET_OPTION_INC(option, p);
2354 STACK_PUSH_ALT(p, s, sprev);
2355 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2356 MOP_OUT;
2357 continue;
2358 break;
2359
2360 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2361 GET_OPTION_INC(option, p);
2362 MOP_OUT;
2363 continue;
2364 break;
2365 #endif
2366
2367 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2368 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2369 STACK_PUSH_NULL_CHECK_START(mem, s);
2370 MOP_OUT;
2371 continue;
2372 break;
2373
2374 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2375 {
2376 int isnull;
2377
2378 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2379 STACK_NULL_CHECK(isnull, mem, s);
2380 if (isnull) {
2381 #ifdef ONIG_DEBUG_MATCH
2382 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2383 (int )mem, (int )s);
2384 #endif
2385 null_check_found:
2386 /* empty loop founded, skip next instruction */
2387 switch (*p++) {
2388 case OP_JUMP:
2389 case OP_PUSH:
2390 p += SIZE_RELADDR;
2391 break;
2392 case OP_REPEAT_INC:
2393 case OP_REPEAT_INC_NG:
2394 case OP_REPEAT_INC_SG:
2395 case OP_REPEAT_INC_NG_SG:
2396 p += SIZE_MEMNUM;
2397 break;
2398 default:
2399 goto unexpected_bytecode_error;
2400 break;
2401 }
2402 }
2403 }
2404 MOP_OUT;
2405 continue;
2406 break;
2407
2408 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2409 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2410 {
2411 int isnull;
2412
2413 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2414 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2415 if (isnull) {
2416 #ifdef ONIG_DEBUG_MATCH
2417 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2418 (int )mem, (int )s);
2419 #endif
2420 if (isnull == -1) goto fail;
2421 goto null_check_found;
2422 }
2423 }
2424 MOP_OUT;
2425 continue;
2426 break;
2427 #endif
2428
2429 #ifdef USE_SUBEXP_CALL
2430 case OP_NULL_CHECK_END_MEMST_PUSH:
2431 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2432 {
2433 int isnull;
2434
2435 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2436 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2437 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2438 #else
2439 STACK_NULL_CHECK_REC(isnull, mem, s);
2440 #endif
2441 if (isnull) {
2442 #ifdef ONIG_DEBUG_MATCH
2443 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2444 (int )mem, (int )s);
2445 #endif
2446 if (isnull == -1) goto fail;
2447 goto null_check_found;
2448 }
2449 else {
2450 STACK_PUSH_NULL_CHECK_END(mem);
2451 }
2452 }
2453 MOP_OUT;
2454 continue;
2455 break;
2456 #endif
2457
2458 case OP_JUMP: MOP_IN(OP_JUMP);
2459 GET_RELADDR_INC(addr, p);
2460 p += addr;
2461 MOP_OUT;
2462 CHECK_INTERRUPT_IN_MATCH_AT;
2463 continue;
2464 break;
2465
2466 case OP_PUSH: MOP_IN(OP_PUSH);
2467 GET_RELADDR_INC(addr, p);
2468 STACK_PUSH_ALT(p + addr, s, sprev);
2469 MOP_OUT;
2470 continue;
2471 break;
2472
2473 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2474 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2475 GET_STATE_CHECK_NUM_INC(mem, p);
2476 STATE_CHECK_VAL(scv, mem);
2477 if (scv) goto fail;
2478
2479 GET_RELADDR_INC(addr, p);
2480 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2481 MOP_OUT;
2482 continue;
2483 break;
2484
2485 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2486 GET_STATE_CHECK_NUM_INC(mem, p);
2487 GET_RELADDR_INC(addr, p);
2488 STATE_CHECK_VAL(scv, mem);
2489 if (scv) {
2490 p += addr;
2491 }
2492 else {
2493 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2494 }
2495 MOP_OUT;
2496 continue;
2497 break;
2498
2499 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2500 GET_STATE_CHECK_NUM_INC(mem, p);
2501 STATE_CHECK_VAL(scv, mem);
2502 if (scv) goto fail;
2503
2504 STACK_PUSH_STATE_CHECK(s, mem);
2505 MOP_OUT;
2506 continue;
2507 break;
2508 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2509
2510 case OP_POP: MOP_IN(OP_POP);
2511 STACK_POP_ONE;
2512 MOP_OUT;
2513 continue;
2514 break;
2515
2516 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2517 GET_RELADDR_INC(addr, p);
2518 if (*p == *s && DATA_ENSURE_CHECK1) {
2519 p++;
2520 STACK_PUSH_ALT(p + addr, s, sprev);
2521 MOP_OUT;
2522 continue;
2523 }
2524 p += (addr + 1);
2525 MOP_OUT;
2526 continue;
2527 break;
2528
2529 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2530 GET_RELADDR_INC(addr, p);
2531 if (*p == *s) {
2532 p++;
2533 STACK_PUSH_ALT(p + addr, s, sprev);
2534 MOP_OUT;
2535 continue;
2536 }
2537 p++;
2538 MOP_OUT;
2539 continue;
2540 break;
2541
2542 case OP_REPEAT: MOP_IN(OP_REPEAT);
2543 {
2544 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2545 GET_RELADDR_INC(addr, p);
2546
2547 STACK_ENSURE(1);
2548 repeat_stk[mem] = GET_STACK_INDEX(stk);
2549 STACK_PUSH_REPEAT(mem, p);
2550
2551 if (reg->repeat_range[mem].lower == 0) {
2552 STACK_PUSH_ALT(p + addr, s, sprev);
2553 }
2554 }
2555 MOP_OUT;
2556 continue;
2557 break;
2558
2559 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2560 {
2561 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2562 GET_RELADDR_INC(addr, p);
2563
2564 STACK_ENSURE(1);
2565 repeat_stk[mem] = GET_STACK_INDEX(stk);
2566 STACK_PUSH_REPEAT(mem, p);
2567
2568 if (reg->repeat_range[mem].lower == 0) {
2569 STACK_PUSH_ALT(p, s, sprev);
2570 p += addr;
2571 }
2572 }
2573 MOP_OUT;
2574 continue;
2575 break;
2576
2577 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2578 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2579 si = repeat_stk[mem];
2580 stkp = STACK_AT(si);
2581
2582 repeat_inc:
2583 stkp->u.repeat.count++;
2584 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2585 /* end of repeat. Nothing to do. */
2586 }
2587 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2588 STACK_PUSH_ALT(p, s, sprev);
2589 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2590 }
2591 else {
2592 p = stkp->u.repeat.pcode;
2593 }
2594 STACK_PUSH_REPEAT_INC(si);
2595 MOP_OUT;
2596 CHECK_INTERRUPT_IN_MATCH_AT;
2597 continue;
2598 break;
2599
2600 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2601 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2602 STACK_GET_REPEAT(mem, stkp);
2603 si = GET_STACK_INDEX(stkp);
2604 goto repeat_inc;
2605 break;
2606
2607 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2608 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2609 si = repeat_stk[mem];
2610 stkp = STACK_AT(si);
2611
2612 repeat_inc_ng:
2613 stkp->u.repeat.count++;
2614 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2615 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2616 UChar* pcode = stkp->u.repeat.pcode;
2617
2618 STACK_PUSH_REPEAT_INC(si);
2619 STACK_PUSH_ALT(pcode, s, sprev);
2620 }
2621 else {
2622 p = stkp->u.repeat.pcode;
2623 STACK_PUSH_REPEAT_INC(si);
2624 }
2625 }
2626 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2627 STACK_PUSH_REPEAT_INC(si);
2628 }
2629 MOP_OUT;
2630 CHECK_INTERRUPT_IN_MATCH_AT;
2631 continue;
2632 break;
2633
2634 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2635 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2636 STACK_GET_REPEAT(mem, stkp);
2637 si = GET_STACK_INDEX(stkp);
2638 goto repeat_inc_ng;
2639 break;
2640
2641 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2642 STACK_PUSH_POS(s, sprev);
2643 MOP_OUT;
2644 continue;
2645 break;
2646
2647 case OP_POP_POS: MOP_IN(OP_POP_POS);
2648 {
2649 STACK_POS_END(stkp);
2650 s = stkp->u.state.pstr;
2651 sprev = stkp->u.state.pstr_prev;
2652 }
2653 MOP_OUT;
2654 continue;
2655 break;
2656
2657 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2658 GET_RELADDR_INC(addr, p);
2659 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2660 MOP_OUT;
2661 continue;
2662 break;
2663
2664 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2665 STACK_POP_TIL_POS_NOT;
2666 goto fail;
2667 break;
2668
2669 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2670 STACK_PUSH_STOP_BT;
2671 MOP_OUT;
2672 continue;
2673 break;
2674
2675 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2676 STACK_STOP_BT_END;
2677 MOP_OUT;
2678 continue;
2679 break;
2680
2681 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2682 GET_LENGTH_INC(tlen, p);
2683 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2684 if (IS_NULL(s)) goto fail;
2685 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2686 MOP_OUT;
2687 continue;
2688 break;
2689
2690 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2691 GET_RELADDR_INC(addr, p);
2692 GET_LENGTH_INC(tlen, p);
2693 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2694 if (IS_NULL(q)) {
2695 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2696 If you want to change to fail, replace following line. */
2697 p += addr;
2698 /* goto fail; */
2699 }
2700 else {
2701 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2702 s = q;
2703 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2704 }
2705 MOP_OUT;
2706 continue;
2707 break;
2708
2709 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2710 STACK_POP_TIL_LOOK_BEHIND_NOT;
2711 goto fail;
2712 break;
2713
2714 #ifdef USE_SUBEXP_CALL
2715 case OP_CALL: MOP_IN(OP_CALL);
2716 GET_ABSADDR_INC(addr, p);
2717 STACK_PUSH_CALL_FRAME(p);
2718 p = reg->p + addr;
2719 MOP_OUT;
2720 continue;
2721 break;
2722
2723 case OP_RETURN: MOP_IN(OP_RETURN);
2724 STACK_RETURN(p);
2725 STACK_PUSH_RETURN;
2726 MOP_OUT;
2727 continue;
2728 break;
2729 #endif
2730
2731 case OP_FINISH:
2732 goto finish;
2733 break;
2734
2735 fail:
2736 MOP_OUT;
2737 /* fall */
2738 case OP_FAIL: MOP_IN(OP_FAIL);
2739 STACK_POP;
2740 p = stk->u.state.pcode;
2741 s = stk->u.state.pstr;
2742 sprev = stk->u.state.pstr_prev;
2743
2744 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2745 if (stk->u.state.state_check != 0) {
2746 stk->type = STK_STATE_CHECK_MARK;
2747 stk++;
2748 }
2749 #endif
2750
2751 MOP_OUT;
2752 continue;
2753 break;
2754
2755 default:
2756 goto bytecode_error;
2757
2758 } /* end of switch */
2759 sprev = sbegin;
2760 } /* end of while(1) */
2761
2762 finish:
2763 STACK_SAVE;
2764 return best_len;
2765
2766 #ifdef ONIG_DEBUG
2767 stack_error:
2768 STACK_SAVE;
2769 return ONIGERR_STACK_BUG;
2770 #endif
2771
2772 bytecode_error:
2773 STACK_SAVE;
2774 return ONIGERR_UNDEFINED_BYTECODE;
2775
2776 unexpected_bytecode_error:
2777 STACK_SAVE;
2778 return ONIGERR_UNEXPECTED_BYTECODE;
2779 }
2780
2781
2782 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2783 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2784 const UChar* text, const UChar* text_end, UChar* text_range)
2785 {
2786 UChar *t, *p, *s, *end;
2787
2788 end = (UChar* )text_end;
2789 end -= target_end - target - 1;
2790 if (end > text_range)
2791 end = text_range;
2792
2793 s = (UChar* )text;
2794
2795 while (s < end) {
2796 if (*s == *target) {
2797 p = s + 1;
2798 t = target + 1;
2799 while (t < target_end) {
2800 if (*t != *p++)
2801 break;
2802 t++;
2803 }
2804 if (t == target_end)
2805 return s;
2806 }
2807 s += enclen(enc, s);
2808 }
2809
2810 return (UChar* )NULL;
2811 }
2812
2813 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2814 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2815 const UChar* t, const UChar* tend,
2816 const UChar* p, const UChar* end)
2817 {
2818 int lowlen;
2819 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2820
2821 while (t < tend) {
2822 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2823 q = lowbuf;
2824 while (lowlen > 0) {
2825 if (*t++ != *q++) return 0;
2826 lowlen--;
2827 }
2828 }
2829
2830 return 1;
2831 }
2832
2833 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2834 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2835 UChar* target, UChar* target_end,
2836 const UChar* text, const UChar* text_end, UChar* text_range)
2837 {
2838 UChar *s, *end;
2839
2840 end = (UChar* )text_end;
2841 end -= target_end - target - 1;
2842 if (end > text_range)
2843 end = text_range;
2844
2845 s = (UChar* )text;
2846
2847 while (s < end) {
2848 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2849 s, text_end))
2850 return s;
2851
2852 s += enclen(enc, s);
2853 }
2854
2855 return (UChar* )NULL;
2856 }
2857
2858 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2859 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2860 const UChar* text, const UChar* adjust_text,
2861 const UChar* text_end, const UChar* text_start)
2862 {
2863 UChar *t, *p, *s;
2864
2865 s = (UChar* )text_end;
2866 s -= (target_end - target);
2867 if (s > text_start)
2868 s = (UChar* )text_start;
2869 else
2870 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2871
2872 while (s >= text) {
2873 if (*s == *target) {
2874 p = s + 1;
2875 t = target + 1;
2876 while (t < target_end) {
2877 if (*t != *p++)
2878 break;
2879 t++;
2880 }
2881 if (t == target_end)
2882 return s;
2883 }
2884 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2885 }
2886
2887 return (UChar* )NULL;
2888 }
2889
2890 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2891 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2892 UChar* target, UChar* target_end,
2893 const UChar* text, const UChar* adjust_text,
2894 const UChar* text_end, const UChar* text_start)
2895 {
2896 UChar *s;
2897
2898 s = (UChar* )text_end;
2899 s -= (target_end - target);
2900 if (s > text_start)
2901 s = (UChar* )text_start;
2902 else
2903 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2904
2905 while (s >= text) {
2906 if (str_lower_case_match(enc, case_fold_flag,
2907 target, target_end, s, text_end))
2908 return s;
2909
2910 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2911 }
2912
2913 return (UChar* )NULL;
2914 }
2915
2916 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2917 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2918 const UChar* text, const UChar* text_end,
2919 const UChar* text_range)
2920 {
2921 const UChar *s, *se, *t, *p, *end;
2922 const UChar *tail;
2923 int skip, tlen1;
2924
2925 #ifdef ONIG_DEBUG_SEARCH
2926 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2927 (int )text, (int )text_end, (int )text_range);
2928 #endif
2929
2930 tail = target_end - 1;
2931 tlen1 = tail - target;
2932 end = text_range;
2933 if (end + tlen1 > text_end)
2934 end = text_end - tlen1;
2935
2936 s = text;
2937
2938 if (IS_NULL(reg->int_map)) {
2939 while (s < end) {
2940 p = se = s + tlen1;
2941 t = tail;
2942 while (*p == *t) {
2943 if (t == target) return (UChar* )s;
2944 p--; t--;
2945 }
2946 skip = reg->map[*se];
2947 t = s;
2948 do {
2949 s += enclen(reg->enc, s);
2950 } while ((s - t) < skip && s < end);
2951 }
2952 }
2953 else {
2954 while (s < end) {
2955 p = se = s + tlen1;
2956 t = tail;
2957 while (*p == *t) {
2958 if (t == target) return (UChar* )s;
2959 p--; t--;
2960 }
2961 skip = reg->int_map[*se];
2962 t = s;
2963 do {
2964 s += enclen(reg->enc, s);
2965 } while ((s - t) < skip && s < end);
2966 }
2967 }
2968
2969 return (UChar* )NULL;
2970 }
2971
2972 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2973 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2974 const UChar* text, const UChar* text_end, const UChar* text_range)
2975 {
2976 const UChar *s, *t, *p, *end;
2977 const UChar *tail;
2978
2979 end = text_range + (target_end - target) - 1;
2980 if (end > text_end)
2981 end = text_end;
2982
2983 tail = target_end - 1;
2984 s = text + (target_end - target) - 1;
2985 if (IS_NULL(reg->int_map)) {
2986 while (s < end) {
2987 p = s;
2988 t = tail;
2989 while (*p == *t) {
2990 if (t == target) return (UChar* )p;
2991 p--; t--;
2992 }
2993 s += reg->map[*s];
2994 }
2995 }
2996 else { /* see int_map[] */
2997 while (s < end) {
2998 p = s;
2999 t = tail;
3000 while (*p == *t) {
3001 if (t == target) return (UChar* )p;
3002 p--; t--;
3003 }
3004 s += reg->int_map[*s];
3005 }
3006 }
3007 return (UChar* )NULL;
3008 }
3009
3010 #ifdef USE_INT_MAP_BACKWARD
3011 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,int ** skip)3012 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3013 int** skip)
3014
3015 {
3016 int i, len;
3017
3018 if (IS_NULL(*skip)) {
3019 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3020 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3021 }
3022
3023 len = end - s;
3024 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3025 (*skip)[i] = len;
3026
3027 for (i = len - 1; i > 0; i--)
3028 (*skip)[s[i]] = i;
3029
3030 return 0;
3031 }
3032
3033 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3034 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3035 const UChar* text, const UChar* adjust_text,
3036 const UChar* text_end, const UChar* text_start)
3037 {
3038 const UChar *s, *t, *p;
3039
3040 s = text_end - (target_end - target);
3041 if (text_start < s)
3042 s = text_start;
3043 else
3044 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3045
3046 while (s >= text) {
3047 p = s;
3048 t = target;
3049 while (t < target_end && *p == *t) {
3050 p++; t++;
3051 }
3052 if (t == target_end)
3053 return (UChar* )s;
3054
3055 s -= reg->int_map_backward[*s];
3056 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3057 }
3058
3059 return (UChar* )NULL;
3060 }
3061 #endif
3062
3063 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3064 map_search(OnigEncoding enc, UChar map[],
3065 const UChar* text, const UChar* text_range)
3066 {
3067 const UChar *s = text;
3068
3069 while (s < text_range) {
3070 if (map[*s]) return (UChar* )s;
3071
3072 s += enclen(enc, s);
3073 }
3074 return (UChar* )NULL;
3075 }
3076
3077 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3078 map_search_backward(OnigEncoding enc, UChar map[],
3079 const UChar* text, const UChar* adjust_text,
3080 const UChar* text_start)
3081 {
3082 const UChar *s = text_start;
3083
3084 while (s >= text) {
3085 if (map[*s]) return (UChar* )s;
3086
3087 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3088 }
3089 return (UChar* )NULL;
3090 }
3091
3092 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3093 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3094 OnigOptionType option)
3095 {
3096 int r;
3097 UChar *prev;
3098 OnigMatchArg msa;
3099
3100 MATCH_ARG_INIT(msa, reg, option, region, at);
3101 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3102 {
3103 int offset = at - str;
3104 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3105 }
3106 #endif
3107
3108 if (region
3109 #ifdef USE_POSIX_API_REGION_OPTION
3110 && !IS_POSIX_REGION(option)
3111 #endif
3112 ) {
3113 r = onig_region_resize_clear(region, reg->num_mem + 1);
3114 }
3115 else
3116 r = 0;
3117
3118 if (r == 0) {
3119 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3120 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
3121 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
3122 goto end;
3123 }
3124 }
3125
3126 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3127 r = match_at(reg, str, end,
3128 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3129 end,
3130 #endif
3131 at, prev, &msa);
3132 }
3133
3134 end:
3135 MATCH_ARG_FREE(msa);
3136 return r;
3137 }
3138
3139 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3140 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3141 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3142 {
3143 UChar *p, *pprev = (UChar* )NULL;
3144
3145 #ifdef ONIG_DEBUG_SEARCH
3146 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3147 (int )str, (int )end, (int )s, (int )range);
3148 #endif
3149
3150 p = s;
3151 if (reg->dmin > 0) {
3152 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3153 p += reg->dmin;
3154 }
3155 else {
3156 UChar *q = p + reg->dmin;
3157
3158 if (q >= end) return 0; /* fail */
3159 while (p < q) p += enclen(reg->enc, p);
3160 }
3161 }
3162
3163 retry:
3164 switch (reg->optimize) {
3165 case ONIG_OPTIMIZE_EXACT:
3166 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3167 break;
3168 case ONIG_OPTIMIZE_EXACT_IC:
3169 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3170 reg->exact, reg->exact_end, p, end, range);
3171 break;
3172
3173 case ONIG_OPTIMIZE_EXACT_BM:
3174 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3175 break;
3176
3177 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3178 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3179 break;
3180
3181 case ONIG_OPTIMIZE_MAP:
3182 p = map_search(reg->enc, reg->map, p, range);
3183 break;
3184 }
3185
3186 if (p && p < range) {
3187 if (p - reg->dmin < s) {
3188 retry_gate:
3189 pprev = p;
3190 p += enclen(reg->enc, p);
3191 goto retry;
3192 }
3193
3194 if (reg->sub_anchor) {
3195 UChar* prev;
3196
3197 switch (reg->sub_anchor) {
3198 case ANCHOR_BEGIN_LINE:
3199 if (!ON_STR_BEGIN(p)) {
3200 prev = onigenc_get_prev_char_head(reg->enc,
3201 (pprev ? pprev : str), p);
3202 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3203 goto retry_gate;
3204 }
3205 break;
3206
3207 case ANCHOR_END_LINE:
3208 if (ON_STR_END(p)) {
3209 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3210 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3211 (pprev ? pprev : str), p);
3212 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3213 goto retry_gate;
3214 #endif
3215 }
3216 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3217 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3218 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3219 #endif
3220 )
3221 goto retry_gate;
3222 break;
3223 }
3224 }
3225
3226 if (reg->dmax == 0) {
3227 *low = p;
3228 if (low_prev) {
3229 if (*low > s)
3230 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3231 else
3232 *low_prev = onigenc_get_prev_char_head(reg->enc,
3233 (pprev ? pprev : str), p);
3234 }
3235 }
3236 else {
3237 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3238 if (p - str < reg->dmax) {
3239 *low = (UChar* )str;
3240 if (low_prev)
3241 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
3242 }
3243 else {
3244 *low = p - reg->dmax;
3245 if (*low > s) {
3246 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3247 *low, (const UChar** )low_prev);
3248 if (low_prev && IS_NULL(*low_prev))
3249 *low_prev = onigenc_get_prev_char_head(reg->enc,
3250 (pprev ? pprev : s), *low);
3251 }
3252 else {
3253 if (low_prev)
3254 *low_prev = onigenc_get_prev_char_head(reg->enc,
3255 (pprev ? pprev : str), *low);
3256 }
3257 }
3258 }
3259 }
3260 /* no needs to adjust *high, *high is used as range check only */
3261 *high = p - reg->dmin;
3262
3263 #ifdef ONIG_DEBUG_SEARCH
3264 fprintf(stderr,
3265 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3266 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3267 #endif
3268 return 1; /* success */
3269 }
3270
3271 return 0; /* fail */
3272 }
3273
3274
3275 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3276
3277 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3278 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3279 UChar* s, const UChar* range, UChar* adjrange,
3280 UChar** low, UChar** high)
3281 {
3282 UChar *p;
3283
3284 range += reg->dmin;
3285 p = s;
3286
3287 retry:
3288 switch (reg->optimize) {
3289 case ONIG_OPTIMIZE_EXACT:
3290 exact_method:
3291 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3292 range, adjrange, end, p);
3293 break;
3294
3295 case ONIG_OPTIMIZE_EXACT_IC:
3296 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3297 reg->exact, reg->exact_end,
3298 range, adjrange, end, p);
3299 break;
3300
3301 case ONIG_OPTIMIZE_EXACT_BM:
3302 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3303 #ifdef USE_INT_MAP_BACKWARD
3304 if (IS_NULL(reg->int_map_backward)) {
3305 int r;
3306
3307 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3308 goto exact_method;
3309
3310 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3311 &(reg->int_map_backward));
3312 if (r) return r;
3313 }
3314 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3315 end, p);
3316 #else
3317 goto exact_method;
3318 #endif
3319 break;
3320
3321 case ONIG_OPTIMIZE_MAP:
3322 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3323 break;
3324 }
3325
3326 if (p) {
3327 if (reg->sub_anchor) {
3328 UChar* prev;
3329
3330 switch (reg->sub_anchor) {
3331 case ANCHOR_BEGIN_LINE:
3332 if (!ON_STR_BEGIN(p)) {
3333 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3334 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3335 p = prev;
3336 goto retry;
3337 }
3338 }
3339 break;
3340
3341 case ANCHOR_END_LINE:
3342 if (ON_STR_END(p)) {
3343 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3344 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3345 if (IS_NULL(prev)) goto fail;
3346 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3347 p = prev;
3348 goto retry;
3349 }
3350 #endif
3351 }
3352 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3353 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3354 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3355 #endif
3356 ) {
3357 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3358 if (IS_NULL(p)) goto fail;
3359 goto retry;
3360 }
3361 break;
3362 }
3363 }
3364
3365 /* no needs to adjust *high, *high is used as range check only */
3366 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3367 *low = p - reg->dmax;
3368 *high = p - reg->dmin;
3369 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3370 }
3371
3372 #ifdef ONIG_DEBUG_SEARCH
3373 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3374 (int )(*low - str), (int )(*high - str));
3375 #endif
3376 return 1; /* success */
3377 }
3378
3379 fail:
3380 #ifdef ONIG_DEBUG_SEARCH
3381 fprintf(stderr, "backward_search_range: fail.\n");
3382 #endif
3383 return 0; /* fail */
3384 }
3385
3386
3387 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3388 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3389 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3390 {
3391 int r;
3392 UChar *s, *prev;
3393 OnigMatchArg msa;
3394 const UChar *orig_start = start;
3395 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3396 const UChar *orig_range = range;
3397 #endif
3398
3399 #ifdef ONIG_DEBUG_SEARCH
3400 fprintf(stderr,
3401 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3402 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3403 #endif
3404
3405 if (region
3406 #ifdef USE_POSIX_API_REGION_OPTION
3407 && !IS_POSIX_REGION(option)
3408 #endif
3409 ) {
3410 r = onig_region_resize_clear(region, reg->num_mem + 1);
3411 if (r) goto finish_no_msa;
3412 }
3413
3414 if (start > end || start < str) goto mismatch_no_msa;
3415
3416 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3417 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
3418 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
3419 goto finish_no_msa;
3420 }
3421 }
3422
3423
3424 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3425 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3426 #define MATCH_AND_RETURN_CHECK(upper_range) \
3427 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3428 if (r != ONIG_MISMATCH) {\
3429 if (r >= 0) {\
3430 if (! IS_FIND_LONGEST(reg->options)) {\
3431 goto match;\
3432 }\
3433 }\
3434 else goto finish; /* error */ \
3435 }
3436 #else
3437 #define MATCH_AND_RETURN_CHECK(upper_range) \
3438 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3439 if (r != ONIG_MISMATCH) {\
3440 if (r >= 0) {\
3441 goto match;\
3442 }\
3443 else goto finish; /* error */ \
3444 }
3445 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3446 #else
3447 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3448 #define MATCH_AND_RETURN_CHECK(none) \
3449 r = match_at(reg, str, end, s, prev, &msa);\
3450 if (r != ONIG_MISMATCH) {\
3451 if (r >= 0) {\
3452 if (! IS_FIND_LONGEST(reg->options)) {\
3453 goto match;\
3454 }\
3455 }\
3456 else goto finish; /* error */ \
3457 }
3458 #else
3459 #define MATCH_AND_RETURN_CHECK(none) \
3460 r = match_at(reg, str, end, s, prev, &msa);\
3461 if (r != ONIG_MISMATCH) {\
3462 if (r >= 0) {\
3463 goto match;\
3464 }\
3465 else goto finish; /* error */ \
3466 }
3467 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3468 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3469
3470
3471 /* anchor optimize: resume search range */
3472 if (reg->anchor != 0 && str < end) {
3473 UChar *min_semi_end, *max_semi_end;
3474
3475 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3476 /* search start-position only */
3477 begin_position:
3478 if (range > start)
3479 range = start + 1;
3480 else
3481 range = start;
3482 }
3483 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3484 /* search str-position only */
3485 if (range > start) {
3486 if (start != str) goto mismatch_no_msa;
3487 range = str + 1;
3488 }
3489 else {
3490 if (range <= str) {
3491 start = str;
3492 range = str;
3493 }
3494 else
3495 goto mismatch_no_msa;
3496 }
3497 }
3498 else if (reg->anchor & ANCHOR_END_BUF) {
3499 min_semi_end = max_semi_end = (UChar* )end;
3500
3501 end_buf:
3502 if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
3503 goto mismatch_no_msa;
3504
3505 if (range > start) {
3506 if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
3507 start = min_semi_end - reg->anchor_dmax;
3508 if (start < end)
3509 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3510 }
3511 if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3512 range = max_semi_end - reg->anchor_dmin + 1;
3513 }
3514
3515 if (start > range) goto mismatch_no_msa;
3516 /* If start == range, match with empty at end.
3517 Backward search is used. */
3518 }
3519 else {
3520 if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
3521 range = min_semi_end - reg->anchor_dmax;
3522 }
3523 if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
3524 start = max_semi_end - reg->anchor_dmin;
3525 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3526 }
3527 if (range > start) goto mismatch_no_msa;
3528 }
3529 }
3530 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3531 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3532
3533 max_semi_end = (UChar* )end;
3534 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3535 min_semi_end = pre_end;
3536
3537 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3538 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3539 if (IS_NOT_NULL(pre_end) &&
3540 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3541 min_semi_end = pre_end;
3542 }
3543 #endif
3544 if (min_semi_end > str && start <= min_semi_end) {
3545 goto end_buf;
3546 }
3547 }
3548 else {
3549 min_semi_end = (UChar* )end;
3550 goto end_buf;
3551 }
3552 }
3553 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3554 goto begin_position;
3555 }
3556 }
3557 else if (str == end) { /* empty string */
3558 static const UChar* address_for_empty_string = (UChar* )"";
3559
3560 #ifdef ONIG_DEBUG_SEARCH
3561 fprintf(stderr, "onig_search: empty string.\n");
3562 #endif
3563
3564 if (reg->threshold_len == 0) {
3565 start = end = str = address_for_empty_string;
3566 s = (UChar* )start;
3567 prev = (UChar* )NULL;
3568
3569 MATCH_ARG_INIT(msa, reg, option, region, start);
3570 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3571 msa.state_check_buff = (void* )0;
3572 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3573 #endif
3574 MATCH_AND_RETURN_CHECK(end);
3575 goto mismatch;
3576 }
3577 goto mismatch_no_msa;
3578 }
3579
3580 #ifdef ONIG_DEBUG_SEARCH
3581 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3582 (int )(end - str), (int )(start - str), (int )(range - str));
3583 #endif
3584
3585 MATCH_ARG_INIT(msa, reg, option, region, orig_start);
3586 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3587 {
3588 int offset = (MIN(start, range) - str);
3589 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3590 }
3591 #endif
3592
3593 s = (UChar* )start;
3594 if (range > start) { /* forward search */
3595 if (s > str)
3596 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3597 else
3598 prev = (UChar* )NULL;
3599
3600 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3601 UChar *sch_range, *low, *high, *low_prev;
3602
3603 sch_range = (UChar* )range;
3604 if (reg->dmax != 0) {
3605 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3606 sch_range = (UChar* )end;
3607 else {
3608 sch_range += reg->dmax;
3609 if (sch_range > end) sch_range = (UChar* )end;
3610 }
3611 }
3612
3613 if ((end - start) < reg->threshold_len)
3614 goto mismatch;
3615
3616 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3617 do {
3618 if (! forward_search_range(reg, str, end, s, sch_range,
3619 &low, &high, &low_prev)) goto mismatch;
3620 if (s < low) {
3621 s = low;
3622 prev = low_prev;
3623 }
3624 while (s <= high) {
3625 MATCH_AND_RETURN_CHECK(orig_range);
3626 prev = s;
3627 s += enclen(reg->enc, s);
3628 }
3629 } while (s < range);
3630 goto mismatch;
3631 }
3632 else { /* check only. */
3633 if (! forward_search_range(reg, str, end, s, sch_range,
3634 &low, &high, (UChar** )NULL)) goto mismatch;
3635
3636 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3637 do {
3638 MATCH_AND_RETURN_CHECK(orig_range);
3639 prev = s;
3640 s += enclen(reg->enc, s);
3641
3642 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
3643 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3644 prev = s;
3645 s += enclen(reg->enc, s);
3646 }
3647 }
3648 } while (s < range);
3649 goto mismatch;
3650 }
3651 }
3652 }
3653
3654 do {
3655 MATCH_AND_RETURN_CHECK(orig_range);
3656 prev = s;
3657 s += enclen(reg->enc, s);
3658 } while (s < range);
3659
3660 if (s == range) { /* because empty match with /$/. */
3661 MATCH_AND_RETURN_CHECK(orig_range);
3662 }
3663 }
3664 else { /* backward search */
3665 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3666 if (orig_start < end)
3667 orig_start += enclen(reg->enc, orig_start); /* is upper range */
3668 #endif
3669
3670 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3671 UChar *low, *high, *adjrange, *sch_start;
3672
3673 if (range < end)
3674 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3675 else
3676 adjrange = (UChar* )end;
3677
3678 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3679 (end - range) >= reg->threshold_len) {
3680 do {
3681 sch_start = s + reg->dmax;
3682 if (sch_start > end) sch_start = (UChar* )end;
3683 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3684 &low, &high) <= 0)
3685 goto mismatch;
3686
3687 if (s > high)
3688 s = high;
3689
3690 while (s >= low) {
3691 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3692 MATCH_AND_RETURN_CHECK(orig_start);
3693 s = prev;
3694 }
3695 } while (s >= range);
3696 goto mismatch;
3697 }
3698 else { /* check only. */
3699 if ((end - range) < reg->threshold_len) goto mismatch;
3700
3701 sch_start = s;
3702 if (reg->dmax != 0) {
3703 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3704 sch_start = (UChar* )end;
3705 else {
3706 sch_start += reg->dmax;
3707 if (sch_start > end) sch_start = (UChar* )end;
3708 else
3709 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3710 start, sch_start);
3711 }
3712 }
3713 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3714 &low, &high) <= 0) goto mismatch;
3715 }
3716 }
3717
3718 do {
3719 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3720 MATCH_AND_RETURN_CHECK(orig_start);
3721 s = prev;
3722 } while (s >= range);
3723 }
3724
3725 mismatch:
3726 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3727 if (IS_FIND_LONGEST(reg->options)) {
3728 if (msa.best_len >= 0) {
3729 s = msa.best_s;
3730 goto match;
3731 }
3732 }
3733 #endif
3734 r = ONIG_MISMATCH;
3735
3736 finish:
3737 MATCH_ARG_FREE(msa);
3738
3739 /* If result is mismatch and no FIND_NOT_EMPTY option,
3740 then the region is not set in match_at(). */
3741 if (IS_FIND_NOT_EMPTY(reg->options) && region
3742 #ifdef USE_POSIX_API_REGION_OPTION
3743 && !IS_POSIX_REGION(option)
3744 #endif
3745 ) {
3746 onig_region_clear(region);
3747 }
3748
3749 #ifdef ONIG_DEBUG
3750 if (r != ONIG_MISMATCH)
3751 fprintf(stderr, "onig_search: error %d\n", r);
3752 #endif
3753 return r;
3754
3755 mismatch_no_msa:
3756 r = ONIG_MISMATCH;
3757 finish_no_msa:
3758 #ifdef ONIG_DEBUG
3759 if (r != ONIG_MISMATCH)
3760 fprintf(stderr, "onig_search: error %d\n", r);
3761 #endif
3762 return r;
3763
3764 match:
3765 MATCH_ARG_FREE(msa);
3766 return s - str;
3767 }
3768
3769 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)3770 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
3771 OnigRegion* region, OnigOptionType option,
3772 int (*scan_callback)(int, int, OnigRegion*, void*),
3773 void* callback_arg)
3774 {
3775 int r;
3776 int n;
3777 int rs;
3778 const UChar* start;
3779
3780 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
3781 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
3782 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
3783
3784 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
3785 }
3786
3787 n = 0;
3788 start = str;
3789 while (1) {
3790 r = onig_search(reg, str, end, start, end, region, option);
3791 if (r >= 0) {
3792 rs = scan_callback(n, r, region, callback_arg);
3793 n++;
3794 if (rs != 0)
3795 return rs;
3796
3797 if (region->end[0] == start - str) {
3798 if (start >= end) break;
3799 start += enclen(reg->enc, start);
3800 }
3801 else
3802 start = str + region->end[0];
3803
3804 if (start > end)
3805 break;
3806 }
3807 else if (r == ONIG_MISMATCH) {
3808 break;
3809 }
3810 else { /* error */
3811 return r;
3812 }
3813 }
3814
3815 return n;
3816 }
3817
3818 extern OnigEncoding
onig_get_encoding(regex_t * reg)3819 onig_get_encoding(regex_t* reg)
3820 {
3821 return reg->enc;
3822 }
3823
3824 extern OnigOptionType
onig_get_options(regex_t * reg)3825 onig_get_options(regex_t* reg)
3826 {
3827 return reg->options;
3828 }
3829
3830 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)3831 onig_get_case_fold_flag(regex_t* reg)
3832 {
3833 return reg->case_fold_flag;
3834 }
3835
3836 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3837 onig_get_syntax(regex_t* reg)
3838 {
3839 return reg->syntax;
3840 }
3841
3842 extern int
onig_number_of_captures(regex_t * reg)3843 onig_number_of_captures(regex_t* reg)
3844 {
3845 return reg->num_mem;
3846 }
3847
3848 extern int
onig_number_of_capture_histories(regex_t * reg)3849 onig_number_of_capture_histories(regex_t* reg)
3850 {
3851 #ifdef USE_CAPTURE_HISTORY
3852 int i, n;
3853
3854 n = 0;
3855 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3856 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3857 n++;
3858 }
3859 return n;
3860 #else
3861 return 0;
3862 #endif
3863 }
3864
3865 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3866 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3867 {
3868 *to = *from;
3869 }
3870
3871