1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 #ifdef USE_CRNL_AS_LINE_TERMINATOR
33 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
34 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
35 ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
36 #endif
37
38 #ifdef USE_CAPTURE_HISTORY
39 static void history_tree_free(OnigCaptureTreeNode* node);
40
41 static void
history_tree_clear(OnigCaptureTreeNode * node)42 history_tree_clear(OnigCaptureTreeNode* node)
43 {
44 int i;
45
46 if (IS_NOT_NULL(node)) {
47 for (i = 0; i < node->num_childs; i++) {
48 if (IS_NOT_NULL(node->childs[i])) {
49 history_tree_free(node->childs[i]);
50 }
51 }
52 for (i = 0; i < node->allocated; i++) {
53 node->childs[i] = (OnigCaptureTreeNode* )0;
54 }
55 node->num_childs = 0;
56 node->beg = ONIG_REGION_NOTPOS;
57 node->end = ONIG_REGION_NOTPOS;
58 node->group = -1;
59 }
60 }
61
62 static void
history_tree_free(OnigCaptureTreeNode * node)63 history_tree_free(OnigCaptureTreeNode* node)
64 {
65 history_tree_clear(node);
66 xfree(node);
67 }
68
69 static void
history_root_free(OnigRegion * r)70 history_root_free(OnigRegion* r)
71 {
72 if (IS_NOT_NULL(r->history_root)) {
73 history_tree_free(r->history_root);
74 r->history_root = (OnigCaptureTreeNode* )0;
75 }
76 }
77
78 static OnigCaptureTreeNode*
history_node_new(void)79 history_node_new(void)
80 {
81 OnigCaptureTreeNode* node;
82
83 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
84 CHECK_NULL_RETURN(node);
85 node->childs = (OnigCaptureTreeNode** )0;
86 node->allocated = 0;
87 node->num_childs = 0;
88 node->group = -1;
89 node->beg = ONIG_REGION_NOTPOS;
90 node->end = ONIG_REGION_NOTPOS;
91
92 return node;
93 }
94
95 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)96 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
97 {
98 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
99
100 if (parent->num_childs >= parent->allocated) {
101 int n, i;
102
103 if (IS_NULL(parent->childs)) {
104 n = HISTORY_TREE_INIT_ALLOC_SIZE;
105 parent->childs =
106 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
107 }
108 else {
109 n = parent->allocated * 2;
110 parent->childs =
111 (OnigCaptureTreeNode** )xrealloc(parent->childs,
112 sizeof(OnigCaptureTreeNode*) * n);
113 }
114 CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
115 for (i = parent->allocated; i < n; i++) {
116 parent->childs[i] = (OnigCaptureTreeNode* )0;
117 }
118 parent->allocated = n;
119 }
120
121 parent->childs[parent->num_childs] = child;
122 parent->num_childs++;
123 return 0;
124 }
125
126 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)127 history_tree_clone(OnigCaptureTreeNode* node)
128 {
129 int i;
130 OnigCaptureTreeNode *clone, *child;
131
132 clone = history_node_new();
133 CHECK_NULL_RETURN(clone);
134
135 clone->beg = node->beg;
136 clone->end = node->end;
137 for (i = 0; i < node->num_childs; i++) {
138 child = history_tree_clone(node->childs[i]);
139 if (IS_NULL(child)) {
140 history_tree_free(clone);
141 return (OnigCaptureTreeNode* )0;
142 }
143 history_tree_add_child(clone, child);
144 }
145
146 return clone;
147 }
148
149 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)150 onig_get_capture_tree(OnigRegion* region)
151 {
152 return region->history_root;
153 }
154 #endif /* USE_CAPTURE_HISTORY */
155
156 extern void
onig_region_clear(OnigRegion * region)157 onig_region_clear(OnigRegion* region)
158 {
159 int i;
160
161 for (i = 0; i < region->num_regs; i++) {
162 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
163 }
164 #ifdef USE_CAPTURE_HISTORY
165 history_root_free(region);
166 #endif
167 }
168
169 extern int
onig_region_resize(OnigRegion * region,int n)170 onig_region_resize(OnigRegion* region, int n)
171 {
172 region->num_regs = n;
173
174 if (n < ONIG_NREGION)
175 n = ONIG_NREGION;
176
177 if (region->allocated == 0) {
178 region->beg = (int* )xmalloc(n * sizeof(int));
179 region->end = (int* )xmalloc(n * sizeof(int));
180
181 if (region->beg == 0 || region->end == 0)
182 return ONIGERR_MEMORY;
183
184 region->allocated = n;
185 }
186 else if (region->allocated < n) {
187 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
188 region->end = (int* )xrealloc(region->end, n * sizeof(int));
189
190 if (region->beg == 0 || region->end == 0)
191 return ONIGERR_MEMORY;
192
193 region->allocated = n;
194 }
195
196 return 0;
197 }
198
199 extern int
onig_region_resize_clear(OnigRegion * region,int n)200 onig_region_resize_clear(OnigRegion* region, int n)
201 {
202 int r;
203
204 r = onig_region_resize(region, n);
205 if (r != 0) return r;
206 onig_region_clear(region);
207 return 0;
208 }
209
210 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)211 onig_region_set(OnigRegion* region, int at, int beg, int end)
212 {
213 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
214
215 if (at >= region->allocated) {
216 int r = onig_region_resize(region, at + 1);
217 if (r < 0) return r;
218 }
219
220 region->beg[at] = beg;
221 region->end[at] = end;
222 return 0;
223 }
224
225 extern void
onig_region_init(OnigRegion * region)226 onig_region_init(OnigRegion* region)
227 {
228 region->num_regs = 0;
229 region->allocated = 0;
230 region->beg = (int* )0;
231 region->end = (int* )0;
232 region->history_root = (OnigCaptureTreeNode* )0;
233 }
234
235 extern OnigRegion*
onig_region_new(void)236 onig_region_new(void)
237 {
238 OnigRegion* r;
239
240 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
241 onig_region_init(r);
242 return r;
243 }
244
245 extern void
onig_region_free(OnigRegion * r,int free_self)246 onig_region_free(OnigRegion* r, int free_self)
247 {
248 if (r) {
249 if (r->allocated > 0) {
250 if (r->beg) xfree(r->beg);
251 if (r->end) xfree(r->end);
252 r->allocated = 0;
253 }
254 #ifdef USE_CAPTURE_HISTORY
255 history_root_free(r);
256 #endif
257 if (free_self) xfree(r);
258 }
259 }
260
261 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)262 onig_region_copy(OnigRegion* to, OnigRegion* from)
263 {
264 #define RREGC_SIZE (sizeof(int) * from->num_regs)
265 int i;
266
267 if (to == from) return;
268
269 if (to->allocated == 0) {
270 if (from->num_regs > 0) {
271 to->beg = (int* )xmalloc(RREGC_SIZE);
272 to->end = (int* )xmalloc(RREGC_SIZE);
273 to->allocated = from->num_regs;
274 }
275 }
276 else if (to->allocated < from->num_regs) {
277 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
278 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
279 to->allocated = from->num_regs;
280 }
281
282 for (i = 0; i < from->num_regs; i++) {
283 to->beg[i] = from->beg[i];
284 to->end[i] = from->end[i];
285 }
286 to->num_regs = from->num_regs;
287
288 #ifdef USE_CAPTURE_HISTORY
289 history_root_free(to);
290
291 if (IS_NOT_NULL(from->history_root)) {
292 to->history_root = history_tree_clone(from->history_root);
293 }
294 #endif
295 }
296
297
298 /** stack **/
299 #define INVALID_STACK_INDEX -1
300 typedef long StackIndex;
301
302 typedef struct _StackType {
303 unsigned int type;
304 union {
305 struct {
306 UChar *pcode; /* byte code position */
307 UChar *pstr; /* string position */
308 UChar *pstr_prev; /* previous char position of pstr */
309 #ifdef USE_COMBINATION_EXPLOSION_CHECK
310 unsigned int state_check;
311 #endif
312 } state;
313 struct {
314 int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
315 UChar *pcode; /* byte code position (head of repeated target) */
316 int num; /* repeat id */
317 } repeat;
318 struct {
319 StackIndex si; /* index of stack */
320 } repeat_inc;
321 struct {
322 int num; /* memory num */
323 UChar *pstr; /* start/end position */
324 /* Following information is setted, if this stack type is MEM-START */
325 StackIndex start; /* prev. info (for backtrack "(...)*" ) */
326 StackIndex end; /* prev. info (for backtrack "(...)*" ) */
327 } mem;
328 struct {
329 int num; /* null check id */
330 UChar *pstr; /* start position */
331 } null_check;
332 #ifdef USE_SUBEXP_CALL
333 struct {
334 UChar *ret_addr; /* byte code position */
335 int num; /* null check id */
336 UChar *pstr; /* string position */
337 } call_frame;
338 #endif
339 } u;
340 } StackType;
341
342 /* stack type */
343 /* used by normal-POP */
344 #define STK_ALT 0x0001
345 #define STK_LOOK_BEHIND_NOT 0x0002
346 #define STK_POS_NOT 0x0003
347 /* handled by normal-POP */
348 #define STK_MEM_START 0x0100
349 #define STK_MEM_END 0x8200
350 #define STK_REPEAT_INC 0x0300
351 #define STK_STATE_CHECK_MARK 0x1000
352 /* avoided by normal-POP */
353 #define STK_NULL_CHECK_START 0x3000
354 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
355 #define STK_MEM_END_MARK 0x8400
356 #define STK_POS 0x0500 /* used when POP-POS */
357 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
358 #define STK_REPEAT 0x0700
359 #define STK_CALL_FRAME 0x0800
360 #define STK_RETURN 0x0900
361 #define STK_VOID 0x0a00 /* for fill a blank */
362
363 /* stack type check mask */
364 #define STK_MASK_POP_USED 0x00ff
365 #define STK_MASK_TO_VOID_TARGET 0x10ff
366 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
367
368 typedef struct {
369 void* stack_p;
370 int stack_n;
371 OnigOptionType options;
372 OnigRegion* region;
373 const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
374 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
375 int best_len; /* for ONIG_OPTION_FIND_LONGEST */
376 UChar* best_s;
377 #endif
378 #ifdef USE_COMBINATION_EXPLOSION_CHECK
379 void* state_check_buff;
380 int state_check_buff_size;
381 #endif
382 } MatchArg;
383
384 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
385 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
386 (msa).stack_p = (void* )0;\
387 (msa).options = (arg_option);\
388 (msa).region = (arg_region);\
389 (msa).start = (arg_start);\
390 (msa).best_len = ONIG_MISMATCH;\
391 } while (0)
392 #else
393 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
394 (msa).stack_p = (void* )0;\
395 (msa).options = (arg_option);\
396 (msa).region = (arg_region);\
397 (msa).start = (arg_start);\
398 } while (0)
399 #endif
400
401 #ifdef USE_COMBINATION_EXPLOSION_CHECK
402
403 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
404
405 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
406 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
407 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
408 offset = ((offset) * (state_num)) >> 3;\
409 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
410 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
411 (msa).state_check_buff = (void* )xmalloc(size);\
412 else \
413 (msa).state_check_buff = (void* )xalloca(size);\
414 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
415 (size_t )(size - (offset))); \
416 (msa).state_check_buff_size = size;\
417 }\
418 else {\
419 (msa).state_check_buff = (void* )0;\
420 (msa).state_check_buff_size = 0;\
421 }\
422 }\
423 else {\
424 (msa).state_check_buff = (void* )0;\
425 (msa).state_check_buff_size = 0;\
426 }\
427 } while (0)
428
429 #define MATCH_ARG_FREE(msa) do {\
430 if ((msa).stack_p) xfree((msa).stack_p);\
431 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
432 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
433 }\
434 } while (0);
435 #else
436 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
437 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
438 #endif
439
440
441
442 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
443 if (msa->stack_p) {\
444 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
445 stk_alloc = (StackType* )(msa->stack_p);\
446 stk_base = stk_alloc;\
447 stk = stk_base;\
448 stk_end = stk_base + msa->stack_n;\
449 }\
450 else {\
451 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
452 + sizeof(StackType) * (stack_num));\
453 stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
454 stk_base = stk_alloc;\
455 stk = stk_base;\
456 stk_end = stk_base + (stack_num);\
457 }\
458 } while(0)
459
460 #define STACK_SAVE do{\
461 if (stk_base != stk_alloc) {\
462 msa->stack_p = stk_base;\
463 msa->stack_n = stk_end - stk_base;\
464 };\
465 } while(0)
466
467 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
468
469 extern unsigned int
onig_get_match_stack_limit_size(void)470 onig_get_match_stack_limit_size(void)
471 {
472 return MatchStackLimitSize;
473 }
474
475 extern int
onig_set_match_stack_limit_size(unsigned int size)476 onig_set_match_stack_limit_size(unsigned int size)
477 {
478 MatchStackLimitSize = size;
479 return 0;
480 }
481
482 static int
stack_double(StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,StackType * stk_alloc,MatchArg * msa)483 stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
484 StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
485 {
486 unsigned int n;
487 StackType *x, *stk_base, *stk_end, *stk;
488
489 stk_base = *arg_stk_base;
490 stk_end = *arg_stk_end;
491 stk = *arg_stk;
492
493 n = stk_end - stk_base;
494 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
495 x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
496 if (IS_NULL(x)) {
497 STACK_SAVE;
498 return ONIGERR_MEMORY;
499 }
500 xmemcpy(x, stk_base, n * sizeof(StackType));
501 n *= 2;
502 }
503 else {
504 n *= 2;
505 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
506 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
507 return ONIGERR_MATCH_STACK_LIMIT_OVER;
508 else
509 n = MatchStackLimitSize;
510 }
511 x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
512 if (IS_NULL(x)) {
513 STACK_SAVE;
514 return ONIGERR_MEMORY;
515 }
516 }
517 *arg_stk = x + (stk - stk_base);
518 *arg_stk_base = x;
519 *arg_stk_end = x + n;
520 return 0;
521 }
522
523 #define STACK_ENSURE(n) do {\
524 if (stk_end - stk < (n)) {\
525 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
526 if (r != 0) { STACK_SAVE; return r; } \
527 }\
528 } while(0)
529
530 #define STACK_AT(index) (stk_base + (index))
531 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
532
533 #define STACK_PUSH_TYPE(stack_type) do {\
534 STACK_ENSURE(1);\
535 stk->type = (stack_type);\
536 STACK_INC;\
537 } while(0)
538
539 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
540
541 #ifdef USE_COMBINATION_EXPLOSION_CHECK
542 #define STATE_CHECK_POS(s,snum) \
543 (((s) - str) * num_comb_exp_check + ((snum) - 1))
544 #define STATE_CHECK_VAL(v,snum) do {\
545 if (state_check_buff != NULL) {\
546 int x = STATE_CHECK_POS(s,snum);\
547 (v) = state_check_buff[x/8] & (1<<(x%8));\
548 }\
549 else (v) = 0;\
550 } while(0)
551
552
553 #define ELSE_IF_STATE_CHECK_MARK(stk) \
554 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
555 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
556 state_check_buff[x/8] |= (1<<(x%8)); \
557 }
558
559 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
560 STACK_ENSURE(1);\
561 stk->type = (stack_type);\
562 stk->u.state.pcode = (pat);\
563 stk->u.state.pstr = (s);\
564 stk->u.state.pstr_prev = (sprev);\
565 stk->u.state.state_check = 0;\
566 STACK_INC;\
567 } while(0)
568
569 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
570 stk->type = (stack_type);\
571 stk->u.state.pcode = (pat);\
572 stk->u.state.state_check = 0;\
573 STACK_INC;\
574 } while(0)
575
576 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
577 STACK_ENSURE(1);\
578 stk->type = STK_ALT;\
579 stk->u.state.pcode = (pat);\
580 stk->u.state.pstr = (s);\
581 stk->u.state.pstr_prev = (sprev);\
582 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
583 STACK_INC;\
584 } while(0)
585
586 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
587 if (state_check_buff != NULL) {\
588 STACK_ENSURE(1);\
589 stk->type = STK_STATE_CHECK_MARK;\
590 stk->u.state.pstr = (s);\
591 stk->u.state.state_check = (snum);\
592 STACK_INC;\
593 }\
594 } while(0)
595
596 #else /* USE_COMBINATION_EXPLOSION_CHECK */
597
598 #define ELSE_IF_STATE_CHECK_MARK(stk)
599
600 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
601 STACK_ENSURE(1);\
602 stk->type = (stack_type);\
603 stk->u.state.pcode = (pat);\
604 stk->u.state.pstr = (s);\
605 stk->u.state.pstr_prev = (sprev);\
606 STACK_INC;\
607 } while(0)
608
609 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
610 stk->type = (stack_type);\
611 stk->u.state.pcode = (pat);\
612 STACK_INC;\
613 } while(0)
614 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
615
616 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
617 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
618 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
619 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
620 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
621 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
622
623 #define STACK_PUSH_REPEAT(id, pat) do {\
624 STACK_ENSURE(1);\
625 stk->type = STK_REPEAT;\
626 stk->u.repeat.num = (id);\
627 stk->u.repeat.pcode = (pat);\
628 stk->u.repeat.count = 0;\
629 STACK_INC;\
630 } while(0)
631
632 #define STACK_PUSH_REPEAT_INC(sindex) do {\
633 STACK_ENSURE(1);\
634 stk->type = STK_REPEAT_INC;\
635 stk->u.repeat_inc.si = (sindex);\
636 STACK_INC;\
637 } while(0)
638
639 #define STACK_PUSH_MEM_START(mnum, s) do {\
640 STACK_ENSURE(1);\
641 stk->type = STK_MEM_START;\
642 stk->u.mem.num = (mnum);\
643 stk->u.mem.pstr = (s);\
644 stk->u.mem.start = mem_start_stk[mnum];\
645 stk->u.mem.end = mem_end_stk[mnum];\
646 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
647 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
648 STACK_INC;\
649 } while(0)
650
651 #define STACK_PUSH_MEM_END(mnum, s) do {\
652 STACK_ENSURE(1);\
653 stk->type = STK_MEM_END;\
654 stk->u.mem.num = (mnum);\
655 stk->u.mem.pstr = (s);\
656 stk->u.mem.start = mem_start_stk[mnum];\
657 stk->u.mem.end = mem_end_stk[mnum];\
658 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
659 STACK_INC;\
660 } while(0)
661
662 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
663 STACK_ENSURE(1);\
664 stk->type = STK_MEM_END_MARK;\
665 stk->u.mem.num = (mnum);\
666 STACK_INC;\
667 } while(0)
668
669 #define STACK_GET_MEM_START(mnum, k) do {\
670 int level = 0;\
671 k = stk;\
672 while (k > stk_base) {\
673 k--;\
674 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
675 && k->u.mem.num == (mnum)) {\
676 level++;\
677 }\
678 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
679 if (level == 0) break;\
680 level--;\
681 }\
682 }\
683 } while (0)
684
685 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
686 int level = 0;\
687 while (k < stk) {\
688 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
689 if (level == 0) (start) = k->u.mem.pstr;\
690 level++;\
691 }\
692 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
693 level--;\
694 if (level == 0) {\
695 (end) = k->u.mem.pstr;\
696 break;\
697 }\
698 }\
699 k++;\
700 }\
701 } while (0)
702
703 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
704 STACK_ENSURE(1);\
705 stk->type = STK_NULL_CHECK_START;\
706 stk->u.null_check.num = (cnum);\
707 stk->u.null_check.pstr = (s);\
708 STACK_INC;\
709 } while(0)
710
711 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
712 STACK_ENSURE(1);\
713 stk->type = STK_NULL_CHECK_END;\
714 stk->u.null_check.num = (cnum);\
715 STACK_INC;\
716 } while(0)
717
718 #define STACK_PUSH_CALL_FRAME(pat) do {\
719 STACK_ENSURE(1);\
720 stk->type = STK_CALL_FRAME;\
721 stk->u.call_frame.ret_addr = (pat);\
722 STACK_INC;\
723 } while(0)
724
725 #define STACK_PUSH_RETURN do {\
726 STACK_ENSURE(1);\
727 stk->type = STK_RETURN;\
728 STACK_INC;\
729 } while(0)
730
731
732 #ifdef ONIG_DEBUG
733 #define STACK_BASE_CHECK(p, at) \
734 if ((p) < stk_base) {\
735 fprintf(stderr, "at %s\n", at);\
736 goto stack_error;\
737 }
738 #else
739 #define STACK_BASE_CHECK(p, at)
740 #endif
741
742 #define STACK_POP_ONE do {\
743 stk--;\
744 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
745 } while(0)
746
747 #define STACK_POP do {\
748 switch (pop_level) {\
749 case STACK_POP_LEVEL_FREE:\
750 while (1) {\
751 stk--;\
752 STACK_BASE_CHECK(stk, "STACK_POP"); \
753 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
754 ELSE_IF_STATE_CHECK_MARK(stk);\
755 }\
756 break;\
757 case STACK_POP_LEVEL_MEM_START:\
758 while (1) {\
759 stk--;\
760 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
761 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
762 else if (stk->type == STK_MEM_START) {\
763 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
764 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
765 }\
766 ELSE_IF_STATE_CHECK_MARK(stk);\
767 }\
768 break;\
769 default:\
770 while (1) {\
771 stk--;\
772 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
773 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
774 else if (stk->type == STK_MEM_START) {\
775 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
776 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
777 }\
778 else if (stk->type == STK_REPEAT_INC) {\
779 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
780 }\
781 else if (stk->type == STK_MEM_END) {\
782 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
783 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
784 }\
785 ELSE_IF_STATE_CHECK_MARK(stk);\
786 }\
787 break;\
788 }\
789 } while(0)
790
791 #define STACK_POP_TIL_POS_NOT do {\
792 while (1) {\
793 stk--;\
794 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
795 if (stk->type == STK_POS_NOT) break;\
796 else if (stk->type == STK_MEM_START) {\
797 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
798 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
799 }\
800 else if (stk->type == STK_REPEAT_INC) {\
801 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
802 }\
803 else if (stk->type == STK_MEM_END) {\
804 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
805 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
806 }\
807 ELSE_IF_STATE_CHECK_MARK(stk);\
808 }\
809 } while(0)
810
811 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
812 while (1) {\
813 stk--;\
814 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
815 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
816 else if (stk->type == STK_MEM_START) {\
817 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
818 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
819 }\
820 else if (stk->type == STK_REPEAT_INC) {\
821 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
822 }\
823 else if (stk->type == STK_MEM_END) {\
824 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
825 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
826 }\
827 ELSE_IF_STATE_CHECK_MARK(stk);\
828 }\
829 } while(0)
830
831 #define STACK_POS_END(k) do {\
832 k = stk;\
833 while (1) {\
834 k--;\
835 STACK_BASE_CHECK(k, "STACK_POS_END"); \
836 if (IS_TO_VOID_TARGET(k)) {\
837 k->type = STK_VOID;\
838 }\
839 else if (k->type == STK_POS) {\
840 k->type = STK_VOID;\
841 break;\
842 }\
843 }\
844 } while(0)
845
846 #define STACK_STOP_BT_END do {\
847 StackType *k = stk;\
848 while (1) {\
849 k--;\
850 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
851 if (IS_TO_VOID_TARGET(k)) {\
852 k->type = STK_VOID;\
853 }\
854 else if (k->type == STK_STOP_BT) {\
855 k->type = STK_VOID;\
856 break;\
857 }\
858 }\
859 } while(0)
860
861 #define STACK_NULL_CHECK(isnull,id,s) do {\
862 StackType* k = stk;\
863 while (1) {\
864 k--;\
865 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
866 if (k->type == STK_NULL_CHECK_START) {\
867 if (k->u.null_check.num == (id)) {\
868 (isnull) = (k->u.null_check.pstr == (s));\
869 break;\
870 }\
871 }\
872 }\
873 } while(0)
874
875 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
876 int level = 0;\
877 StackType* k = stk;\
878 while (1) {\
879 k--;\
880 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
881 if (k->type == STK_NULL_CHECK_START) {\
882 if (k->u.null_check.num == (id)) {\
883 if (level == 0) {\
884 (isnull) = (k->u.null_check.pstr == (s));\
885 break;\
886 }\
887 else level--;\
888 }\
889 }\
890 else if (k->type == STK_NULL_CHECK_END) {\
891 level++;\
892 }\
893 }\
894 } while(0)
895
896 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
897 StackType* k = stk;\
898 while (1) {\
899 k--;\
900 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
901 if (k->type == STK_NULL_CHECK_START) {\
902 if (k->u.null_check.num == (id)) {\
903 if (k->u.null_check.pstr != (s)) {\
904 (isnull) = 0;\
905 break;\
906 }\
907 else {\
908 UChar* endp;\
909 (isnull) = 1;\
910 while (k < stk) {\
911 if (k->type == STK_MEM_START) {\
912 if (k->u.mem.end == INVALID_STACK_INDEX) {\
913 (isnull) = 0; break;\
914 }\
915 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
916 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
917 else\
918 endp = (UChar* )k->u.mem.end;\
919 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
920 (isnull) = 0; break;\
921 }\
922 else if (endp != s) {\
923 (isnull) = -1; /* empty, but position changed */ \
924 }\
925 }\
926 k++;\
927 }\
928 break;\
929 }\
930 }\
931 }\
932 }\
933 } while(0)
934
935 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
936 int level = 0;\
937 StackType* k = stk;\
938 while (1) {\
939 k--;\
940 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
941 if (k->type == STK_NULL_CHECK_START) {\
942 if (k->u.null_check.num == (id)) {\
943 if (level == 0) {\
944 if (k->u.null_check.pstr != (s)) {\
945 (isnull) = 0;\
946 break;\
947 }\
948 else {\
949 UChar* endp;\
950 (isnull) = 1;\
951 while (k < stk) {\
952 if (k->type == STK_MEM_START) {\
953 if (k->u.mem.end == INVALID_STACK_INDEX) {\
954 (isnull) = 0; break;\
955 }\
956 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
957 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
958 else\
959 endp = (UChar* )k->u.mem.end;\
960 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
961 (isnull) = 0; break;\
962 }\
963 else if (endp != s) {\
964 (isnull) = -1; /* empty, but position changed */ \
965 }\
966 }\
967 k++;\
968 }\
969 break;\
970 }\
971 }\
972 else {\
973 level--;\
974 }\
975 }\
976 }\
977 else if (k->type == STK_NULL_CHECK_END) {\
978 if (k->u.null_check.num == (id)) level++;\
979 }\
980 }\
981 } while(0)
982
983 #define STACK_GET_REPEAT(id, k) do {\
984 int level = 0;\
985 k = stk;\
986 while (1) {\
987 k--;\
988 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
989 if (k->type == STK_REPEAT) {\
990 if (level == 0) {\
991 if (k->u.repeat.num == (id)) {\
992 break;\
993 }\
994 }\
995 }\
996 else if (k->type == STK_CALL_FRAME) level--;\
997 else if (k->type == STK_RETURN) level++;\
998 }\
999 } while (0)
1000
1001 #define STACK_RETURN(addr) do {\
1002 int level = 0;\
1003 StackType* k = stk;\
1004 while (1) {\
1005 k--;\
1006 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1007 if (k->type == STK_CALL_FRAME) {\
1008 if (level == 0) {\
1009 (addr) = k->u.call_frame.ret_addr;\
1010 break;\
1011 }\
1012 else level--;\
1013 }\
1014 else if (k->type == STK_RETURN)\
1015 level++;\
1016 }\
1017 } while(0)
1018
1019
1020 #define STRING_CMP(s1,s2,len) do {\
1021 while (len-- > 0) {\
1022 if (*s1++ != *s2++) goto fail;\
1023 }\
1024 } while(0)
1025
1026 #define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
1027 if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
1028 goto fail; \
1029 } while(0)
1030
string_cmp_ic(OnigEncoding enc,int ambig_flag,UChar * s1,UChar ** ps2,int mblen)1031 static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
1032 UChar* s1, UChar** ps2, int mblen)
1033 {
1034 UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
1035 UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
1036 UChar *p1, *p2, *end, *s2, *end2;
1037 int len1, len2;
1038
1039 s2 = *ps2;
1040 end = s1 + mblen;
1041 end2 = s2 + mblen;
1042 while (s1 < end) {
1043 len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
1044 len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
1045 if (len1 != len2) return 0;
1046 p1 = buf1;
1047 p2 = buf2;
1048 while (len1-- > 0) {
1049 if (*p1 != *p2) return 0;
1050 p1++;
1051 p2++;
1052 }
1053 }
1054
1055 *ps2 = s2;
1056 return 1;
1057 }
1058
1059 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1060 is_fail = 0;\
1061 while (len-- > 0) {\
1062 if (*s1++ != *s2++) {\
1063 is_fail = 1; break;\
1064 }\
1065 }\
1066 } while(0)
1067
1068 #define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
1069 if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
1070 is_fail = 1; \
1071 else \
1072 is_fail = 0; \
1073 } while(0)
1074
1075
1076 #define ON_STR_BEGIN(s) ((s) == str)
1077 #define ON_STR_END(s) ((s) == end)
1078 #define IS_EMPTY_STR (str == end)
1079
1080 #define DATA_ENSURE(n) \
1081 if (s + (n) > end) goto fail
1082
1083 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1084
1085 #ifdef USE_CAPTURE_HISTORY
1086 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)1087 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
1088 StackType* stk_top, UChar* str, regex_t* reg)
1089 {
1090 int n, r;
1091 OnigCaptureTreeNode* child;
1092 StackType* k = *kp;
1093
1094 while (k < stk_top) {
1095 if (k->type == STK_MEM_START) {
1096 n = k->u.mem.num;
1097 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1098 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1099 child = history_node_new();
1100 CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
1101 child->group = n;
1102 child->beg = (int )(k->u.mem.pstr - str);
1103 r = history_tree_add_child(node, child);
1104 if (r != 0) return r;
1105 *kp = (k + 1);
1106 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1107 if (r != 0) return r;
1108
1109 k = *kp;
1110 child->end = (int )(k->u.mem.pstr - str);
1111 }
1112 }
1113 else if (k->type == STK_MEM_END) {
1114 if (k->u.mem.num == node->group) {
1115 node->end = (int )(k->u.mem.pstr - str);
1116 *kp = k;
1117 return 0;
1118 }
1119 }
1120 k++;
1121 }
1122
1123 return 1; /* 1: root node ending. */
1124 }
1125 #endif
1126
1127 #ifdef USE_BACKREF_AT_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1128 static int mem_is_in_memp(int mem, int num, UChar* memp)
1129 {
1130 int i;
1131 MemNumType m;
1132
1133 for (i = 0; i < num; i++) {
1134 GET_MEMNUM_INC(m, memp);
1135 if (mem == (int )m) return 1;
1136 }
1137 return 0;
1138 }
1139
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int ambig_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1140 static int backref_match_at_nested_level(regex_t* reg
1141 , StackType* top, StackType* stk_base
1142 , int ignore_case, int ambig_flag
1143 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1144 {
1145 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1146 int level;
1147 StackType* k;
1148
1149 level = 0;
1150 k = top;
1151 k--;
1152 while (k >= stk_base) {
1153 if (k->type == STK_CALL_FRAME) {
1154 level--;
1155 }
1156 else if (k->type == STK_RETURN) {
1157 level++;
1158 }
1159 else if (level == nest) {
1160 if (k->type == STK_MEM_START) {
1161 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1162 pstart = k->u.mem.pstr;
1163 if (pend != NULL_UCHARP) {
1164 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1165 p = pstart;
1166 ss = *s;
1167
1168 if (ignore_case != 0) {
1169 if (string_cmp_ic(reg->enc, ambig_flag,
1170 pstart, &ss, (int )(pend - pstart)) == 0)
1171 return 0; /* or goto next_mem; */
1172 }
1173 else {
1174 while (p < pend) {
1175 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1176 }
1177 }
1178
1179 *s = ss;
1180 return 1;
1181 }
1182 }
1183 }
1184 else if (k->type == STK_MEM_END) {
1185 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1186 pend = k->u.mem.pstr;
1187 }
1188 }
1189 }
1190 k--;
1191 }
1192
1193 return 0;
1194 }
1195 #endif /* USE_BACKREF_AT_LEVEL */
1196
1197
1198 #ifdef RUBY_PLATFORM
1199
1200 typedef struct {
1201 int state;
1202 regex_t* reg;
1203 MatchArg* msa;
1204 StackType* stk_base;
1205 } TrapEnsureArg;
1206
1207 static VALUE
trap_ensure(VALUE arg)1208 trap_ensure(VALUE arg)
1209 {
1210 TrapEnsureArg* ta = (TrapEnsureArg* )arg;
1211
1212 if (ta->state == 0) { /* trap_exec() is not normal return */
1213 ONIG_STATE_DEC_THREAD(ta->reg);
1214 if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
1215 xfree(ta->stk_base);
1216
1217 MATCH_ARG_FREE(*(ta->msa));
1218 }
1219
1220 return Qnil;
1221 }
1222
1223 static VALUE
trap_exec(VALUE arg)1224 trap_exec(VALUE arg)
1225 {
1226 TrapEnsureArg* ta;
1227
1228 rb_trap_exec();
1229
1230 ta = (TrapEnsureArg* )arg;
1231 ta->state = 1; /* normal return */
1232 return Qnil;
1233 }
1234
1235 extern void
onig_exec_trap(regex_t * reg,MatchArg * msa,StackType * stk_base)1236 onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
1237 {
1238 VALUE arg;
1239 TrapEnsureArg ta;
1240
1241 ta.state = 0;
1242 ta.reg = reg;
1243 ta.msa = msa;
1244 ta.stk_base = stk_base;
1245 arg = (VALUE )(&ta);
1246 rb_ensure(trap_exec, arg, trap_ensure, arg);
1247 }
1248
1249 #define CHECK_INTERRUPT_IN_MATCH_AT do {\
1250 if (rb_trap_pending) {\
1251 if (! rb_prohibit_interrupt) {\
1252 onig_exec_trap(reg, msa, stk_base);\
1253 }\
1254 }\
1255 } while (0)
1256 #else
1257 #define CHECK_INTERRUPT_IN_MATCH_AT
1258 #endif /* RUBY_PLATFORM */
1259
1260 #ifdef ONIG_DEBUG_STATISTICS
1261
1262 #define USE_TIMEOFDAY
1263
1264 #ifdef USE_TIMEOFDAY
1265 #ifdef HAVE_SYS_TIME_H
1266 #include <sys/time.h>
1267 #endif
1268 #ifdef HAVE_UNISTD_H
1269 #include <unistd.h>
1270 #endif
1271 static struct timeval ts, te;
1272 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1273 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1274 (((te).tv_sec - (ts).tv_sec)*1000000))
1275 #else
1276 #ifdef HAVE_SYS_TIMES_H
1277 #include <sys/times.h>
1278 #endif
1279 static struct tms ts, te;
1280 #define GETTIME(t) times(&(t))
1281 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1282 #endif
1283
1284 static int OpCounter[256];
1285 static int OpPrevCounter[256];
1286 static unsigned long OpTime[256];
1287 static int OpCurr = OP_FINISH;
1288 static int OpPrevTarget = OP_FAIL;
1289 static int MaxStackDepth = 0;
1290
1291 #define STAT_OP_IN(opcode) do {\
1292 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1293 OpCurr = opcode;\
1294 OpCounter[opcode]++;\
1295 GETTIME(ts);\
1296 } while (0)
1297
1298 #define STAT_OP_OUT do {\
1299 GETTIME(te);\
1300 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1301 } while (0)
1302
1303 #ifdef RUBY_PLATFORM
1304
1305 /*
1306 * :nodoc:
1307 */
onig_stat_print(void)1308 static VALUE onig_stat_print(void)
1309 {
1310 onig_print_statistics(stderr);
1311 return Qnil;
1312 }
1313 #endif
1314
onig_statistics_init(void)1315 extern void onig_statistics_init(void)
1316 {
1317 int i;
1318 for (i = 0; i < 256; i++) {
1319 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1320 }
1321 MaxStackDepth = 0;
1322
1323 #ifdef RUBY_PLATFORM
1324 rb_define_global_function("onig_stat_print", onig_stat_print, 0);
1325 #endif
1326 }
1327
1328 extern void
onig_print_statistics(FILE * f)1329 onig_print_statistics(FILE* f)
1330 {
1331 int i;
1332 fprintf(f, " count prev time\n");
1333 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1334 fprintf(f, "%8d: %8d: %10ld: %s\n",
1335 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1336 }
1337 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1338 }
1339
1340 #define STACK_INC do {\
1341 stk++;\
1342 if (stk - stk_base > MaxStackDepth) \
1343 MaxStackDepth = stk - stk_base;\
1344 } while (0)
1345
1346 #else
1347 #define STACK_INC stk++
1348
1349 #define STAT_OP_IN(opcode)
1350 #define STAT_OP_OUT
1351 #endif
1352
1353 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)1354 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
1355 {
1356 OnigCodePoint n, *data;
1357 OnigCodePoint low, high, x;
1358
1359 GET_CODE_POINT(n, p);
1360 data = (OnigCodePoint* )p;
1361 data++;
1362
1363 for (low = 0, high = n; low < high; ) {
1364 x = (low + high) >> 1;
1365 if (code > data[x * 2 + 1])
1366 low = x + 1;
1367 else
1368 high = x;
1369 }
1370
1371 return ((low < n && code >= data[low * 2]) ? 1 : 0);
1372 }
1373
1374 static int
is_code_in_cc(int enclen,OnigCodePoint code,CClassNode * cc)1375 is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
1376 {
1377 int found;
1378
1379 if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
1380 if (IS_NULL(cc->mbuf)) {
1381 found = 0;
1382 }
1383 else {
1384 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
1385 }
1386 }
1387 else {
1388 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
1389 }
1390
1391 if (IS_CCLASS_NOT(cc))
1392 return !found;
1393 else
1394 return found;
1395 }
1396
1397 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)1398 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
1399 {
1400 int len;
1401
1402 if (ONIGENC_MBC_MINLEN(enc) > 1) {
1403 len = 2;
1404 }
1405 else {
1406 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
1407 }
1408 return is_code_in_cc(len, code, cc);
1409 }
1410
1411
1412 /* matching region of POSIX API */
1413 typedef int regoff_t;
1414
1415 typedef struct {
1416 regoff_t rm_so;
1417 regoff_t rm_eo;
1418 } posix_regmatch_t;
1419
1420 /* match data(str - end) from position (sstart). */
1421 /* if sstart == str then set sprev to NULL. */
1422 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * sstart,UChar * sprev,MatchArg * msa)1423 match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
1424 UChar* sprev, MatchArg* msa)
1425 {
1426 static UChar FinishCode[] = { OP_FINISH };
1427
1428 int i, n, num_mem, best_len, pop_level;
1429 LengthType tlen, tlen2;
1430 MemNumType mem;
1431 RelAddrType addr;
1432 OnigOptionType option = reg->options;
1433 OnigEncoding encode = reg->enc;
1434 OnigAmbigType ambig_flag = reg->ambig_flag;
1435 UChar *s, *q, *sbegin;
1436 UChar *p = reg->p;
1437 char *alloca_base;
1438 StackType *stk_alloc, *stk_base, *stk, *stk_end;
1439 StackType *stkp; /* used as any purpose. */
1440 StackIndex si;
1441 StackIndex *repeat_stk;
1442 StackIndex *mem_start_stk, *mem_end_stk;
1443 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1444 int scv;
1445 unsigned char* state_check_buff = msa->state_check_buff;
1446 int num_comb_exp_check = reg->num_comb_exp_check;
1447 #endif
1448 n = reg->num_repeat + reg->num_mem * 2;
1449
1450 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1451 pop_level = reg->stack_pop_level;
1452 num_mem = reg->num_mem;
1453 repeat_stk = (StackIndex* )alloca_base;
1454
1455 mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
1456 mem_end_stk = mem_start_stk + num_mem;
1457 mem_start_stk--; /* for index start from 1,
1458 mem_start_stk[1]..mem_start_stk[num_mem] */
1459 mem_end_stk--; /* for index start from 1,
1460 mem_end_stk[1]..mem_end_stk[num_mem] */
1461 for (i = 1; i <= num_mem; i++) {
1462 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1463 }
1464
1465 #ifdef ONIG_DEBUG_MATCH
1466 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1467 (int )str, (int )end, (int )sstart, (int )sprev);
1468 fprintf(stderr, "size: %d, start offset: %d\n",
1469 (int )(end - str), (int )(sstart - str));
1470 #endif
1471
1472 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1473 best_len = ONIG_MISMATCH;
1474 s = (UChar* )sstart;
1475 while (1) {
1476 #ifdef ONIG_DEBUG_MATCH
1477 {
1478 UChar *q, *bp, buf[50];
1479 int len;
1480 fprintf(stderr, "%4d> \"", (int )(s - str));
1481 bp = buf;
1482 for (i = 0, q = s; i < 7 && q < end; i++) {
1483 len = enc_len(encode, q);
1484 while (len-- > 0) *bp++ = *q++;
1485 }
1486 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1487 else { xmemcpy(bp, "\"", 1); bp += 1; }
1488 *bp = 0;
1489 fputs(buf, stderr);
1490 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1491 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1492 fprintf(stderr, "\n");
1493 }
1494 #endif
1495
1496 sbegin = s;
1497 switch (*p++) {
1498 case OP_END: STAT_OP_IN(OP_END);
1499 n = s - sstart;
1500 if (n > best_len) {
1501 OnigRegion* region;
1502 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1503 if (IS_FIND_LONGEST(option)) {
1504 if (n > msa->best_len) {
1505 msa->best_len = n;
1506 msa->best_s = (UChar* )sstart;
1507 }
1508 else
1509 goto end_best_len;
1510 }
1511 #endif
1512 best_len = n;
1513 region = msa->region;
1514 if (region) {
1515 #ifdef USE_POSIX_REGION_OPTION
1516 if (IS_POSIX_REGION(msa->options)) {
1517 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1518
1519 rmt[0].rm_so = sstart - str;
1520 rmt[0].rm_eo = s - str;
1521 for (i = 1; i <= num_mem; i++) {
1522 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1523 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1524 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1525 else
1526 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1527
1528 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1529 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1530 : (UChar* )((void* )mem_end_stk[i])) - str;
1531 }
1532 else {
1533 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1534 }
1535 }
1536 }
1537 else {
1538 #endif /* USE_POSIX_REGION_OPTION */
1539 region->beg[0] = sstart - str;
1540 region->end[0] = s - str;
1541 for (i = 1; i <= num_mem; i++) {
1542 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1543 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1544 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1545 else
1546 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1547
1548 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1549 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1550 : (UChar* )((void* )mem_end_stk[i])) - str;
1551 }
1552 else {
1553 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1554 }
1555 }
1556
1557 #ifdef USE_CAPTURE_HISTORY
1558 if (reg->capture_history != 0) {
1559 int r;
1560 OnigCaptureTreeNode* node;
1561
1562 if (IS_NULL(region->history_root)) {
1563 region->history_root = node = history_node_new();
1564 CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
1565 }
1566 else {
1567 node = region->history_root;
1568 history_tree_clear(node);
1569 }
1570
1571 node->group = 0;
1572 node->beg = sstart - str;
1573 node->end = s - str;
1574
1575 stkp = stk_base;
1576 r = make_capture_history_tree(region->history_root, &stkp,
1577 stk, (UChar* )str, reg);
1578 if (r < 0) {
1579 best_len = r; /* error code */
1580 goto finish;
1581 }
1582 }
1583 #endif /* USE_CAPTURE_HISTORY */
1584 #ifdef USE_POSIX_REGION_OPTION
1585 } /* else IS_POSIX_REGION() */
1586 #endif
1587 } /* if (region) */
1588 } /* n > best_len */
1589
1590 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1591 end_best_len:
1592 #endif
1593 STAT_OP_OUT;
1594
1595 if (IS_FIND_CONDITION(option)) {
1596 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1597 best_len = ONIG_MISMATCH;
1598 goto fail; /* for retry */
1599 }
1600 if (IS_FIND_LONGEST(option) && s < end) {
1601 goto fail; /* for retry */
1602 }
1603 }
1604
1605 /* default behavior: return first-matching result. */
1606 goto finish;
1607 break;
1608
1609 case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
1610 #if 0
1611 DATA_ENSURE(1);
1612 if (*p != *s) goto fail;
1613 p++; s++;
1614 #endif
1615 if (*p != *s++) goto fail;
1616 DATA_ENSURE(0);
1617 p++;
1618 STAT_OP_OUT;
1619 break;
1620
1621 case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
1622 {
1623 int len;
1624 UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1625
1626 DATA_ENSURE(1);
1627 ss = s;
1628 sp = p;
1629
1630 len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1631 DATA_ENSURE(0);
1632 q = lowbuf;
1633 while (len-- > 0) {
1634 if (*p != *q) {
1635 goto fail;
1636 }
1637 p++; q++;
1638 }
1639 }
1640 STAT_OP_OUT;
1641 break;
1642
1643 case OP_EXACT2: STAT_OP_IN(OP_EXACT2);
1644 DATA_ENSURE(2);
1645 if (*p != *s) goto fail;
1646 p++; s++;
1647 if (*p != *s) goto fail;
1648 sprev = s;
1649 p++; s++;
1650 STAT_OP_OUT;
1651 continue;
1652 break;
1653
1654 case OP_EXACT3: STAT_OP_IN(OP_EXACT3);
1655 DATA_ENSURE(3);
1656 if (*p != *s) goto fail;
1657 p++; s++;
1658 if (*p != *s) goto fail;
1659 p++; s++;
1660 if (*p != *s) goto fail;
1661 sprev = s;
1662 p++; s++;
1663 STAT_OP_OUT;
1664 continue;
1665 break;
1666
1667 case OP_EXACT4: STAT_OP_IN(OP_EXACT4);
1668 DATA_ENSURE(4);
1669 if (*p != *s) goto fail;
1670 p++; s++;
1671 if (*p != *s) goto fail;
1672 p++; s++;
1673 if (*p != *s) goto fail;
1674 p++; s++;
1675 if (*p != *s) goto fail;
1676 sprev = s;
1677 p++; s++;
1678 STAT_OP_OUT;
1679 continue;
1680 break;
1681
1682 case OP_EXACT5: STAT_OP_IN(OP_EXACT5);
1683 DATA_ENSURE(5);
1684 if (*p != *s) goto fail;
1685 p++; s++;
1686 if (*p != *s) goto fail;
1687 p++; s++;
1688 if (*p != *s) goto fail;
1689 p++; s++;
1690 if (*p != *s) goto fail;
1691 p++; s++;
1692 if (*p != *s) goto fail;
1693 sprev = s;
1694 p++; s++;
1695 STAT_OP_OUT;
1696 continue;
1697 break;
1698
1699 case OP_EXACTN: STAT_OP_IN(OP_EXACTN);
1700 GET_LENGTH_INC(tlen, p);
1701 DATA_ENSURE(tlen);
1702 while (tlen-- > 0) {
1703 if (*p++ != *s++) goto fail;
1704 }
1705 sprev = s - 1;
1706 STAT_OP_OUT;
1707 continue;
1708 break;
1709
1710 case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
1711 {
1712 int len;
1713 UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1714
1715 GET_LENGTH_INC(tlen, p);
1716 endp = p + tlen;
1717
1718 while (p < endp) {
1719 sprev = s;
1720 DATA_ENSURE(1);
1721 ss = s;
1722 sp = p;
1723
1724 len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1725 DATA_ENSURE(0);
1726 q = lowbuf;
1727 while (len-- > 0) {
1728 if (*p != *q) {
1729 goto fail;
1730 }
1731 p++; q++;
1732 }
1733 }
1734 }
1735
1736 STAT_OP_OUT;
1737 continue;
1738 break;
1739
1740 case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1);
1741 DATA_ENSURE(2);
1742 if (*p != *s) goto fail;
1743 p++; s++;
1744 if (*p != *s) goto fail;
1745 p++; s++;
1746 STAT_OP_OUT;
1747 break;
1748
1749 case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2);
1750 DATA_ENSURE(4);
1751 if (*p != *s) goto fail;
1752 p++; s++;
1753 if (*p != *s) goto fail;
1754 p++; s++;
1755 sprev = s;
1756 if (*p != *s) goto fail;
1757 p++; s++;
1758 if (*p != *s) goto fail;
1759 p++; s++;
1760 STAT_OP_OUT;
1761 continue;
1762 break;
1763
1764 case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3);
1765 DATA_ENSURE(6);
1766 if (*p != *s) goto fail;
1767 p++; s++;
1768 if (*p != *s) goto fail;
1769 p++; s++;
1770 if (*p != *s) goto fail;
1771 p++; s++;
1772 if (*p != *s) goto fail;
1773 p++; s++;
1774 sprev = s;
1775 if (*p != *s) goto fail;
1776 p++; s++;
1777 if (*p != *s) goto fail;
1778 p++; s++;
1779 STAT_OP_OUT;
1780 continue;
1781 break;
1782
1783 case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N);
1784 GET_LENGTH_INC(tlen, p);
1785 DATA_ENSURE(tlen * 2);
1786 while (tlen-- > 0) {
1787 if (*p != *s) goto fail;
1788 p++; s++;
1789 if (*p != *s) goto fail;
1790 p++; s++;
1791 }
1792 sprev = s - 2;
1793 STAT_OP_OUT;
1794 continue;
1795 break;
1796
1797 case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N);
1798 GET_LENGTH_INC(tlen, p);
1799 DATA_ENSURE(tlen * 3);
1800 while (tlen-- > 0) {
1801 if (*p != *s) goto fail;
1802 p++; s++;
1803 if (*p != *s) goto fail;
1804 p++; s++;
1805 if (*p != *s) goto fail;
1806 p++; s++;
1807 }
1808 sprev = s - 3;
1809 STAT_OP_OUT;
1810 continue;
1811 break;
1812
1813 case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN);
1814 GET_LENGTH_INC(tlen, p); /* mb-len */
1815 GET_LENGTH_INC(tlen2, p); /* string len */
1816 tlen2 *= tlen;
1817 DATA_ENSURE(tlen2);
1818 while (tlen2-- > 0) {
1819 if (*p != *s) goto fail;
1820 p++; s++;
1821 }
1822 sprev = s - tlen;
1823 STAT_OP_OUT;
1824 continue;
1825 break;
1826
1827 case OP_CCLASS: STAT_OP_IN(OP_CCLASS);
1828 DATA_ENSURE(1);
1829 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1830 p += SIZE_BITSET;
1831 s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1832 STAT_OP_OUT;
1833 break;
1834
1835 case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
1836 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1837
1838 cclass_mb:
1839 GET_LENGTH_INC(tlen, p);
1840 {
1841 OnigCodePoint code;
1842 UChar *ss;
1843 int mb_len;
1844
1845 DATA_ENSURE(1);
1846 mb_len = enc_len(encode, s);
1847 DATA_ENSURE(mb_len);
1848 ss = s;
1849 s += mb_len;
1850 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1851
1852 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1853 if (! onig_is_in_code_range(p, code)) goto fail;
1854 #else
1855 q = p;
1856 ALIGNMENT_RIGHT(q);
1857 if (! onig_is_in_code_range(q, code)) goto fail;
1858 #endif
1859 }
1860 p += tlen;
1861 STAT_OP_OUT;
1862 break;
1863
1864 case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
1865 DATA_ENSURE(1);
1866 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1867 p += SIZE_BITSET;
1868 goto cclass_mb;
1869 }
1870 else {
1871 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1872 goto fail;
1873
1874 p += SIZE_BITSET;
1875 GET_LENGTH_INC(tlen, p);
1876 p += tlen;
1877 s++;
1878 }
1879 STAT_OP_OUT;
1880 break;
1881
1882 case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT);
1883 DATA_ENSURE(1);
1884 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1885 p += SIZE_BITSET;
1886 s += enc_len(encode, s);
1887 STAT_OP_OUT;
1888 break;
1889
1890 case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
1891 DATA_ENSURE(1);
1892 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1893 s++;
1894 GET_LENGTH_INC(tlen, p);
1895 p += tlen;
1896 goto cc_mb_not_success;
1897 }
1898
1899 cclass_mb_not:
1900 GET_LENGTH_INC(tlen, p);
1901 {
1902 OnigCodePoint code;
1903 UChar *ss;
1904 int mb_len = enc_len(encode, s);
1905
1906 if (s + mb_len > end) {
1907 DATA_ENSURE(1);
1908 s = (UChar* )end;
1909 p += tlen;
1910 goto cc_mb_not_success;
1911 }
1912
1913 ss = s;
1914 s += mb_len;
1915 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1916
1917 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1918 if (onig_is_in_code_range(p, code)) goto fail;
1919 #else
1920 q = p;
1921 ALIGNMENT_RIGHT(q);
1922 if (onig_is_in_code_range(q, code)) goto fail;
1923 #endif
1924 }
1925 p += tlen;
1926
1927 cc_mb_not_success:
1928 STAT_OP_OUT;
1929 break;
1930
1931 case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
1932 DATA_ENSURE(1);
1933 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1934 p += SIZE_BITSET;
1935 goto cclass_mb_not;
1936 }
1937 else {
1938 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1939 goto fail;
1940
1941 p += SIZE_BITSET;
1942 GET_LENGTH_INC(tlen, p);
1943 p += tlen;
1944 s++;
1945 }
1946 STAT_OP_OUT;
1947 break;
1948
1949 case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
1950 {
1951 OnigCodePoint code;
1952 void *node;
1953 int mb_len;
1954 UChar *ss;
1955
1956 DATA_ENSURE(1);
1957 GET_POINTER_INC(node, p);
1958 mb_len = enc_len(encode, s);
1959 ss = s;
1960 s += mb_len;
1961 DATA_ENSURE(0);
1962 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1963 if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
1964 }
1965 STAT_OP_OUT;
1966 break;
1967
1968 case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
1969 DATA_ENSURE(1);
1970 n = enc_len(encode, s);
1971 DATA_ENSURE(n);
1972 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1973 s += n;
1974 STAT_OP_OUT;
1975 break;
1976
1977 case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
1978 DATA_ENSURE(1);
1979 n = enc_len(encode, s);
1980 DATA_ENSURE(n);
1981 s += n;
1982 STAT_OP_OUT;
1983 break;
1984
1985 case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
1986 while (s < end) {
1987 STACK_PUSH_ALT(p, s, sprev);
1988 n = enc_len(encode, s);
1989 DATA_ENSURE(n);
1990 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1991 sprev = s;
1992 s += n;
1993 }
1994 STAT_OP_OUT;
1995 break;
1996
1997 case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
1998 while (s < end) {
1999 STACK_PUSH_ALT(p, s, sprev);
2000 n = enc_len(encode, s);
2001 if (n > 1) {
2002 DATA_ENSURE(n);
2003 sprev = s;
2004 s += n;
2005 }
2006 else {
2007 sprev = s;
2008 s++;
2009 }
2010 }
2011 STAT_OP_OUT;
2012 break;
2013
2014 case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2015 while (s < end) {
2016 if (*p == *s) {
2017 STACK_PUSH_ALT(p + 1, s, sprev);
2018 }
2019 n = enc_len(encode, s);
2020 DATA_ENSURE(n);
2021 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2022 sprev = s;
2023 s += n;
2024 }
2025 p++;
2026 STAT_OP_OUT;
2027 break;
2028
2029 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2030 while (s < end) {
2031 if (*p == *s) {
2032 STACK_PUSH_ALT(p + 1, s, sprev);
2033 }
2034 n = enc_len(encode, s);
2035 if (n >1) {
2036 DATA_ENSURE(n);
2037 sprev = s;
2038 s += n;
2039 }
2040 else {
2041 sprev = s;
2042 s++;
2043 }
2044 }
2045 p++;
2046 STAT_OP_OUT;
2047 break;
2048
2049 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2050 case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2051 GET_STATE_CHECK_NUM_INC(mem, p);
2052 while (s < end) {
2053 STATE_CHECK_VAL(scv, mem);
2054 if (scv) goto fail;
2055
2056 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2057 n = enc_len(encode, s);
2058 DATA_ENSURE(n);
2059 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2060 sprev = s;
2061 s += n;
2062 }
2063 STAT_OP_OUT;
2064 break;
2065
2066 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
2067 STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2068
2069 GET_STATE_CHECK_NUM_INC(mem, p);
2070 while (s < end) {
2071 STATE_CHECK_VAL(scv, mem);
2072 if (scv) goto fail;
2073
2074 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2075 n = enc_len(encode, s);
2076 if (n > 1) {
2077 DATA_ENSURE(n);
2078 sprev = s;
2079 s += n;
2080 }
2081 else {
2082 sprev = s;
2083 s++;
2084 }
2085 }
2086 STAT_OP_OUT;
2087 break;
2088 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2089
2090 case OP_WORD: STAT_OP_IN(OP_WORD);
2091 DATA_ENSURE(1);
2092 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2093 goto fail;
2094
2095 s += enc_len(encode, s);
2096 STAT_OP_OUT;
2097 break;
2098
2099 case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
2100 DATA_ENSURE(1);
2101 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2102 goto fail;
2103
2104 s += enc_len(encode, s);
2105 STAT_OP_OUT;
2106 break;
2107
2108 case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
2109 if (ON_STR_BEGIN(s)) {
2110 DATA_ENSURE(1);
2111 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2112 goto fail;
2113 }
2114 else if (ON_STR_END(s)) {
2115 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2116 goto fail;
2117 }
2118 else {
2119 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2120 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2121 goto fail;
2122 }
2123 STAT_OP_OUT;
2124 continue;
2125 break;
2126
2127 case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
2128 if (ON_STR_BEGIN(s)) {
2129 if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
2130 goto fail;
2131 }
2132 else if (ON_STR_END(s)) {
2133 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2134 goto fail;
2135 }
2136 else {
2137 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2138 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2139 goto fail;
2140 }
2141 STAT_OP_OUT;
2142 continue;
2143 break;
2144
2145 #ifdef USE_WORD_BEGIN_END
2146 case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
2147 if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2148 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2149 STAT_OP_OUT;
2150 continue;
2151 }
2152 }
2153 goto fail;
2154 break;
2155
2156 case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
2157 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2158 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2159 STAT_OP_OUT;
2160 continue;
2161 }
2162 }
2163 goto fail;
2164 break;
2165 #endif
2166
2167 case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF);
2168 if (! ON_STR_BEGIN(s)) goto fail;
2169
2170 STAT_OP_OUT;
2171 continue;
2172 break;
2173
2174 case OP_END_BUF: STAT_OP_IN(OP_END_BUF);
2175 if (! ON_STR_END(s)) goto fail;
2176
2177 STAT_OP_OUT;
2178 continue;
2179 break;
2180
2181 case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE);
2182 if (ON_STR_BEGIN(s)) {
2183 if (IS_NOTBOL(msa->options)) goto fail;
2184 STAT_OP_OUT;
2185 continue;
2186 }
2187 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2188 STAT_OP_OUT;
2189 continue;
2190 }
2191 goto fail;
2192 break;
2193
2194 case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
2195 if (ON_STR_END(s)) {
2196 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2197 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2198 #endif
2199 if (IS_NOTEOL(msa->options)) goto fail;
2200 STAT_OP_OUT;
2201 continue;
2202 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2203 }
2204 #endif
2205 }
2206 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2207 STAT_OP_OUT;
2208 continue;
2209 }
2210 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2211 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2212 STAT_OP_OUT;
2213 continue;
2214 }
2215 #endif
2216 goto fail;
2217 break;
2218
2219 case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
2220 if (ON_STR_END(s)) {
2221 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2222 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2223 #endif
2224 if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
2225 STAT_OP_OUT;
2226 continue;
2227 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2228 }
2229 #endif
2230 }
2231 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2232 ON_STR_END(s + enc_len(encode, s))) {
2233 STAT_OP_OUT;
2234 continue;
2235 }
2236 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2237 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2238 UChar* ss = s + enc_len(encode, s);
2239 if (ON_STR_END(ss + enc_len(encode, ss))) {
2240 STAT_OP_OUT;
2241 continue;
2242 }
2243 }
2244 #endif
2245 goto fail;
2246 break;
2247
2248 case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION);
2249 if (s != msa->start)
2250 goto fail;
2251
2252 STAT_OP_OUT;
2253 continue;
2254 break;
2255
2256 case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH);
2257 GET_MEMNUM_INC(mem, p);
2258 STACK_PUSH_MEM_START(mem, s);
2259 STAT_OP_OUT;
2260 continue;
2261 break;
2262
2263 case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
2264 GET_MEMNUM_INC(mem, p);
2265 mem_start_stk[mem] = (StackIndex )((void* )s);
2266 STAT_OP_OUT;
2267 continue;
2268 break;
2269
2270 case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH);
2271 GET_MEMNUM_INC(mem, p);
2272 STACK_PUSH_MEM_END(mem, s);
2273 STAT_OP_OUT;
2274 continue;
2275 break;
2276
2277 case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
2278 GET_MEMNUM_INC(mem, p);
2279 mem_end_stk[mem] = (StackIndex )((void* )s);
2280 STAT_OP_OUT;
2281 continue;
2282 break;
2283
2284 #ifdef USE_SUBEXP_CALL
2285 case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
2286 GET_MEMNUM_INC(mem, p);
2287 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2288 STACK_PUSH_MEM_END(mem, s);
2289 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2290 STAT_OP_OUT;
2291 continue;
2292 break;
2293
2294 case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
2295 GET_MEMNUM_INC(mem, p);
2296 mem_end_stk[mem] = (StackIndex )((void* )s);
2297 STACK_GET_MEM_START(mem, stkp);
2298
2299 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2300 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2301 else
2302 mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
2303
2304 STACK_PUSH_MEM_END_MARK(mem);
2305 STAT_OP_OUT;
2306 continue;
2307 break;
2308 #endif
2309
2310 case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1);
2311 mem = 1;
2312 goto backref;
2313 break;
2314
2315 case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2);
2316 mem = 2;
2317 goto backref;
2318 break;
2319
2320 case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
2321 GET_MEMNUM_INC(mem, p);
2322 backref:
2323 {
2324 int len;
2325 UChar *pstart, *pend;
2326
2327 /* if you want to remove following line,
2328 you should check in parse and compile time. */
2329 if (mem > num_mem) goto fail;
2330 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2331 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2332
2333 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2334 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2335 else
2336 pstart = (UChar* )((void* )mem_start_stk[mem]);
2337
2338 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2339 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2340 : (UChar* )((void* )mem_end_stk[mem]));
2341 n = pend - pstart;
2342 DATA_ENSURE(n);
2343 sprev = s;
2344 STRING_CMP(pstart, s, n);
2345 while (sprev + (len = enc_len(encode, sprev)) < s)
2346 sprev += len;
2347
2348 STAT_OP_OUT;
2349 continue;
2350 }
2351 break;
2352
2353 case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
2354 GET_MEMNUM_INC(mem, p);
2355 {
2356 int len;
2357 UChar *pstart, *pend;
2358
2359 /* if you want to remove following line,
2360 you should check in parse and compile time. */
2361 if (mem > num_mem) goto fail;
2362 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2363 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2364
2365 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2366 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2367 else
2368 pstart = (UChar* )((void* )mem_start_stk[mem]);
2369
2370 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2371 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2372 : (UChar* )((void* )mem_end_stk[mem]));
2373 n = pend - pstart;
2374 DATA_ENSURE(n);
2375 sprev = s;
2376 STRING_CMP_IC(ambig_flag, pstart, &s, n);
2377 while (sprev + (len = enc_len(encode, sprev)) < s)
2378 sprev += len;
2379
2380 STAT_OP_OUT;
2381 continue;
2382 }
2383 break;
2384
2385 case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI);
2386 {
2387 int len, is_fail;
2388 UChar *pstart, *pend, *swork;
2389
2390 GET_LENGTH_INC(tlen, p);
2391 for (i = 0; i < tlen; i++) {
2392 GET_MEMNUM_INC(mem, p);
2393
2394 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2395 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2396
2397 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2398 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2399 else
2400 pstart = (UChar* )((void* )mem_start_stk[mem]);
2401
2402 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2403 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2404 : (UChar* )((void* )mem_end_stk[mem]));
2405 n = pend - pstart;
2406 DATA_ENSURE(n);
2407 sprev = s;
2408 swork = s;
2409 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2410 if (is_fail) continue;
2411 s = swork;
2412 while (sprev + (len = enc_len(encode, sprev)) < s)
2413 sprev += len;
2414
2415 p += (SIZE_MEMNUM * (tlen - i - 1));
2416 break; /* success */
2417 }
2418 if (i == tlen) goto fail;
2419 STAT_OP_OUT;
2420 continue;
2421 }
2422 break;
2423
2424 case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
2425 {
2426 int len, is_fail;
2427 UChar *pstart, *pend, *swork;
2428
2429 GET_LENGTH_INC(tlen, p);
2430 for (i = 0; i < tlen; i++) {
2431 GET_MEMNUM_INC(mem, p);
2432
2433 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2434 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2435
2436 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2437 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2438 else
2439 pstart = (UChar* )((void* )mem_start_stk[mem]);
2440
2441 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2442 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2443 : (UChar* )((void* )mem_end_stk[mem]));
2444 n = pend - pstart;
2445 DATA_ENSURE(n);
2446 sprev = s;
2447 swork = s;
2448 STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
2449 if (is_fail) continue;
2450 s = swork;
2451 while (sprev + (len = enc_len(encode, sprev)) < s)
2452 sprev += len;
2453
2454 p += (SIZE_MEMNUM * (tlen - i - 1));
2455 break; /* success */
2456 }
2457 if (i == tlen) goto fail;
2458 STAT_OP_OUT;
2459 continue;
2460 }
2461 break;
2462
2463 #ifdef USE_BACKREF_AT_LEVEL
2464 case OP_BACKREF_AT_LEVEL:
2465 {
2466 int len;
2467 OnigOptionType ic;
2468 LengthType level;
2469
2470 GET_OPTION_INC(ic, p);
2471 GET_LENGTH_INC(level, p);
2472 GET_LENGTH_INC(tlen, p);
2473
2474 sprev = s;
2475 if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
2476 , (int )level, (int )tlen, p, &s, end)) {
2477 while (sprev + (len = enc_len(encode, sprev)) < s)
2478 sprev += len;
2479
2480 p += (SIZE_MEMNUM * tlen);
2481 }
2482 else
2483 goto fail;
2484
2485 STAT_OP_OUT;
2486 continue;
2487 }
2488
2489 break;
2490 #endif
2491
2492 case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
2493 GET_OPTION_INC(option, p);
2494 STACK_PUSH_ALT(p, s, sprev);
2495 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2496 STAT_OP_OUT;
2497 continue;
2498 break;
2499
2500 case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
2501 GET_OPTION_INC(option, p);
2502 STAT_OP_OUT;
2503 continue;
2504 break;
2505
2506 case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START);
2507 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2508 STACK_PUSH_NULL_CHECK_START(mem, s);
2509 STAT_OP_OUT;
2510 continue;
2511 break;
2512
2513 case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END);
2514 {
2515 int isnull;
2516
2517 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2518 STACK_NULL_CHECK(isnull, mem, s);
2519 if (isnull) {
2520 #ifdef ONIG_DEBUG_MATCH
2521 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2522 (int )mem, (int )s);
2523 #endif
2524 null_check_found:
2525 /* empty loop founded, skip next instruction */
2526 switch (*p++) {
2527 case OP_JUMP:
2528 case OP_PUSH:
2529 p += SIZE_RELADDR;
2530 break;
2531 case OP_REPEAT_INC:
2532 case OP_REPEAT_INC_NG:
2533 case OP_REPEAT_INC_SG:
2534 case OP_REPEAT_INC_NG_SG:
2535 p += SIZE_MEMNUM;
2536 break;
2537 default:
2538 goto unexpected_bytecode_error;
2539 break;
2540 }
2541 }
2542 }
2543 STAT_OP_OUT;
2544 continue;
2545 break;
2546
2547 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2548 case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
2549 {
2550 int isnull;
2551
2552 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2553 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2554 if (isnull) {
2555 #ifdef ONIG_DEBUG_MATCH
2556 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2557 (int )mem, (int )s);
2558 #endif
2559 if (isnull == -1) goto fail;
2560 goto null_check_found;
2561 }
2562 }
2563 STAT_OP_OUT;
2564 continue;
2565 break;
2566 #endif
2567
2568 #ifdef USE_SUBEXP_CALL
2569 case OP_NULL_CHECK_END_MEMST_PUSH:
2570 STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2571 {
2572 int isnull;
2573
2574 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2575 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2576 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2577 #else
2578 STACK_NULL_CHECK_REC(isnull, mem, s);
2579 #endif
2580 if (isnull) {
2581 #ifdef ONIG_DEBUG_MATCH
2582 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2583 (int )mem, (int )s);
2584 #endif
2585 if (isnull == -1) goto fail;
2586 goto null_check_found;
2587 }
2588 else {
2589 STACK_PUSH_NULL_CHECK_END(mem);
2590 }
2591 }
2592 STAT_OP_OUT;
2593 continue;
2594 break;
2595 #endif
2596
2597 case OP_JUMP: STAT_OP_IN(OP_JUMP);
2598 GET_RELADDR_INC(addr, p);
2599 p += addr;
2600 STAT_OP_OUT;
2601 CHECK_INTERRUPT_IN_MATCH_AT;
2602 continue;
2603 break;
2604
2605 case OP_PUSH: STAT_OP_IN(OP_PUSH);
2606 GET_RELADDR_INC(addr, p);
2607 STACK_PUSH_ALT(p + addr, s, sprev);
2608 STAT_OP_OUT;
2609 continue;
2610 break;
2611
2612 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2613 case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
2614 GET_STATE_CHECK_NUM_INC(mem, p);
2615 STATE_CHECK_VAL(scv, mem);
2616 if (scv) goto fail;
2617
2618 GET_RELADDR_INC(addr, p);
2619 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2620 STAT_OP_OUT;
2621 continue;
2622 break;
2623
2624 case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2625 GET_STATE_CHECK_NUM_INC(mem, p);
2626 GET_RELADDR_INC(addr, p);
2627 STATE_CHECK_VAL(scv, mem);
2628 if (scv) {
2629 p += addr;
2630 }
2631 else {
2632 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2633 }
2634 STAT_OP_OUT;
2635 continue;
2636 break;
2637
2638 case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
2639 GET_STATE_CHECK_NUM_INC(mem, p);
2640 STATE_CHECK_VAL(scv, mem);
2641 if (scv) goto fail;
2642
2643 STACK_PUSH_STATE_CHECK(s, mem);
2644 STAT_OP_OUT;
2645 continue;
2646 break;
2647 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2648
2649 case OP_POP: STAT_OP_IN(OP_POP);
2650 STACK_POP_ONE;
2651 STAT_OP_OUT;
2652 continue;
2653 break;
2654
2655 case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
2656 GET_RELADDR_INC(addr, p);
2657 if (*p == *s && DATA_ENSURE_CHECK(1)) {
2658 p++;
2659 STACK_PUSH_ALT(p + addr, s, sprev);
2660 STAT_OP_OUT;
2661 continue;
2662 }
2663 p += (addr + 1);
2664 STAT_OP_OUT;
2665 continue;
2666 break;
2667
2668 case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
2669 GET_RELADDR_INC(addr, p);
2670 if (*p == *s) {
2671 p++;
2672 STACK_PUSH_ALT(p + addr, s, sprev);
2673 STAT_OP_OUT;
2674 continue;
2675 }
2676 p++;
2677 STAT_OP_OUT;
2678 continue;
2679 break;
2680
2681 case OP_REPEAT: STAT_OP_IN(OP_REPEAT);
2682 {
2683 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2684 GET_RELADDR_INC(addr, p);
2685
2686 STACK_ENSURE(1);
2687 repeat_stk[mem] = GET_STACK_INDEX(stk);
2688 STACK_PUSH_REPEAT(mem, p);
2689
2690 if (reg->repeat_range[mem].lower == 0) {
2691 STACK_PUSH_ALT(p + addr, s, sprev);
2692 }
2693 }
2694 STAT_OP_OUT;
2695 continue;
2696 break;
2697
2698 case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG);
2699 {
2700 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2701 GET_RELADDR_INC(addr, p);
2702
2703 STACK_ENSURE(1);
2704 repeat_stk[mem] = GET_STACK_INDEX(stk);
2705 STACK_PUSH_REPEAT(mem, p);
2706
2707 if (reg->repeat_range[mem].lower == 0) {
2708 STACK_PUSH_ALT(p, s, sprev);
2709 p += addr;
2710 }
2711 }
2712 STAT_OP_OUT;
2713 continue;
2714 break;
2715
2716 case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
2717 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2718 si = repeat_stk[mem];
2719 stkp = STACK_AT(si);
2720
2721 repeat_inc:
2722 stkp->u.repeat.count++;
2723 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2724 /* end of repeat. Nothing to do. */
2725 }
2726 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2727 STACK_PUSH_ALT(p, s, sprev);
2728 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2729 }
2730 else {
2731 p = stkp->u.repeat.pcode;
2732 }
2733 STACK_PUSH_REPEAT_INC(si);
2734 STAT_OP_OUT;
2735 CHECK_INTERRUPT_IN_MATCH_AT;
2736 continue;
2737 break;
2738
2739 case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
2740 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2741 STACK_GET_REPEAT(mem, stkp);
2742 si = GET_STACK_INDEX(stkp);
2743 goto repeat_inc;
2744 break;
2745
2746 case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
2747 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2748 si = repeat_stk[mem];
2749 stkp = STACK_AT(si);
2750
2751 repeat_inc_ng:
2752 stkp->u.repeat.count++;
2753 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2754 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2755 UChar* pcode = stkp->u.repeat.pcode;
2756
2757 STACK_PUSH_REPEAT_INC(si);
2758 STACK_PUSH_ALT(pcode, s, sprev);
2759 }
2760 else {
2761 p = stkp->u.repeat.pcode;
2762 STACK_PUSH_REPEAT_INC(si);
2763 }
2764 }
2765 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2766 STACK_PUSH_REPEAT_INC(si);
2767 }
2768 STAT_OP_OUT;
2769 CHECK_INTERRUPT_IN_MATCH_AT;
2770 continue;
2771 break;
2772
2773 case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
2774 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2775 STACK_GET_REPEAT(mem, stkp);
2776 si = GET_STACK_INDEX(stkp);
2777 goto repeat_inc_ng;
2778 break;
2779
2780 case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
2781 STACK_PUSH_POS(s, sprev);
2782 STAT_OP_OUT;
2783 continue;
2784 break;
2785
2786 case OP_POP_POS: STAT_OP_IN(OP_POP_POS);
2787 {
2788 STACK_POS_END(stkp);
2789 s = stkp->u.state.pstr;
2790 sprev = stkp->u.state.pstr_prev;
2791 }
2792 STAT_OP_OUT;
2793 continue;
2794 break;
2795
2796 case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT);
2797 GET_RELADDR_INC(addr, p);
2798 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2799 STAT_OP_OUT;
2800 continue;
2801 break;
2802
2803 case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS);
2804 STACK_POP_TIL_POS_NOT;
2805 goto fail;
2806 break;
2807
2808 case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT);
2809 STACK_PUSH_STOP_BT;
2810 STAT_OP_OUT;
2811 continue;
2812 break;
2813
2814 case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT);
2815 STACK_STOP_BT_END;
2816 STAT_OP_OUT;
2817 continue;
2818 break;
2819
2820 case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
2821 GET_LENGTH_INC(tlen, p);
2822 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2823 if (IS_NULL(s)) goto fail;
2824 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2825 STAT_OP_OUT;
2826 continue;
2827 break;
2828
2829 case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2830 GET_RELADDR_INC(addr, p);
2831 GET_LENGTH_INC(tlen, p);
2832 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2833 if (IS_NULL(q)) {
2834 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2835 If you want to change to fail, replace following line. */
2836 p += addr;
2837 /* goto fail; */
2838 }
2839 else {
2840 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2841 s = q;
2842 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2843 }
2844 STAT_OP_OUT;
2845 continue;
2846 break;
2847
2848 case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2849 STACK_POP_TIL_LOOK_BEHIND_NOT;
2850 goto fail;
2851 break;
2852
2853 #ifdef USE_SUBEXP_CALL
2854 case OP_CALL: STAT_OP_IN(OP_CALL);
2855 GET_ABSADDR_INC(addr, p);
2856 STACK_PUSH_CALL_FRAME(p);
2857 p = reg->p + addr;
2858 STAT_OP_OUT;
2859 continue;
2860 break;
2861
2862 case OP_RETURN: STAT_OP_IN(OP_RETURN);
2863 STACK_RETURN(p);
2864 STACK_PUSH_RETURN;
2865 STAT_OP_OUT;
2866 continue;
2867 break;
2868 #endif
2869
2870 case OP_FINISH:
2871 goto finish;
2872 break;
2873
2874 fail:
2875 STAT_OP_OUT;
2876 /* fall */
2877 case OP_FAIL: STAT_OP_IN(OP_FAIL);
2878 STACK_POP;
2879 p = stk->u.state.pcode;
2880 s = stk->u.state.pstr;
2881 sprev = stk->u.state.pstr_prev;
2882
2883 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2884 if (stk->u.state.state_check != 0) {
2885 stk->type = STK_STATE_CHECK_MARK;
2886 stk++;
2887 }
2888 #endif
2889
2890 STAT_OP_OUT;
2891 continue;
2892 break;
2893
2894 default:
2895 goto bytecode_error;
2896
2897 } /* end of switch */
2898 sprev = sbegin;
2899 } /* end of while(1) */
2900
2901 finish:
2902 STACK_SAVE;
2903 return best_len;
2904
2905 #ifdef ONIG_DEBUG
2906 stack_error:
2907 STACK_SAVE;
2908 return ONIGERR_STACK_BUG;
2909 #endif
2910
2911 bytecode_error:
2912 STACK_SAVE;
2913 return ONIGERR_UNDEFINED_BYTECODE;
2914
2915 unexpected_bytecode_error:
2916 STACK_SAVE;
2917 return ONIGERR_UNEXPECTED_BYTECODE;
2918 }
2919
2920
2921 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2922 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2923 const UChar* text, const UChar* text_end, UChar* text_range)
2924 {
2925 UChar *t, *p, *s, *end;
2926
2927 end = (UChar* )text_end;
2928 end -= target_end - target - 1;
2929 if (end > text_range)
2930 end = text_range;
2931
2932 s = (UChar* )text;
2933
2934 while (s < end) {
2935 if (*s == *target) {
2936 p = s + 1;
2937 t = target + 1;
2938 while (t < target_end) {
2939 if (*t != *p++)
2940 break;
2941 t++;
2942 }
2943 if (t == target_end)
2944 return s;
2945 }
2946 s += enc_len(enc, s);
2947 }
2948
2949 return (UChar* )NULL;
2950 }
2951
2952 static int
str_lower_case_match(OnigEncoding enc,int ambig_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2953 str_lower_case_match(OnigEncoding enc, int ambig_flag,
2954 const UChar* t, const UChar* tend,
2955 const UChar* p, const UChar* end)
2956 {
2957 int lowlen;
2958 UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
2959 const UChar* tsave;
2960 const UChar* psave;
2961
2962 tsave = t;
2963 psave = p;
2964
2965 while (t < tend) {
2966 lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
2967 q = lowbuf;
2968 while (lowlen > 0) {
2969 if (*t++ != *q++) {
2970 return 0;
2971 }
2972 lowlen--;
2973 }
2974 }
2975
2976 return 1;
2977 }
2978
2979 static UChar*
slow_search_ic(OnigEncoding enc,int ambig_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2980 slow_search_ic(OnigEncoding enc, int ambig_flag,
2981 UChar* target, UChar* target_end,
2982 const UChar* text, const UChar* text_end, UChar* text_range)
2983 {
2984 UChar *s, *end;
2985
2986 end = (UChar* )text_end;
2987 end -= target_end - target - 1;
2988 if (end > text_range)
2989 end = text_range;
2990
2991 s = (UChar* )text;
2992
2993 while (s < end) {
2994 if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
2995 return s;
2996
2997 s += enc_len(enc, s);
2998 }
2999
3000 return (UChar* )NULL;
3001 }
3002
3003 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3004 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3005 const UChar* text, const UChar* adjust_text,
3006 const UChar* text_end, const UChar* text_start)
3007 {
3008 UChar *t, *p, *s;
3009
3010 s = (UChar* )text_end;
3011 s -= (target_end - target);
3012 if (s > text_start)
3013 s = (UChar* )text_start;
3014 else
3015 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3016
3017 while (s >= text) {
3018 if (*s == *target) {
3019 p = s + 1;
3020 t = target + 1;
3021 while (t < target_end) {
3022 if (*t != *p++)
3023 break;
3024 t++;
3025 }
3026 if (t == target_end)
3027 return s;
3028 }
3029 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3030 }
3031
3032 return (UChar* )NULL;
3033 }
3034
3035 static UChar*
slow_search_backward_ic(OnigEncoding enc,int ambig_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3036 slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
3037 UChar* target, UChar* target_end,
3038 const UChar* text, const UChar* adjust_text,
3039 const UChar* text_end, const UChar* text_start)
3040 {
3041 UChar *s;
3042
3043 s = (UChar* )text_end;
3044 s -= (target_end - target);
3045 if (s > text_start)
3046 s = (UChar* )text_start;
3047 else
3048 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3049
3050 while (s >= text) {
3051 if (str_lower_case_match(enc, ambig_flag,
3052 target, target_end, s, text_end))
3053 return s;
3054
3055 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3056 }
3057
3058 return (UChar* )NULL;
3059 }
3060
3061 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)3062 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3063 const UChar* text, const UChar* text_end,
3064 const UChar* text_range)
3065 {
3066 const UChar *s, *se, *t, *p, *end;
3067 const UChar *tail;
3068 int skip, tlen1;
3069
3070 #ifdef ONIG_DEBUG_SEARCH
3071 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
3072 (int )text, (int )text_end, (int )text_range);
3073 #endif
3074
3075 tail = target_end - 1;
3076 tlen1 = tail - target;
3077 end = text_range;
3078 if (end + tlen1 > text_end)
3079 end = text_end - tlen1;
3080
3081 s = text;
3082
3083 if (IS_NULL(reg->int_map)) {
3084 while (s < end) {
3085 p = se = s + tlen1;
3086 t = tail;
3087 while (t >= target && *p == *t) {
3088 p--; t--;
3089 }
3090 if (t < target) return (UChar* )s;
3091
3092 skip = reg->map[*se];
3093 t = s;
3094 do {
3095 s += enc_len(reg->enc, s);
3096 } while ((s - t) < skip && s < end);
3097 }
3098 }
3099 else {
3100 while (s < end) {
3101 p = se = s + tlen1;
3102 t = tail;
3103 while (t >= target && *p == *t) {
3104 p--; t--;
3105 }
3106 if (t < target) return (UChar* )s;
3107
3108 skip = reg->int_map[*se];
3109 t = s;
3110 do {
3111 s += enc_len(reg->enc, s);
3112 } while ((s - t) < skip && s < end);
3113 }
3114 }
3115
3116 return (UChar* )NULL;
3117 }
3118
3119 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)3120 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3121 const UChar* text, const UChar* text_end, const UChar* text_range)
3122 {
3123 const UChar *s, *t, *p, *end;
3124 const UChar *tail;
3125
3126 end = text_range + (target_end - target) - 1;
3127 if (end > text_end)
3128 end = text_end;
3129
3130 tail = target_end - 1;
3131 s = text + (target_end - target) - 1;
3132 if (IS_NULL(reg->int_map)) {
3133 while (s < end) {
3134 p = s;
3135 t = tail;
3136 while (t >= target && *p == *t) {
3137 p--; t--;
3138 }
3139 if (t < target) return (UChar* )(p + 1);
3140 s += reg->map[*s];
3141 }
3142 }
3143 else { /* see int_map[] */
3144 while (s < end) {
3145 p = s;
3146 t = tail;
3147 while (t >= target && *p == *t) {
3148 p--; t--;
3149 }
3150 if (t < target) return (UChar* )(p + 1);
3151 s += reg->int_map[*s];
3152 }
3153 }
3154 return (UChar* )NULL;
3155 }
3156
3157 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc,int ** skip)3158 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
3159
3160 {
3161 int i, len;
3162
3163 if (IS_NULL(*skip)) {
3164 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3165 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3166 }
3167
3168 len = end - s;
3169 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3170 (*skip)[i] = len;
3171
3172 for (i = len - 1; i > 0; i--)
3173 (*skip)[s[i]] = i;
3174
3175 return 0;
3176 }
3177
3178 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3179 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3180 const UChar* text, const UChar* adjust_text,
3181 const UChar* text_end, const UChar* text_start)
3182 {
3183 const UChar *s, *t, *p;
3184
3185 s = text_end - (target_end - target);
3186 if (text_start < s)
3187 s = text_start;
3188 else
3189 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3190
3191 while (s >= text) {
3192 p = s;
3193 t = target;
3194 while (t < target_end && *p == *t) {
3195 p++; t++;
3196 }
3197 if (t == target_end)
3198 return (UChar* )s;
3199
3200 s -= reg->int_map_backward[*s];
3201 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3202 }
3203
3204 return (UChar* )NULL;
3205 }
3206
3207 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3208 map_search(OnigEncoding enc, UChar map[],
3209 const UChar* text, const UChar* text_range)
3210 {
3211 const UChar *s = text;
3212
3213 while (s < text_range) {
3214 if (map[*s]) return (UChar* )s;
3215
3216 s += enc_len(enc, s);
3217 }
3218 return (UChar* )NULL;
3219 }
3220
3221 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3222 map_search_backward(OnigEncoding enc, UChar map[],
3223 const UChar* text, const UChar* adjust_text,
3224 const UChar* text_start)
3225 {
3226 const UChar *s = text_start;
3227
3228 while (s >= text) {
3229 if (map[*s]) return (UChar* )s;
3230
3231 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3232 }
3233 return (UChar* )NULL;
3234 }
3235
3236 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3237 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3238 OnigOptionType option)
3239 {
3240 int r;
3241 UChar *prev;
3242 MatchArg msa;
3243
3244 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3245 start:
3246 THREAD_ATOMIC_START;
3247 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3248 ONIG_STATE_INC(reg);
3249 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3250 onig_chain_reduce(reg);
3251 ONIG_STATE_INC(reg);
3252 }
3253 }
3254 else {
3255 int n;
3256
3257 THREAD_ATOMIC_END;
3258 n = 0;
3259 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3260 if (++n > THREAD_PASS_LIMIT_COUNT)
3261 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3262 THREAD_PASS;
3263 }
3264 goto start;
3265 }
3266 THREAD_ATOMIC_END;
3267 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3268
3269 MATCH_ARG_INIT(msa, option, region, at);
3270 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3271 {
3272 int offset = at - str;
3273 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3274 }
3275 #endif
3276
3277 if (region
3278 #ifdef USE_POSIX_REGION_OPTION
3279 && !IS_POSIX_REGION(option)
3280 #endif
3281 ) {
3282 r = onig_region_resize_clear(region, reg->num_mem + 1);
3283 }
3284 else
3285 r = 0;
3286
3287 if (r == 0) {
3288 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3289 r = match_at(reg, str, end, at, prev, &msa);
3290 }
3291
3292 MATCH_ARG_FREE(msa);
3293 ONIG_STATE_DEC_THREAD(reg);
3294 return r;
3295 }
3296
3297 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3298 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3299 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3300 {
3301 UChar *p, *pprev = (UChar* )NULL;
3302
3303 #ifdef ONIG_DEBUG_SEARCH
3304 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3305 (int )str, (int )end, (int )s, (int )range);
3306 #endif
3307
3308 p = s;
3309 if (reg->dmin > 0) {
3310 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3311 p += reg->dmin;
3312 }
3313 else {
3314 UChar *q = p + reg->dmin;
3315 while (p < q) p += enc_len(reg->enc, p);
3316 }
3317 }
3318
3319 retry:
3320 switch (reg->optimize) {
3321 case ONIG_OPTIMIZE_EXACT:
3322 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3323 break;
3324 case ONIG_OPTIMIZE_EXACT_IC:
3325 p = slow_search_ic(reg->enc, reg->ambig_flag,
3326 reg->exact, reg->exact_end, p, end, range);
3327 break;
3328
3329 case ONIG_OPTIMIZE_EXACT_BM:
3330 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3331 break;
3332
3333 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3334 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3335 break;
3336
3337 case ONIG_OPTIMIZE_MAP:
3338 p = map_search(reg->enc, reg->map, p, range);
3339 break;
3340 }
3341
3342 if (p && p < range) {
3343 if (p - reg->dmin < s) {
3344 retry_gate:
3345 pprev = p;
3346 p += enc_len(reg->enc, p);
3347 goto retry;
3348 }
3349
3350 if (reg->sub_anchor) {
3351 UChar* prev;
3352
3353 switch (reg->sub_anchor) {
3354 case ANCHOR_BEGIN_LINE:
3355 if (!ON_STR_BEGIN(p)) {
3356 prev = onigenc_get_prev_char_head(reg->enc,
3357 (pprev ? pprev : str), p);
3358 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3359 goto retry_gate;
3360 }
3361 break;
3362
3363 case ANCHOR_END_LINE:
3364 if (ON_STR_END(p)) {
3365 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3366 (pprev ? pprev : str), p);
3367 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3368 goto retry_gate;
3369 }
3370 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3371 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3372 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3373 #endif
3374 )
3375 goto retry_gate;
3376 break;
3377 }
3378 }
3379
3380 if (reg->dmax == 0) {
3381 *low = p;
3382 if (low_prev) {
3383 if (*low > s)
3384 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3385 else
3386 *low_prev = onigenc_get_prev_char_head(reg->enc,
3387 (pprev ? pprev : str), p);
3388 }
3389 }
3390 else {
3391 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3392 *low = p - reg->dmax;
3393 if (*low > s) {
3394 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3395 *low, (const UChar** )low_prev);
3396 if (low_prev && IS_NULL(*low_prev))
3397 *low_prev = onigenc_get_prev_char_head(reg->enc,
3398 (pprev ? pprev : s), *low);
3399 }
3400 else {
3401 if (low_prev)
3402 *low_prev = onigenc_get_prev_char_head(reg->enc,
3403 (pprev ? pprev : str), *low);
3404 }
3405 }
3406 }
3407 /* no needs to adjust *high, *high is used as range check only */
3408 *high = p - reg->dmin;
3409
3410 #ifdef ONIG_DEBUG_SEARCH
3411 fprintf(stderr,
3412 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3413 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3414 #endif
3415 return 1; /* success */
3416 }
3417
3418 return 0; /* fail */
3419 }
3420
3421 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3422 int** skip));
3423
3424 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3425
3426 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3427 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3428 UChar* s, const UChar* range, UChar* adjrange,
3429 UChar** low, UChar** high)
3430 {
3431 int r;
3432 UChar *p;
3433
3434 range += reg->dmin;
3435 p = s;
3436
3437 retry:
3438 switch (reg->optimize) {
3439 case ONIG_OPTIMIZE_EXACT:
3440 exact_method:
3441 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3442 range, adjrange, end, p);
3443 break;
3444
3445 case ONIG_OPTIMIZE_EXACT_IC:
3446 p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
3447 reg->exact, reg->exact_end,
3448 range, adjrange, end, p);
3449 break;
3450
3451 case ONIG_OPTIMIZE_EXACT_BM:
3452 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3453 if (IS_NULL(reg->int_map_backward)) {
3454 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3455 goto exact_method;
3456
3457 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3458 &(reg->int_map_backward));
3459 if (r) return r;
3460 }
3461 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3462 end, p);
3463 break;
3464
3465 case ONIG_OPTIMIZE_MAP:
3466 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3467 break;
3468 }
3469
3470 if (p) {
3471 if (reg->sub_anchor) {
3472 UChar* prev;
3473
3474 switch (reg->sub_anchor) {
3475 case ANCHOR_BEGIN_LINE:
3476 if (!ON_STR_BEGIN(p)) {
3477 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3478 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3479 p = prev;
3480 goto retry;
3481 }
3482 }
3483 break;
3484
3485 case ANCHOR_END_LINE:
3486 if (ON_STR_END(p)) {
3487 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3488 if (IS_NULL(prev)) goto fail;
3489 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3490 p = prev;
3491 goto retry;
3492 }
3493 }
3494 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3495 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3496 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3497 #endif
3498 ) {
3499 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3500 if (IS_NULL(p)) goto fail;
3501 goto retry;
3502 }
3503 break;
3504 }
3505 }
3506
3507 /* no needs to adjust *high, *high is used as range check only */
3508 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3509 *low = p - reg->dmax;
3510 *high = p - reg->dmin;
3511 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3512 }
3513
3514 #ifdef ONIG_DEBUG_SEARCH
3515 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3516 (int )(*low - str), (int )(*high - str));
3517 #endif
3518 return 1; /* success */
3519 }
3520
3521 fail:
3522 #ifdef ONIG_DEBUG_SEARCH
3523 fprintf(stderr, "backward_search_range: fail.\n");
3524 #endif
3525 return 0; /* fail */
3526 }
3527
3528
3529 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3530 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3531 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3532 {
3533 int r;
3534 UChar *s, *prev;
3535 MatchArg msa;
3536 const UChar *orig_start = start;
3537
3538 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3539 start:
3540 THREAD_ATOMIC_START;
3541 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3542 ONIG_STATE_INC(reg);
3543 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3544 onig_chain_reduce(reg);
3545 ONIG_STATE_INC(reg);
3546 }
3547 }
3548 else {
3549 int n;
3550
3551 THREAD_ATOMIC_END;
3552 n = 0;
3553 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3554 if (++n > THREAD_PASS_LIMIT_COUNT)
3555 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3556 THREAD_PASS;
3557 }
3558 goto start;
3559 }
3560 THREAD_ATOMIC_END;
3561 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3562
3563 #ifdef ONIG_DEBUG_SEARCH
3564 fprintf(stderr,
3565 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3566 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3567 #endif
3568
3569 if (region
3570 #ifdef USE_POSIX_REGION_OPTION
3571 && !IS_POSIX_REGION(option)
3572 #endif
3573 ) {
3574 r = onig_region_resize_clear(region, reg->num_mem + 1);
3575 if (r) goto finish_no_msa;
3576 }
3577
3578 if (start > end || start < str) goto mismatch_no_msa;
3579
3580 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3581 #define MATCH_AND_RETURN_CHECK \
3582 r = match_at(reg, str, end, s, prev, &msa);\
3583 if (r != ONIG_MISMATCH) {\
3584 if (r >= 0) {\
3585 if (! IS_FIND_LONGEST(reg->options)) {\
3586 goto match;\
3587 }\
3588 }\
3589 else goto finish; /* error */ \
3590 }
3591 #else
3592 #define MATCH_AND_RETURN_CHECK \
3593 r = match_at(reg, str, end, s, prev, &msa);\
3594 if (r != ONIG_MISMATCH) {\
3595 if (r >= 0) {\
3596 goto match;\
3597 }\
3598 else goto finish; /* error */ \
3599 }
3600 #endif
3601
3602 /* anchor optimize: resume search range */
3603 if (reg->anchor != 0 && str < end) {
3604 UChar *min_semi_end, *max_semi_end;
3605
3606 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3607 /* search start-position only */
3608 begin_position:
3609 if (range > start)
3610 range = start + 1;
3611 else
3612 range = start;
3613 }
3614 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3615 /* search str-position only */
3616 if (range > start) {
3617 if (start != str) goto mismatch_no_msa;
3618 range = str + 1;
3619 }
3620 else {
3621 if (range <= str) {
3622 start = str;
3623 range = str;
3624 }
3625 else
3626 goto mismatch_no_msa;
3627 }
3628 }
3629 else if (reg->anchor & ANCHOR_END_BUF) {
3630 min_semi_end = max_semi_end = (UChar* )end;
3631
3632 end_buf:
3633 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3634 goto mismatch_no_msa;
3635
3636 if (range > start) {
3637 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3638 start = min_semi_end - reg->anchor_dmax;
3639 if (start < end)
3640 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3641 else { /* match with empty at end */
3642 start = onigenc_get_prev_char_head(reg->enc, str, end);
3643 }
3644 }
3645 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3646 range = max_semi_end - reg->anchor_dmin + 1;
3647 }
3648
3649 if (start >= range) goto mismatch_no_msa;
3650 }
3651 else {
3652 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3653 range = min_semi_end - reg->anchor_dmax;
3654 }
3655 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3656 start = max_semi_end - reg->anchor_dmin;
3657 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3658 }
3659 if (range > start) goto mismatch_no_msa;
3660 }
3661 }
3662 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3663 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3664
3665 max_semi_end = (UChar* )end;
3666 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3667 min_semi_end = pre_end;
3668
3669 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3670 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3671 if (IS_NOT_NULL(pre_end) &&
3672 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3673 min_semi_end = pre_end;
3674 }
3675 #endif
3676 if (min_semi_end > str && start <= min_semi_end) {
3677 goto end_buf;
3678 }
3679 }
3680 else {
3681 min_semi_end = (UChar* )end;
3682 goto end_buf;
3683 }
3684 }
3685 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3686 goto begin_position;
3687 }
3688 }
3689 else if (str == end) { /* empty string */
3690 static const UChar* address_for_empty_string = (UChar* )"";
3691
3692 #ifdef ONIG_DEBUG_SEARCH
3693 fprintf(stderr, "onig_search: empty string.\n");
3694 #endif
3695
3696 if (reg->threshold_len == 0) {
3697 start = end = str = address_for_empty_string;
3698 s = (UChar* )start;
3699 prev = (UChar* )NULL;
3700
3701 MATCH_ARG_INIT(msa, option, region, start);
3702 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3703 msa.state_check_buff = (void* )0;
3704 msa.state_check_buff_size = 0;
3705 #endif
3706 MATCH_AND_RETURN_CHECK;
3707 goto mismatch;
3708 }
3709 goto mismatch_no_msa;
3710 }
3711
3712 #ifdef ONIG_DEBUG_SEARCH
3713 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3714 (int )(end - str), (int )(start - str), (int )(range - str));
3715 #endif
3716
3717 MATCH_ARG_INIT(msa, option, region, orig_start);
3718 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3719 {
3720 int offset = (MIN(start, range) - str);
3721 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3722 }
3723 #endif
3724
3725 s = (UChar* )start;
3726 if (range > start) { /* forward search */
3727 if (s > str)
3728 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3729 else
3730 prev = (UChar* )NULL;
3731
3732 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3733 UChar *sch_range, *low, *high, *low_prev;
3734
3735 sch_range = (UChar* )range;
3736 if (reg->dmax != 0) {
3737 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3738 sch_range = (UChar* )end;
3739 else {
3740 sch_range += reg->dmax;
3741 if (sch_range > end) sch_range = (UChar* )end;
3742 }
3743 }
3744
3745 if ((end - start) < reg->threshold_len)
3746 goto mismatch;
3747
3748 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3749 do {
3750 if (! forward_search_range(reg, str, end, s, sch_range,
3751 &low, &high, &low_prev)) goto mismatch;
3752 if (s < low) {
3753 s = low;
3754 prev = low_prev;
3755 }
3756 while (s <= high) {
3757 MATCH_AND_RETURN_CHECK;
3758 prev = s;
3759 s += enc_len(reg->enc, s);
3760 }
3761 } while (s < range);
3762 goto mismatch;
3763 }
3764 else { /* check only. */
3765 if (! forward_search_range(reg, str, end, s, sch_range,
3766 &low, &high, (UChar** )NULL)) goto mismatch;
3767
3768 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3769 do {
3770 MATCH_AND_RETURN_CHECK;
3771 prev = s;
3772 s += enc_len(reg->enc, s);
3773
3774 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3775 prev = s;
3776 s += enc_len(reg->enc, s);
3777 }
3778 } while (s < range);
3779 goto mismatch;
3780 }
3781 }
3782 }
3783
3784 do {
3785 MATCH_AND_RETURN_CHECK;
3786 prev = s;
3787 s += enc_len(reg->enc, s);
3788 } while (s < range);
3789
3790 if (s == range) { /* because empty match with /$/. */
3791 MATCH_AND_RETURN_CHECK;
3792 }
3793 }
3794 else { /* backward search */
3795 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3796 UChar *low, *high, *adjrange, *sch_start;
3797
3798 if (range < end)
3799 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3800 else
3801 adjrange = (UChar* )end;
3802
3803 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3804 (end - range) >= reg->threshold_len) {
3805 do {
3806 sch_start = s + reg->dmax;
3807 if (sch_start > end) sch_start = (UChar* )end;
3808 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3809 &low, &high) <= 0)
3810 goto mismatch;
3811
3812 if (s > high)
3813 s = high;
3814
3815 while (s >= low) {
3816 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3817 MATCH_AND_RETURN_CHECK;
3818 s = prev;
3819 }
3820 } while (s >= range);
3821 goto mismatch;
3822 }
3823 else { /* check only. */
3824 if ((end - range) < reg->threshold_len) goto mismatch;
3825
3826 sch_start = s;
3827 if (reg->dmax != 0) {
3828 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3829 sch_start = (UChar* )end;
3830 else {
3831 sch_start += reg->dmax;
3832 if (sch_start > end) sch_start = (UChar* )end;
3833 else
3834 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3835 start, sch_start);
3836 }
3837 }
3838 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3839 &low, &high) <= 0) goto mismatch;
3840 }
3841 }
3842
3843 do {
3844 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3845 MATCH_AND_RETURN_CHECK;
3846 s = prev;
3847 } while (s >= range);
3848 }
3849
3850 mismatch:
3851 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3852 if (IS_FIND_LONGEST(reg->options)) {
3853 if (msa.best_len >= 0) {
3854 s = msa.best_s;
3855 goto match;
3856 }
3857 }
3858 #endif
3859 r = ONIG_MISMATCH;
3860
3861 finish:
3862 MATCH_ARG_FREE(msa);
3863 ONIG_STATE_DEC_THREAD(reg);
3864
3865 /* If result is mismatch and no FIND_NOT_EMPTY option,
3866 then the region is not setted in match_at(). */
3867 if (IS_FIND_NOT_EMPTY(reg->options) && region
3868 #ifdef USE_POSIX_REGION_OPTION
3869 && !IS_POSIX_REGION(option)
3870 #endif
3871 ) {
3872 onig_region_clear(region);
3873 }
3874
3875 #ifdef ONIG_DEBUG
3876 if (r != ONIG_MISMATCH)
3877 fprintf(stderr, "onig_search: error %d\n", r);
3878 #endif
3879 return r;
3880
3881 mismatch_no_msa:
3882 r = ONIG_MISMATCH;
3883 finish_no_msa:
3884 ONIG_STATE_DEC_THREAD(reg);
3885 #ifdef ONIG_DEBUG
3886 if (r != ONIG_MISMATCH)
3887 fprintf(stderr, "onig_search: error %d\n", r);
3888 #endif
3889 return r;
3890
3891 match:
3892 ONIG_STATE_DEC_THREAD(reg);
3893 MATCH_ARG_FREE(msa);
3894 return s - str;
3895 }
3896
3897 extern OnigEncoding
onig_get_encoding(regex_t * reg)3898 onig_get_encoding(regex_t* reg)
3899 {
3900 return reg->enc;
3901 }
3902
3903 extern OnigOptionType
onig_get_options(regex_t * reg)3904 onig_get_options(regex_t* reg)
3905 {
3906 return reg->options;
3907 }
3908
3909 extern OnigAmbigType
onig_get_ambig_flag(regex_t * reg)3910 onig_get_ambig_flag(regex_t* reg)
3911 {
3912 return reg->ambig_flag;
3913 }
3914
3915 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3916 onig_get_syntax(regex_t* reg)
3917 {
3918 return reg->syntax;
3919 }
3920
3921 extern int
onig_number_of_captures(regex_t * reg)3922 onig_number_of_captures(regex_t* reg)
3923 {
3924 return reg->num_mem;
3925 }
3926
3927 extern int
onig_number_of_capture_histories(regex_t * reg)3928 onig_number_of_capture_histories(regex_t* reg)
3929 {
3930 #ifdef USE_CAPTURE_HISTORY
3931 int i, n;
3932
3933 n = 0;
3934 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3935 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3936 n++;
3937 }
3938 return n;
3939 #else
3940 return 0;
3941 #endif
3942 }
3943
3944 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3945 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3946 {
3947 *to = *from;
3948 }
3949
3950