1 #ifndef REGPARSE_H 2 #define REGPARSE_H 3 /********************************************************************** 4 regparse.h - Oniguruma (regular expression library) 5 **********************************************************************/ 6 /*- 7 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "regint.h" 33 34 /* node type */ 35 #define N_STRING (1<< 0) 36 #define N_CCLASS (1<< 1) 37 #define N_CTYPE (1<< 2) 38 #define N_ANYCHAR (1<< 3) 39 #define N_BACKREF (1<< 4) 40 #define N_QUANTIFIER (1<< 5) 41 #define N_EFFECT (1<< 6) 42 #define N_ANCHOR (1<< 7) 43 #define N_LIST (1<< 8) 44 #define N_ALT (1<< 9) 45 #define N_CALL (1<<10) 46 47 #define IS_NODE_TYPE_SIMPLE(type) \ 48 (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0) 49 50 #define NTYPE(node) ((node)->type) 51 #define NCONS(node) ((node)->u.cons) 52 #define NSTRING(node) ((node)->u.str) 53 #define NCCLASS(node) ((node)->u.cclass) 54 #define NCTYPE(node) ((node)->u.ctype) 55 #define NQUANTIFIER(node) ((node)->u.quantifier) 56 #define NANCHOR(node) ((node)->u.anchor) 57 #define NBACKREF(node) ((node)->u.backref) 58 #define NEFFECT(node) ((node)->u.effect) 59 #define NCALL(node) ((node)->u.call) 60 61 #define CTYPE_WORD (1<<0) 62 #define CTYPE_NOT_WORD (1<<1) 63 #define CTYPE_WHITE_SPACE (1<<2) 64 #define CTYPE_NOT_WHITE_SPACE (1<<3) 65 #define CTYPE_DIGIT (1<<4) 66 #define CTYPE_NOT_DIGIT (1<<5) 67 #define CTYPE_XDIGIT (1<<6) 68 #define CTYPE_NOT_XDIGIT (1<<7) 69 70 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) 71 #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) 72 73 #define EFFECT_MEMORY (1<<0) 74 #define EFFECT_OPTION (1<<1) 75 #define EFFECT_STOP_BACKTRACK (1<<2) 76 77 #define NODE_STR_MARGIN 16 78 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ 79 #define NODE_BACKREFS_SIZE 6 80 81 #define NSTR_RAW (1<<0) /* by backslashed number */ 82 #define NSTR_AMBIG (1<<1) 83 #define NSTR_AMBIG_REDUCE (1<<2) 84 85 #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) 86 #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW 87 #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW 88 #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG 89 #define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE 90 #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) 91 #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) 92 #define NSTRING_IS_AMBIG_REDUCE(node) \ 93 (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0) 94 95 #define BACKREFS_P(br) \ 96 (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); 97 98 #define NQ_TARGET_ISNOT_EMPTY 0 99 #define NQ_TARGET_IS_EMPTY 1 100 #define NQ_TARGET_IS_EMPTY_MEM 2 101 #define NQ_TARGET_IS_EMPTY_REC 3 102 103 104 typedef struct { 105 UChar* s; 106 UChar* end; 107 unsigned int flag; 108 int capa; /* (allocated size - 1) or 0: use buf[] */ 109 UChar buf[NODE_STR_BUF_SIZE]; 110 } StrNode; 111 112 /* move to regint.h */ 113 #if 0 114 typedef struct { 115 int flags; 116 BitSet bs; 117 BBuf* mbuf; /* multi-byte info or NULL */ 118 } CClassNode; 119 #endif 120 121 typedef struct { 122 int state; 123 struct _Node* target; 124 int lower; 125 int upper; 126 int greedy; 127 int target_empty_info; 128 struct _Node* head_exact; 129 struct _Node* next_head_exact; 130 int is_refered; /* include called node. don't eliminate even if {0} */ 131 #ifdef USE_COMBINATION_EXPLOSION_CHECK 132 int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ 133 #endif 134 } QuantifierNode; 135 136 /* status bits */ 137 #define NST_MIN_FIXED (1<<0) 138 #define NST_MAX_FIXED (1<<1) 139 #define NST_CLEN_FIXED (1<<2) 140 #define NST_MARK1 (1<<3) 141 #define NST_MARK2 (1<<4) 142 #define NST_MEM_BACKREFED (1<<5) 143 #define NST_STOP_BT_SIMPLE_REPEAT (1<<6) 144 #define NST_RECURSION (1<<7) 145 #define NST_CALLED (1<<8) 146 #define NST_ADDR_FIXED (1<<9) 147 #define NST_NAMED_GROUP (1<<10) 148 #define NST_NAME_REF (1<<11) 149 #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */ 150 #define NST_NEST_LEVEL (1<<13) 151 #define NST_BY_NUMBER (1<<14) /* {n,m} */ 152 153 #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) 154 #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) 155 156 #define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0) 157 #define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) 158 #define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0) 159 #define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0) 160 #define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0) 161 #define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) 162 #define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) 163 #define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) 164 #define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \ 165 (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) 166 #define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) 167 168 #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION 169 #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) 170 #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) 171 #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) 172 #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) 173 #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) 174 #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) 175 176 typedef struct { 177 int state; 178 int type; 179 int regnum; 180 OnigOptionType option; 181 struct _Node* target; 182 AbsAddrType call_addr; 183 /* for multiple call reference */ 184 OnigDistance min_len; /* min length (byte) */ 185 OnigDistance max_len; /* max length (byte) */ 186 int char_len; /* character length */ 187 int opt_count; /* referenced count in optimize_node_left() */ 188 } EffectNode; 189 190 #define CALLNODE_REFNUM_UNDEF -1 191 192 #ifdef USE_SUBEXP_CALL 193 194 typedef struct { 195 int offset; 196 struct _Node* target; 197 } UnsetAddr; 198 199 typedef struct { 200 int num; 201 int alloc; 202 UnsetAddr* us; 203 } UnsetAddrList; 204 205 typedef struct { 206 int state; 207 int ref_num; 208 UChar* name; 209 UChar* name_end; 210 struct _Node* target; /* EffectNode : EFFECT_MEMORY */ 211 UnsetAddrList* unset_addr_list; 212 } CallNode; 213 214 #endif 215 216 typedef struct { 217 int state; 218 int back_num; 219 int back_static[NODE_BACKREFS_SIZE]; 220 int* back_dynamic; 221 int nest_level; 222 } BackrefNode; 223 224 typedef struct { 225 int type; 226 struct _Node* target; 227 int char_len; 228 } AnchorNode; 229 230 typedef struct _Node { 231 int type; 232 union { 233 StrNode str; 234 CClassNode cclass; 235 QuantifierNode quantifier; 236 EffectNode effect; 237 #ifdef USE_SUBEXP_CALL 238 CallNode call; 239 #endif 240 BackrefNode backref; 241 AnchorNode anchor; 242 struct { 243 struct _Node* left; 244 struct _Node* right; 245 } cons; 246 struct { 247 int type; 248 } ctype; 249 } u; 250 } Node; 251 252 #define NULL_NODE ((Node* )0) 253 254 #define SCANENV_MEMNODES_SIZE 8 255 #define SCANENV_MEM_NODES(senv) \ 256 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \ 257 (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) 258 259 typedef struct { 260 OnigOptionType option; 261 OnigAmbigType ambig_flag; 262 OnigEncoding enc; 263 OnigSyntaxType* syntax; 264 BitStatusType capture_history; 265 BitStatusType bt_mem_start; 266 BitStatusType bt_mem_end; 267 BitStatusType backrefed_mem; 268 UChar* pattern; 269 UChar* pattern_end; 270 UChar* error; 271 UChar* error_end; 272 regex_t* reg; /* for reg->names only */ 273 int num_call; 274 #ifdef USE_SUBEXP_CALL 275 UnsetAddrList* unset_addr_list; 276 #endif 277 int num_mem; 278 #ifdef USE_NAMED_GROUP 279 int num_named; 280 #endif 281 int mem_alloc; 282 Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; 283 Node** mem_nodes_dynamic; 284 #ifdef USE_COMBINATION_EXPLOSION_CHECK 285 int num_comb_exp_check; 286 int comb_exp_max_regnum; 287 int curr_max_regnum; 288 int has_recursion; 289 #endif 290 } ScanEnv; 291 292 293 #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) 294 #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) 295 #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) 296 297 298 #ifdef USE_NAMED_GROUP 299 typedef struct { 300 int new_val; 301 } GroupNumRemap; 302 303 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); 304 #endif 305 306 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); 307 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); 308 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); 309 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); 310 extern void onig_node_conv_to_str_node P_((Node* node, int raw)); 311 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); 312 extern void onig_node_free P_((Node* node)); 313 extern Node* onig_node_new_effect P_((int type)); 314 extern Node* onig_node_new_anchor P_((int type)); 315 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); 316 extern Node* onig_node_new_list P_((Node* left, Node* right)); 317 extern void onig_node_str_clear P_((Node* node)); 318 extern int onig_free_node_list P_((void)); 319 extern int onig_names_free P_((regex_t* reg)); 320 extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); 321 322 #ifdef ONIG_DEBUG 323 #ifdef USE_NAMED_GROUP 324 extern int onig_print_names(FILE*, regex_t*); 325 #endif 326 #endif 327 328 #endif /* REGPARSE_H */ 329