1 #ifndef REGPARSE_H 2 #define REGPARSE_H 3 /********************************************************************** 4 regparse.h - Oniguruma (regular expression library) 5 **********************************************************************/ 6 /*- 7 * Copyright (c) 2002-2019 K.Kosako 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "regint.h" 33 34 #define NODE_STRING_MARGIN 16 35 #define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */ 36 #define NODE_BACKREFS_SIZE 6 37 38 /* node type */ 39 typedef enum { 40 NODE_STRING = 0, 41 NODE_CCLASS = 1, 42 NODE_CTYPE = 2, 43 NODE_BACKREF = 3, 44 NODE_QUANT = 4, 45 NODE_BAG = 5, 46 NODE_ANCHOR = 6, 47 NODE_LIST = 7, 48 NODE_ALT = 8, 49 NODE_CALL = 9, 50 NODE_GIMMICK = 10 51 } NodeType; 52 53 enum BagType { 54 BAG_MEMORY = 0, 55 BAG_OPTION = 1, 56 BAG_STOP_BACKTRACK = 2, 57 BAG_IF_ELSE = 3, 58 }; 59 60 enum GimmickType { 61 GIMMICK_FAIL = 0, 62 GIMMICK_SAVE = 1, 63 GIMMICK_UPDATE_VAR = 2, 64 #ifdef USE_CALLOUT 65 GIMMICK_CALLOUT = 3, 66 #endif 67 }; 68 69 enum BodyEmptyType { 70 BODY_IS_NOT_EMPTY = 0, 71 BODY_IS_EMPTY_POSSIBILITY = 1, 72 BODY_IS_EMPTY_POSSIBILITY_MEM = 2, 73 BODY_IS_EMPTY_POSSIBILITY_REC = 3 74 }; 75 76 struct _Node; 77 78 typedef struct { 79 NodeType node_type; 80 int status; 81 struct _Node* parent; 82 83 UChar* s; 84 UChar* end; 85 unsigned int flag; 86 UChar buf[NODE_STRING_BUF_SIZE]; 87 int capacity; /* (allocated size - 1) or 0: use buf[] */ 88 int case_min_len; 89 } StrNode; 90 91 typedef struct { 92 NodeType node_type; 93 int status; 94 struct _Node* parent; 95 96 unsigned int flags; 97 BitSet bs; 98 BBuf* mbuf; /* multi-byte info or NULL */ 99 } CClassNode; 100 101 typedef struct { 102 NodeType node_type; 103 int status; 104 struct _Node* parent; 105 struct _Node* body; 106 107 int lower; 108 int upper; 109 int greedy; 110 enum BodyEmptyType emptiness; 111 struct _Node* head_exact; 112 struct _Node* next_head_exact; 113 int include_referred; /* include called node. don't eliminate even if {0} */ 114 } QuantNode; 115 116 typedef struct { 117 NodeType node_type; 118 int status; 119 struct _Node* parent; 120 struct _Node* body; 121 122 enum BagType type; 123 union { 124 struct { 125 int regnum; 126 AbsAddrType called_addr; 127 int entry_count; 128 int called_state; 129 } m; 130 struct { 131 OnigOptionType options; 132 } o; 133 struct { 134 /* body is condition */ 135 struct _Node* Then; 136 struct _Node* Else; 137 } te; 138 }; 139 /* for multiple call reference */ 140 OnigLen min_len; /* min length (byte) */ 141 OnigLen max_len; /* max length (byte) */ 142 int char_len; /* character length */ 143 int opt_count; /* referenced count in optimize_nodes() */ 144 } BagNode; 145 146 #ifdef USE_CALL 147 148 typedef struct { 149 int offset; 150 struct _Node* target; 151 } UnsetAddr; 152 153 typedef struct { 154 int num; 155 int alloc; 156 UnsetAddr* us; 157 } UnsetAddrList; 158 159 typedef struct { 160 NodeType node_type; 161 int status; 162 struct _Node* parent; 163 struct _Node* body; /* to BagNode : BAG_MEMORY */ 164 165 int by_number; 166 int group_num; 167 UChar* name; 168 UChar* name_end; 169 int entry_count; 170 } CallNode; 171 172 #endif 173 174 typedef struct { 175 NodeType node_type; 176 int status; 177 struct _Node* parent; 178 179 int back_num; 180 int back_static[NODE_BACKREFS_SIZE]; 181 int* back_dynamic; 182 int nest_level; 183 } BackRefNode; 184 185 typedef struct { 186 NodeType node_type; 187 int status; 188 struct _Node* parent; 189 struct _Node* body; 190 191 int type; 192 int char_len; 193 int ascii_mode; 194 } AnchorNode; 195 196 typedef struct { 197 NodeType node_type; 198 int status; 199 struct _Node* parent; 200 201 struct _Node* car; 202 struct _Node* cdr; 203 } ConsAltNode; 204 205 typedef struct { 206 NodeType node_type; 207 int status; 208 struct _Node* parent; 209 210 int ctype; 211 int not; 212 OnigOptionType options; 213 int ascii_mode; 214 } CtypeNode; 215 216 typedef struct { 217 NodeType node_type; 218 int status; 219 struct _Node* parent; 220 221 enum GimmickType type; 222 int detail_type; 223 int num; 224 int id; 225 } GimmickNode; 226 227 typedef struct _Node { 228 union { 229 struct { 230 NodeType node_type; 231 int status; 232 struct _Node* parent; 233 struct _Node* body; 234 } base; 235 236 StrNode str; 237 CClassNode cclass; 238 QuantNode quant; 239 BagNode bag; 240 BackRefNode backref; 241 AnchorNode anchor; 242 ConsAltNode cons; 243 CtypeNode ctype; 244 #ifdef USE_CALL 245 CallNode call; 246 #endif 247 GimmickNode gimmick; 248 } u; 249 } Node; 250 251 #define NULL_NODE ((Node* )0) 252 253 254 /* node type bit */ 255 #define NODE_TYPE2BIT(type) (1<<(type)) 256 257 #define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING) 258 #define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS) 259 #define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE) 260 #define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF) 261 #define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT) 262 #define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG) 263 #define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) 264 #define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST) 265 #define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT) 266 #define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL) 267 #define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) 268 269 #define NODE_TYPE(node) ((node)->u.base.node_type) 270 #define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) 271 272 #define STR_(node) (&((node)->u.str)) 273 #define CCLASS_(node) (&((node)->u.cclass)) 274 #define CTYPE_(node) (&((node)->u.ctype)) 275 #define BACKREF_(node) (&((node)->u.backref)) 276 #define QUANT_(node) (&((node)->u.quant)) 277 #define BAG_(node) (&((node)->u.bag)) 278 #define ANCHOR_(node) (&((node)->u.anchor)) 279 #define CONS_(node) (&((node)->u.cons)) 280 #define CALL_(node) (&((node)->u.call)) 281 #define GIMMICK_(node) (&((node)->u.gimmick)) 282 283 #define NODE_CAR(node) (CONS_(node)->car) 284 #define NODE_CDR(node) (CONS_(node)->cdr) 285 286 #define CTYPE_ANYCHAR -1 287 #define NODE_IS_ANYCHAR(node) \ 288 (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) 289 290 #define CTYPE_OPTION(node, reg) \ 291 (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) 292 293 294 #define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML) 295 #define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF) 296 297 #define NODE_STRING_CRUDE (1<<0) 298 #define NODE_STRING_CASE_EXPANDED (1<<1) 299 #define NODE_STRING_CASE_FOLD_MATCH (1<<2) 300 301 #define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) 302 #define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE 303 #define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE 304 #define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED 305 #define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH 306 #define NODE_STRING_IS_CRUDE(node) \ 307 (((node)->u.str.flag & NODE_STRING_CRUDE) != 0) 308 #define NODE_STRING_IS_CASE_EXPANDED(node) \ 309 (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0) 310 #define NODE_STRING_IS_CASE_FOLD_MATCH(node) \ 311 (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0) 312 313 #define BACKREFS_P(br) \ 314 (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) 315 316 /* node status bits */ 317 #define NODE_ST_MIN_FIXED (1<<0) 318 #define NODE_ST_MAX_FIXED (1<<1) 319 #define NODE_ST_CLEN_FIXED (1<<2) 320 #define NODE_ST_MARK1 (1<<3) 321 #define NODE_ST_MARK2 (1<<4) 322 #define NODE_ST_STRICT_REAL_REPEAT (1<<5) 323 #define NODE_ST_RECURSION (1<<6) 324 #define NODE_ST_CALLED (1<<7) 325 #define NODE_ST_ADDR_FIXED (1<<8) 326 #define NODE_ST_NAMED_GROUP (1<<9) 327 #define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */ 328 #define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ 329 #define NODE_ST_IN_MULTI_ENTRY (1<<12) 330 #define NODE_ST_NEST_LEVEL (1<<13) 331 #define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */ 332 #define NODE_ST_BY_NAME (1<<15) /* backref by name */ 333 #define NODE_ST_BACKREF (1<<16) 334 #define NODE_ST_CHECKER (1<<17) 335 #define NODE_ST_FIXED_OPTION (1<<18) 336 #define NODE_ST_PROHIBIT_RECURSION (1<<19) 337 #define NODE_ST_SUPER (1<<20) 338 #define NODE_ST_EMPTY_STATUS_CHECK (1<<21) 339 340 341 #define NODE_STATUS(node) (((Node* )node)->u.base.status) 342 #define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f)) 343 #define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f)) 344 345 #define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0) 346 #define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0) 347 #define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0) 348 #define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0) 349 #define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0) 350 #define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) 351 #define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0) 352 #define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0) 353 #define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0) 354 #define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0) 355 #define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0) 356 #define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0) 357 #define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0) 358 #define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0) 359 #define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0) 360 #define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0) 361 #define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0) 362 #define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) 363 #define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0) 364 #define NODE_IS_PROHIBIT_RECURSION(node) \ 365 ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) 366 #define NODE_IS_STRICT_REAL_REPEAT(node) \ 367 ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0) 368 #define NODE_IS_EMPTY_STATUS_CHECK(node) \ 369 ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0) 370 371 #define NODE_PARENT(node) ((node)->u.base.parent) 372 #define NODE_BODY(node) ((node)->u.base.body) 373 #define NODE_QUANT_BODY(node) ((node)->body) 374 #define NODE_BAG_BODY(node) ((node)->body) 375 #define NODE_CALL_BODY(node) ((node)->body) 376 #define NODE_ANCHOR_BODY(node) ((node)->body) 377 378 #define SCANENV_MEMENV_SIZE 8 379 #define SCANENV_MEMENV(senv) \ 380 (IS_NOT_NULL((senv)->mem_env_dynamic) ? \ 381 (senv)->mem_env_dynamic : (senv)->mem_env_static) 382 383 typedef struct { 384 Node* mem_node; 385 Node* empty_repeat_node; 386 } MemEnv; 387 388 typedef struct { 389 enum SaveType type; 390 } SaveItem; 391 392 typedef struct { 393 OnigOptionType options; 394 OnigCaseFoldType case_fold_flag; 395 OnigEncoding enc; 396 OnigSyntaxType* syntax; 397 MemStatusType cap_history; 398 MemStatusType backtrack_mem; /* backtrack/recursion */ 399 MemStatusType backrefed_mem; 400 UChar* pattern; 401 UChar* pattern_end; 402 UChar* error; 403 UChar* error_end; 404 regex_t* reg; /* for reg->names only */ 405 int num_call; 406 #ifdef USE_CALL 407 UnsetAddrList* unset_addr_list; 408 int has_call_zero; 409 #endif 410 int num_mem; 411 int num_named; 412 int mem_alloc; 413 MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; 414 MemEnv* mem_env_dynamic; 415 unsigned int parse_depth; 416 #ifdef ONIG_DEBUG_PARSE 417 unsigned int max_parse_depth; 418 #endif 419 int backref_num; 420 int keep_num; 421 int save_num; 422 int save_alloc_num; 423 SaveItem* saves; 424 } ScanEnv; 425 426 427 #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) 428 #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) 429 #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) 430 431 typedef struct { 432 int new_val; 433 } GroupNumRemap; 434 435 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); 436 437 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); 438 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); 439 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); 440 extern int onig_reduce_nested_quantifier P_((Node* pnode)); 441 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); 442 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); 443 extern void onig_node_free P_((Node* node)); 444 extern Node* onig_node_new_bag P_((enum BagType type)); 445 extern Node* onig_node_new_anchor P_((int type, int ascii_mode)); 446 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); 447 extern Node* onig_node_new_list P_((Node* left, Node* right)); 448 extern Node* onig_node_new_alt P_((Node* left, Node* right)); 449 extern void onig_node_str_clear P_((Node* node)); 450 extern int onig_names_free P_((regex_t* reg)); 451 extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); 452 extern int onig_free_shared_cclass_table P_((void)); 453 extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); 454 extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]); 455 extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); 456 457 #ifdef USE_CALLOUT 458 extern int onig_global_callout_names_free(void); 459 #endif 460 461 #ifdef ONIG_DEBUG 462 extern int onig_print_names(FILE*, regex_t*); 463 #endif 464 465 #endif /* REGPARSE_H */ 466