1Oniguruma API Version 6.1.0 2016/08/22 2 3#include <oniguruma.h> 4 5 6# int onig_initialize(OnigEncoding use_encodings[], int num_encodings) 7 8 Initialize library. 9 10 You have to call it explicitly. 11 12 * onig_init() is deprecated. 13 14 arguments 15 1 use_encodings: array of encodings used in application. 16 2 num_encodings: number of encodings. 17 18 19# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) 20 21 Get error message string. 22 If this function is used for onig_new(), 23 don't call this after the pattern argument of onig_new() is freed. 24 25 normal return: error message string length 26 27 arguments 28 1 err_buf: error message string buffer. 29 (required size: ONIG_MAX_ERROR_MESSAGE_LEN) 30 2 err_code: error code returned by other API functions. 31 3 err_info (optional): error info returned by onig_new(). 32 33 34# void onig_set_warn_func(OnigWarnFunc func) 35 36 Set warning function. 37 38 WARNING: 39 '[', '-', ']' in character class without escape. 40 ']' in pattern without escape. 41 42 arguments 43 1 func: function pointer. void (*func)(char* warning_message) 44 45 46# void onig_set_verb_warn_func(OnigWarnFunc func) 47 48 Set verbose warning function. 49 50 WARNING: 51 redundant nested repeat operator. 52 53 arguments 54 1 func: function pointer. void (*func)(char* warning_message) 55 56 57# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 58 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 59 OnigErrorInfo* err_info) 60 61 Create a regex object. 62 63 normal return: ONIG_NORMAL 64 65 arguments 66 1 reg: return regex object's address. 67 2 pattern: regex pattern string. 68 3 pattern_end: terminate address of pattern. (pattern + pattern length) 69 4 option: compile time options. 70 71 ONIG_OPTION_NONE no option 72 ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' 73 ONIG_OPTION_MULTILINE '.' 
match with newline 74 ONIG_OPTION_IGNORECASE ambiguity match on 75 ONIG_OPTION_EXTEND extended pattern form 76 ONIG_OPTION_FIND_LONGEST find longest match 77 ONIG_OPTION_FIND_NOT_EMPTY ignore empty match 78 ONIG_OPTION_NEGATE_SINGLELINE 79 clear ONIG_OPTION_SINGLELINE which is enabled on 80 ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, 81 ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA 82 83 ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured. 84 ONIG_OPTION_CAPTURE_GROUP named and no-named group captured. 85 86 5 enc: character encoding. 87 88 ONIG_ENCODING_ASCII ASCII 89 ONIG_ENCODING_ISO_8859_1 ISO 8859-1 90 ONIG_ENCODING_ISO_8859_2 ISO 8859-2 91 ONIG_ENCODING_ISO_8859_3 ISO 8859-3 92 ONIG_ENCODING_ISO_8859_4 ISO 8859-4 93 ONIG_ENCODING_ISO_8859_5 ISO 8859-5 94 ONIG_ENCODING_ISO_8859_6 ISO 8859-6 95 ONIG_ENCODING_ISO_8859_7 ISO 8859-7 96 ONIG_ENCODING_ISO_8859_8 ISO 8859-8 97 ONIG_ENCODING_ISO_8859_9 ISO 8859-9 98 ONIG_ENCODING_ISO_8859_10 ISO 8859-10 99 ONIG_ENCODING_ISO_8859_11 ISO 8859-11 100 ONIG_ENCODING_ISO_8859_13 ISO 8859-13 101 ONIG_ENCODING_ISO_8859_14 ISO 8859-14 102 ONIG_ENCODING_ISO_8859_15 ISO 8859-15 103 ONIG_ENCODING_ISO_8859_16 ISO 8859-16 104 ONIG_ENCODING_UTF8 UTF-8 105 ONIG_ENCODING_UTF16_BE UTF-16BE 106 ONIG_ENCODING_UTF16_LE UTF-16LE 107 ONIG_ENCODING_UTF32_BE UTF-32BE 108 ONIG_ENCODING_UTF32_LE UTF-32LE 109 ONIG_ENCODING_EUC_JP EUC-JP 110 ONIG_ENCODING_EUC_TW EUC-TW 111 ONIG_ENCODING_EUC_KR EUC-KR 112 ONIG_ENCODING_EUC_CN EUC-CN 113 ONIG_ENCODING_SJIS Shift_JIS 114 ONIG_ENCODING_KOI8_R KOI8-R 115 ONIG_ENCODING_CP1251 CP1251 116 ONIG_ENCODING_BIG5 Big5 117 ONIG_ENCODING_GB18030 GB18030 118 119 or any OnigEncodingType data address defined by user. 120 121 6 syntax: address of pattern syntax definition. 
122 123 ONIG_SYNTAX_ASIS plain text 124 ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE 125 ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE 126 ONIG_SYNTAX_EMACS Emacs 127 ONIG_SYNTAX_GREP grep 128 ONIG_SYNTAX_GNU_REGEX GNU regex 129 ONIG_SYNTAX_JAVA Java (Sun java.util.regex) 130 ONIG_SYNTAX_PERL Perl 131 ONIG_SYNTAX_PERL_NG Perl + named group 132 ONIG_SYNTAX_RUBY Ruby 133 ONIG_SYNTAX_DEFAULT default (== Ruby) 134 onig_set_default_syntax() 135 136 or any OnigSyntaxType data address defined by user. 137 138 7 err_info: address for return optional error info. 139 Use this value as 3rd argument of onig_error_code_to_str(). 140 141 142 143# int onig_new_without_alloc(regex_t* reg, const UChar* pattern, 144 const UChar* pattern_end, 145 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 146 OnigErrorInfo* err_info) 147 148 Create a regex object. 149 reg object area is not allocated in this function. 150 151 normal return: ONIG_NORMAL 152 153 154 155# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 156 OnigCompileInfo* ci, OnigErrorInfo* einfo) 157 158 Create a regex object. 159 This function is deluxe version of onig_new(). 160 161 normal return: ONIG_NORMAL 162 163 arguments 164 1 reg: return address of regex object. 165 2 pattern: regex pattern string. 166 3 pattern_end: terminate address of pattern. (pattern + pattern length) 167 4 ci: compile time info. 168 169 ci->num_of_elements: number of elements in ci. (current version: 5) 170 ci->pattern_enc: pattern string character encoding. 171 ci->target_enc: target string character encoding. 172 ci->syntax: address of pattern syntax definition. 173 ci->option: compile time option. 174 ci->case_fold_flag: character matching case fold bit flag for 175 ONIG_OPTION_IGNORECASE mode. 176 177 ONIGENC_CASE_FOLD_MIN: minimum 178 ONIGENC_CASE_FOLD_DEFAULT: minimum 179 onig_set_default_case_fold_flag() 180 181 5 err_info: address for return optional error info. 
182 Use this value as 3rd argument of onig_error_code_to_str(). 183 184 185 A combination of different character encodings is allowed in 186 the following cases only. 187 188 pattern_enc: ASCII, ISO_8859_1 189 target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE 190 191 pattern_enc: UTF16_BE/LE 192 target_enc: UTF16_LE/BE 193 194 pattern_enc: UTF32_BE/LE 195 target_enc: UTF32_LE/BE 196 197 198# void onig_free(regex_t* reg) 199 200 Free memory used by regex object. 201 202 arguments 203 1 reg: regex object. 204 205 206# void onig_free_body(regex_t* reg) 207 208 Free memory used by regex object. (Except reg itself.) 209 210 arguments 211 1 reg: regex object. 212 213 214# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, 215 const UChar* range, OnigRegion* region, OnigOptionType option) 216 217 Search string and return search result and matching region. 218 219 normal return: match position offset (i.e. p - str >= 0) 220 not found: ONIG_MISMATCH (< 0) 221 222 arguments 223 1 reg: regex object 224 2 str: target string 225 3 end: terminate address of target string 226 4 start: search start address of target string 227 5 range: search terminate address of target string 228 in forward search (start <= searched string < range) 229 in backward search (range <= searched string <= start) 230 6 region: address for return group match range info (NULL is allowed) 231 7 option: search time option 232 233 ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line 234 ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line 235 ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. 236 237 238# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, 239 OnigRegion* region, OnigOptionType option) 240 241 Match string and return result and matching region.
242 243 normal return: match length (>= 0) 244 not match: ONIG_MISMATCH ( < 0) 245 246 arguments 247 1 reg: regex object 248 2 str: target string 249 3 end: terminate address of target string 250 4 at: match address of target string 251 5 region: address for return group match range info (NULL is allowed) 252 6 option: search time option 253 254 ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line 255 ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line 256 ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. 257 258 259# int onig_scan(regex_t* reg, const UChar* str, const UChar* end, 260 OnigRegion* region, OnigOptionType option, 261 int (*scan_callback)(int, int, OnigRegion*, void*), 262 void* callback_arg) 263 264 Scan string and callback with matching region. 265 266 normal return: number of matching times 267 error: error code 268 interruption: return value of callback function (!= 0) 269 270 arguments 271 1 reg: regex object 272 2 str: target string 273 3 end: terminate address of target string 274 4 region: address for return group match range info (NULL is allowed) 275 5 option: search time option 276 6 scan_callback: callback function (defined by user) 277 7 callback_arg: optional argument passed to callback 278 279 280# OnigRegion* onig_region_new(void) 281 282 Create a region. 283 284 285# void onig_region_free(OnigRegion* region, int free_self) 286 287 Free memory used by region. 288 289 arguments 290 1 region: target region 291 2 free_self: [1: free all, 0: free memory used in region but not self] 292 293 294# void onig_region_copy(OnigRegion* to, OnigRegion* from) 295 296 Copy contents of region. 297 298 arguments 299 1 to: target region 300 2 from: source region 301 302 303# void onig_region_clear(OnigRegion* region) 304 305 Clear contents of region. 
306 307 arguments 308 1 region: target region 309 310 311# int onig_region_resize(OnigRegion* region, int n) 312 313 Resize group range area of region. 314 315 normal return: ONIG_NORMAL 316 317 arguments 318 1 region: target region 319 2 n: new size 320 321 322# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, 323 int** num_list) 324 325 Return the group number list of the name. 326 Named subexp is defined by (?<name>....). 327 328 normal return: number of groups for the name. 329 (ex. /(?<x>..)(?<x>..)/ ==> 2) 330 name not found: -1 331 332 arguments 333 1 reg: regex object. 334 2 name: group name. 335 3 name_end: terminate address of group name. 336 4 num_list: return list of group numbers. 337 338 339# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, 340 OnigRegion *region) 341 342 Return the group number corresponding to the named backref (\k<name>). 343 If two or more regions for the groups of the name are effective, 344 the greatest number among them is returned. 345 346 normal return: group number. 347 348 arguments 349 1 reg: regex object. 350 2 name: group name. 351 3 name_end: terminate address of group name. 352 4 region: search/match result region. 353 354 355# int onig_foreach_name(regex_t* reg, 356 int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), 357 void* arg) 358 359 Iterate function call for all names. 360 361 normal return: 0 362 error: func's return value. 363 364 arguments 365 1 reg: regex object. 366 2 func: callback function. 367 func(name, name_end, <number of groups>, <group number's list>, 368 reg, arg); 369 if func does not return 0, then iteration is stopped. 370 3 arg: argument for func. 371 372 373# int onig_number_of_names(regex_t* reg) 374 375 Return the number of names defined in the pattern. 376 Multiple definitions of one name are counted as one. 377 378 arguments 379 1 reg: regex object.
380 381 382# OnigEncoding onig_get_encoding(regex_t* reg) 383# OnigOptionType onig_get_options(regex_t* reg) 384# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) 385# OnigSyntaxType* onig_get_syntax(regex_t* reg) 386 387 Return a value of the regex object. 388 389 arguments 390 1 reg: regex object. 391 392 393# int onig_number_of_captures(regex_t* reg) 394 395 Return the number of capture groups in the pattern. 396 397 arguments 398 1 reg: regex object. 399 400 401# int onig_number_of_capture_histories(regex_t* reg) 402 403 Return the number of capture histories defined in the pattern. 404 405 You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY 406 is disabled in the pattern syntax. (It is disabled in the default syntax.) 407 408 arguments 409 1 reg: regex object. 410 411 412 413# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) 414 415 Return the root node of capture history data tree. 416 417 This value is undefined if matching has failed. 418 419 arguments 420 1 region: matching result. 421 422 423# int onig_capture_tree_traverse(OnigRegion* region, int at, 424 int(*func)(int,int,int,int,int,void*), void* arg) 425 426 Traverse and callback in capture history data tree. 427 428 normal return: 0 429 error: callback func's return value. 430 431 arguments 432 1 region: match region data. 433 2 at: callback position. 434 435 ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children. 436 ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback. 437 ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children, 438 and at last callback again. 439 440 3 func: callback function. 441 if func does not return 0, then traverse is stopped.
442 443 int func(int group, int beg, int end, int level, int at, 444 void* arg) 445 446 group: group number 447 beg: capture start position 448 end: capture end position 449 level: nest level (from 0) 450 at: callback position 451 ONIG_TRAVERSE_CALLBACK_AT_FIRST 452 ONIG_TRAVERSE_CALLBACK_AT_LAST 453 arg: optional callback argument 454 455 4 arg: optional callback argument. 456 457 458# int onig_noname_group_capture_is_active(regex_t* reg) 459 460 Return whether noname (unnamed) group capture is active. 461 462 active: 1 463 inactive: 0 464 465 arguments 466 1 reg: regex object. 467 468 if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON 469 --> inactive 470 471 if the regex pattern has a named group 472 and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON 473 and option ONIG_OPTION_CAPTURE_GROUP == OFF 474 --> inactive 475 476 else --> active 477 478 479# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) 480 481 Return previous character head address. 482 483 arguments 484 1 enc: character encoding 485 2 start: string address 486 3 s: target address of string 487 488 489# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, 490 const UChar* start, const UChar* s) 491 492 Return left-adjusted head address of a character. 493 494 arguments 495 1 enc: character encoding 496 2 start: string address 497 3 s: target address of string 498 499 500# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, 501 const UChar* start, const UChar* s) 502 503 Return right-adjusted head address of a character. 504 505 arguments 506 1 enc: character encoding 507 2 start: string address 508 3 s: target address of string 509 510 511# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) 512# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) 513 514 Return number of characters in the string. 515 516 517# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) 518 519 Return number of bytes in the string.
520 521 522# int onig_set_default_syntax(OnigSyntaxType* syntax) 523 524 Set default syntax. 525 526 arguments 527 1 syntax: address of pattern syntax definition. 528 529 530# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) 531 532 Copy syntax. 533 534 arguments 535 1 to: destination address. 536 2 from: source address. 537 538 539# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) 540# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) 541# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) 542# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) 543 544# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) 545# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) 546# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) 547# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) 548 549 Get/Set elements of the syntax. 550 551 arguments 552 1 syntax: syntax 553 2 op, op2, behavior, options: value of element. 554 555 556# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) 557 558 Copy encoding. 559 560 arguments 561 1 to: destination address. 562 2 from: source address. 563 564 565# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, 566 OnigCodePoint code) 567 568 Set a variable meta character to the code point value. 569 Except for the escape character, this meta character specification 570 does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled 571 in the syntax. (It is not enabled in the built-in syntaxes.) 572 573 normal return: ONIG_NORMAL 574 575 arguments 576 1 syntax: target syntax 577 2 what: specifies which meta character it is. 578 579 ONIG_META_CHAR_ESCAPE 580 ONIG_META_CHAR_ANYCHAR 581 ONIG_META_CHAR_ANYTIME 582 ONIG_META_CHAR_ZERO_OR_ONE_TIME 583 ONIG_META_CHAR_ONE_OR_MORE_TIME 584 ONIG_META_CHAR_ANYCHAR_ANYTIME 585 586 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
587 588 589# OnigCaseFoldType onig_get_default_case_fold_flag() 590 591 Get default case fold flag. 592 593 594# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 595 596 Set default case fold flag. 597 598 1 case_fold_flag: case fold flag 599 600 601# unsigned int onig_get_match_stack_limit_size(void) 602 603 Return the maximum match stack size. 604 (default: 0 == unlimited) 605 606 607# int onig_set_match_stack_limit_size(unsigned int size) 608 609 Set the maximum match stack size. 610 (size = 0: unlimited) 611 612 normal return: ONIG_NORMAL 613 614 615# int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) 616 617 Define new Unicode property. 618 (This function is not thread safe.) 619 620 arguments 621 1 name: property name (ASCII only. The characters ' ', '-', '_' are ignored.) 622 2 ranges: property code point ranges 623 (first element is number of ranges.) 624 625 [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ] 626 627 * Don't destroy the ranges after having called this function. 628 629 normal return: ONIG_NORMAL 630 631 632# unsigned int onig_get_parse_depth_limit(void) 633 634 Return the maximum depth of parser recursion. 635 (default: DEFAULT_PARSE_DEPTH_LIMIT defined in regint.h. Currently 4096.) 636 637 638# int onig_set_parse_depth_limit(unsigned int depth) 639 640 Set the maximum depth of parser recursion. 641 (depth = 0: Set to the default value defined in regint.h.) 642 643 normal return: ONIG_NORMAL 644 645 646# int onig_end(void) 647 648 The use of this library is finished. 649 650 normal return: ONIG_NORMAL 651 652 It is not allowed to use regex objects that were created 653 before the onig_end() call. 654 655 656# const char* onig_version(void) 657 658 Return version string. (ex. "5.0.3") 659 660// END 661