xref: /PHP-7.2/ext/mbstring/oniguruma/doc/API (revision 0ae2f95b)
1Oniguruma API  Version 6.1.0  2016/08/22
2
3#include <oniguruma.h>
4
5
6# int onig_initialize(OnigEncoding use_encodings[], int num_encodings)
7
8  Initialize library.
9
10  You have to call it explicitly.
11
12  * onig_init() is deprecated.
13
14  arguments
15  1 use_encodings:         array of encodings used in application.
16  2 num_encodings:         number of encodings.
17
18
19# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
20
21  Get error message string.
22  If this function is used for onig_new(),
23  don't call this after the pattern argument of onig_new() is freed.
24
25  normal return: error message string length
26
27  arguments
28  1 err_buf:              error message string buffer.
29                          (required size: ONIG_MAX_ERROR_MESSAGE_LEN)
30  2 err_code:             error code returned by other API functions.
31  3 err_info (optional):  error info returned by onig_new().
32
33
34# void onig_set_warn_func(OnigWarnFunc func)
35
36  Set warning function.
37
38  WARNING:
39    '[', '-', ']' in character class without escape.
40    ']' in pattern without escape.
41
42  arguments
43  1 func:     function pointer.    void (*func)(char* warning_message)
44
45
46# void onig_set_verb_warn_func(OnigWarnFunc func)
47
48  Set verbose warning function.
49
50  WARNING:
51    redundant nested repeat operator.
52
53  arguments
54  1 func:     function pointer.    void (*func)(char* warning_message)
55
56
57# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
58            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
59            OnigErrorInfo* err_info)
60
61  Create a regex object.
62
63  normal return: ONIG_NORMAL
64
65  arguments
66  1 reg:         return regex object's address.
67  2 pattern:     regex pattern string.
68  3 pattern_end: terminate address of pattern. (pattern + pattern length)
69  4 option:      compile time options.
70
71      ONIG_OPTION_NONE               no option
72      ONIG_OPTION_SINGLELINE         '^' -> '\A', '$' -> '\Z'
73      ONIG_OPTION_MULTILINE          '.' match with newline
74      ONIG_OPTION_IGNORECASE         ambiguity match on
75      ONIG_OPTION_EXTEND             extended pattern form
76      ONIG_OPTION_FIND_LONGEST       find longest match
77      ONIG_OPTION_FIND_NOT_EMPTY     ignore empty match
78      ONIG_OPTION_NEGATE_SINGLELINE
79            clear ONIG_OPTION_SINGLELINE which is enabled on
80            ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
81            ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA
82
83      ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured.
84      ONIG_OPTION_CAPTURE_GROUP      named and no-named group captured.
85
86  5 enc:        character encoding.
87
88      ONIG_ENCODING_ASCII         ASCII
89      ONIG_ENCODING_ISO_8859_1    ISO 8859-1
90      ONIG_ENCODING_ISO_8859_2    ISO 8859-2
91      ONIG_ENCODING_ISO_8859_3    ISO 8859-3
92      ONIG_ENCODING_ISO_8859_4    ISO 8859-4
93      ONIG_ENCODING_ISO_8859_5    ISO 8859-5
94      ONIG_ENCODING_ISO_8859_6    ISO 8859-6
95      ONIG_ENCODING_ISO_8859_7    ISO 8859-7
96      ONIG_ENCODING_ISO_8859_8    ISO 8859-8
97      ONIG_ENCODING_ISO_8859_9    ISO 8859-9
98      ONIG_ENCODING_ISO_8859_10   ISO 8859-10
99      ONIG_ENCODING_ISO_8859_11   ISO 8859-11
100      ONIG_ENCODING_ISO_8859_13   ISO 8859-13
101      ONIG_ENCODING_ISO_8859_14   ISO 8859-14
102      ONIG_ENCODING_ISO_8859_15   ISO 8859-15
103      ONIG_ENCODING_ISO_8859_16   ISO 8859-16
104      ONIG_ENCODING_UTF8          UTF-8
105      ONIG_ENCODING_UTF16_BE      UTF-16BE
106      ONIG_ENCODING_UTF16_LE      UTF-16LE
107      ONIG_ENCODING_UTF32_BE      UTF-32BE
108      ONIG_ENCODING_UTF32_LE      UTF-32LE
109      ONIG_ENCODING_EUC_JP        EUC-JP
110      ONIG_ENCODING_EUC_TW        EUC-TW
111      ONIG_ENCODING_EUC_KR        EUC-KR
112      ONIG_ENCODING_EUC_CN        EUC-CN
113      ONIG_ENCODING_SJIS          Shift_JIS
114      ONIG_ENCODING_KOI8_R        KOI8-R
115      ONIG_ENCODING_CP1251        CP1251
116      ONIG_ENCODING_BIG5          Big5
117      ONIG_ENCODING_GB18030       GB18030
118
119      or any OnigEncodingType data address defined by user.
120
121  6 syntax:     address of pattern syntax definition.
122
123      ONIG_SYNTAX_ASIS              plain text
124      ONIG_SYNTAX_POSIX_BASIC       POSIX Basic RE
125      ONIG_SYNTAX_POSIX_EXTENDED    POSIX Extended RE
126      ONIG_SYNTAX_EMACS             Emacs
127      ONIG_SYNTAX_GREP              grep
128      ONIG_SYNTAX_GNU_REGEX         GNU regex
129      ONIG_SYNTAX_JAVA              Java (Sun java.util.regex)
130      ONIG_SYNTAX_PERL              Perl
131      ONIG_SYNTAX_PERL_NG           Perl + named group
132      ONIG_SYNTAX_RUBY              Ruby
133      ONIG_SYNTAX_DEFAULT           default (== Ruby)
134                                   onig_set_default_syntax()
135
136      or any OnigSyntaxType data address defined by user.
137
138  7 err_info: address for return optional error info.
139              Use this value as 3rd argument of onig_error_code_to_str().
140
141
142
143# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
144            const UChar* pattern_end,
145            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
146            OnigErrorInfo* err_info)
147
148  Create a regex object.
149  reg object area is not allocated in this function.
150
151  normal return: ONIG_NORMAL
152
153
154
155# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
156                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
157
158  Create a regex object.
159  This function is deluxe version of onig_new().
160
161  normal return: ONIG_NORMAL
162
163  arguments
164  1 reg:         return address of regex object.
165  2 pattern:     regex pattern string.
166  3 pattern_end: terminate address of pattern. (pattern + pattern length)
167  4 ci:          compile time info.
168
169    ci->num_of_elements: number of elements in ci. (current version: 5)
170    ci->pattern_enc:     pattern string character encoding.
171    ci->target_enc:      target string character encoding.
172    ci->syntax:          address of pattern syntax definition.
173    ci->option:          compile time option.
174    ci->case_fold_flag:  character matching case fold bit flag for
175                         ONIG_OPTION_IGNORECASE mode.
176
177       ONIGENC_CASE_FOLD_MIN:           minimum
178       ONIGENC_CASE_FOLD_DEFAULT:       minimum
179                                        onig_set_default_case_fold_flag()
180
181  5 einfo:       address for return optional error info.
182                 Use this value as 3rd argument of onig_error_code_to_str().
183
184
185  Different character encoding combination is allowed for
186  the following cases only.
187
188    pattern_enc: ASCII, ISO_8859_1
189    target_enc:  UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
190
191    pattern_enc: UTF16_BE/LE
192    target_enc:  UTF16_LE/BE
193
194    pattern_enc: UTF32_BE/LE
195    target_enc:  UTF32_LE/BE
196
197
198# void onig_free(regex_t* reg)
199
200  Free memory used by regex object.
201
202  arguments
203  1 reg: regex object.
204
205
206# void onig_free_body(regex_t* reg)
207
208  Free memory used by regex object. (Except reg itself.)
209
210  arguments
211  1 reg: regex object.
212
213
214# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
215                   const UChar* range, OnigRegion* region, OnigOptionType option)
216
217  Search string and return search result and matching region.
218
219  normal return: match position offset (i.e.  p - str >= 0)
220  not found:     ONIG_MISMATCH (< 0)
221
222  arguments
223  1 reg:    regex object
224  2 str:    target string
225  3 end:    terminate address of target string
226  4 start:  search start address of target string
227  5 range:  search terminate address of target string
228    in forward search  (start <= searched string < range)
229    in backward search (range <= searched string <= start)
230  6 region: address for return group match range info (NULL is allowed)
231  7 option: search time option
232
233    ONIG_OPTION_NOTBOL        string head(str) isn't considered as begin of line
234    ONIG_OPTION_NOTEOL        string end (end) isn't considered as end of line
235    ONIG_OPTION_POSIX_REGION  region argument is regmatch_t[] of POSIX API.
236
237
238# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
239		 OnigRegion* region, OnigOptionType option)
240
241  Match string and return result and matching region.
242
243  normal return: match length  (>= 0)
244  not match:     ONIG_MISMATCH ( < 0)
245
246  arguments
247  1 reg:    regex object
248  2 str:    target string
249  3 end:    terminate address of target string
250  4 at:     match address of target string
251  5 region: address for return group match range info (NULL is allowed)
252  6 option: search time option
253
254    ONIG_OPTION_NOTBOL       string head(str) isn't considered as begin of line
255    ONIG_OPTION_NOTEOL       string end (end) isn't considered as end of line
256    ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
257
258
259# int onig_scan(regex_t* reg, const UChar* str, const UChar* end,
260                OnigRegion* region, OnigOptionType option,
261                int (*scan_callback)(int, int, OnigRegion*, void*),
262                void* callback_arg)
263
264  Scan string and callback with matching region.
265
266  normal return: number of matching times
267  error:         error code
268  interruption:  return value of callback function (!= 0)
269
270  arguments
271  1 reg:    regex object
272  2 str:    target string
273  3 end:    terminate address of target string
274  4 region: address for return group match range info (NULL is allowed)
275  5 option: search time option
276  6 scan_callback: callback function (defined by user)
277  7 callback_arg:  optional argument passed to callback
278
279
280# OnigRegion* onig_region_new(void)
281
282  Create a region.
283
284
285# void onig_region_free(OnigRegion* region, int free_self)
286
287  Free memory used by region.
288
289  arguments
290  1 region:    target region
291  2 free_self: [1: free all, 0: free memory used in region but not self]
292
293
294# void onig_region_copy(OnigRegion* to, OnigRegion* from)
295
296  Copy contents of region.
297
298  arguments
299  1 to:   target region
300  2 from: source region
301
302
303# void onig_region_clear(OnigRegion* region)
304
305  Clear contents of region.
306
307  arguments
308  1 region: target region
309
310
311# int onig_region_resize(OnigRegion* region, int n)
312
313  Resize group range area of region.
314
315  normal return: ONIG_NORMAL
316
317  arguments
318  1 region: target region
319  2 n:      new size
320
321
322# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
323                                  int** num_list)
324
325  Return the group number list of the name.
326  Named subexp is defined by (?<name>....).
327
328  normal return:  number of groups for the name.
329                  (ex. /(?<x>..)(?<x>..)/  ==>  2)
330  name not found: -1
331
332  arguments
333  1 reg:       regex object.
334  2 name:      group name.
335  3 name_end:  terminate address of group name.
336  4 num_list:  return list of group number.
337
338
339# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
340                                  OnigRegion *region)
341
342  Return the group number corresponding to the named backref (\k<name>).
343  If two or more regions for the groups of the name are effective,
344  the greatest group number among them is returned.
345
346  normal return: group number.
347
348  arguments
349  1 reg:      regex object.
350  2 name:     group name.
351  3 name_end: terminate address of group name.
352  4 region:   search/match result region.
353
354
355# int onig_foreach_name(regex_t* reg,
356                        int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
357                        void* arg)
358
359  Iterate function call for all names.
360
361  normal return: 0
362  error:         func's return value.
363
364  arguments
365  1 reg:     regex object.
366  2 func:    callback function.
367             func(name, name_end, <number of groups>, <group number's list>,
368                  reg, arg);
369             if func does not return 0, then iteration is stopped.
370  3 arg:     argument for func.
371
372
373# int onig_number_of_names(regex_t* reg)
374
375  Return the number of names defined in the pattern.
376  Multiple definitions of one name are counted as one.
377
378  arguments
379  1 reg:     regex object.
380
381
382# OnigEncoding     onig_get_encoding(regex_t* reg)
383# OnigOptionType   onig_get_options(regex_t* reg)
384# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
385# OnigSyntaxType*  onig_get_syntax(regex_t* reg)
386
387  Return a value of the regex object.
388
389  arguments
390  1 reg:     regex object.
391
392
393# int onig_number_of_captures(regex_t* reg)
394
395  Return the number of capture groups in the pattern.
396
397  arguments
398  1 reg:     regex object.
399
400
401# int onig_number_of_capture_histories(regex_t* reg)
402
403  Return the number of capture histories defined in the pattern.
404
405  You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
406  is disabled in the pattern syntax. (It is disabled in the default syntax.)
407
408  arguments
409  1 reg:     regex object.
410
411
412
413# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
414
415  Return the root node of capture history data tree.
416
417  This value is undefined if matching has failed.
418
419  arguments
420  1 region: matching result.
421
422
423# int onig_capture_tree_traverse(OnigRegion* region, int at,
424                  int(*func)(int,int,int,int,int,void*), void* arg)
425
426  Traverse and callback in capture history data tree.
427
428  normal return: 0
429  error:         callback func's return value.
430
431  arguments
432  1 region:  match region data.
433  2 at:      callback position.
434
435    ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
436    ONIG_TRAVERSE_CALLBACK_AT_LAST:  traverse children first, then callback.
437    ONIG_TRAVERSE_CALLBACK_AT_BOTH:  callback first, then traverse children,
438                                     and at last callback again.
439
440  3 func:    callback function.
441             if func does not return 0, then traverse is stopped.
442
443             int func(int group, int beg, int end, int level, int at,
444                      void* arg)
445
446               group: group number
447               beg:   capture start position
448               end:   capture end position
449               level: nest level (from 0)
450               at:    callback position
451                      ONIG_TRAVERSE_CALLBACK_AT_FIRST
452                      ONIG_TRAVERSE_CALLBACK_AT_LAST
453               arg:   optional callback argument
454
455  4 arg:     optional callback argument.
456
457
458# int onig_noname_group_capture_is_active(regex_t* reg)
459
460  Return noname group capture activity.
461
462  active:   1
463  inactive: 0
464
465  arguments
466  1 reg:     regex object.
467
468  if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
469    --> inactive
470
471  if the regex pattern has a named group
472     and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
473     and option ONIG_OPTION_CAPTURE_GROUP == OFF
474    --> inactive
475
476  else --> active
477
478
479# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
480
481  Return previous character head address.
482
483  arguments
484  1 enc:   character encoding
485  2 start: string address
486  3 s:     target address of string
487
488
489# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
490                                           const UChar* start, const UChar* s)
491
492  Return left-adjusted head address of a character.
493
494  arguments
495  1 enc:   character encoding
496  2 start: string address
497  3 s:     target address of string
498
499
500# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
501                                            const UChar* start, const UChar* s)
502
503  Return right-adjusted head address of a character.
504
505  arguments
506  1 enc:   character encoding
507  2 start: string address
508  3 s:     target address of string
509
510
511# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
512# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
513
514  Return number of characters in the string.
515
516
517# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
518
519  Return number of bytes in the string.
520
521
522# int onig_set_default_syntax(OnigSyntaxType* syntax)
523
524  Set default syntax.
525
526  arguments
527  1 syntax: address of pattern syntax definition.
528
529
530# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
531
532  Copy syntax.
533
534  arguments
535  1 to:   destination address.
536  2 from: source address.
537
538
539# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
540# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
541# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
542# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
543
544# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
545# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
546# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
547# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
548
549  Get/Set elements of the syntax.
550
551  arguments
552  1 syntax:  syntax
553  2 op, op2, behavior, options: value of element.
554
555
556# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
557
558  Copy encoding.
559
560  arguments
561  1 to:   destination address.
562  2 from: source address.
563
564
565# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
566                         OnigCodePoint code)
567
568  Set a variable meta character to the code point value.
569  Except for the escape character, this meta character specification
570  does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled
571  in the syntax. (It is not enabled in the built-in syntaxes.)
572
573  normal return: ONIG_NORMAL
574
575  arguments
576  1 syntax: target syntax
577  2 what:   specifies which meta character it is.
578
579          ONIG_META_CHAR_ESCAPE
580          ONIG_META_CHAR_ANYCHAR
581          ONIG_META_CHAR_ANYTIME
582          ONIG_META_CHAR_ZERO_OR_ONE_TIME
583          ONIG_META_CHAR_ONE_OR_MORE_TIME
584          ONIG_META_CHAR_ANYCHAR_ANYTIME
585
586  3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
587
588
589# OnigCaseFoldType onig_get_default_case_fold_flag()
590
591  Get default case fold flag.
592
593
594# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
595
596  Set default case fold flag.
597
598  1 case_fold_flag: case fold flag
599
600
601# unsigned int onig_get_match_stack_limit_size(void)
602
603  Return the maximum number of stack size.
604  (default: 0 == unlimited)
605
606
607# int onig_set_match_stack_limit_size(unsigned int size)
608
609  Set the maximum number of stack size.
610  (size = 0: unlimited)
611
612  normal return: ONIG_NORMAL
613
614
615# int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
616
617  Define new Unicode property.
618  (This function is not thread safe.)
619
620  arguments
621  1 name:    property name (ASCII only. character ' ', '-', '_' are ignored.)
622  2 ranges:  property code point ranges
623             (first element is number of ranges.)
624
625    [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ]
626
627    * Don't destroy the ranges after having called this function.
628
629  normal return: ONIG_NORMAL
630
631
632# unsigned int onig_get_parse_depth_limit(void)
633
634  Return the maximum depth of parser recursion.
635  (default: DEFAULT_PARSE_DEPTH_LIMIT defined in regint.h. Currently 4096.)
636
637
638# int onig_set_parse_depth_limit(unsigned int depth)
639
640  Set the maximum depth of parser recursion.
641  (depth = 0: Set to the default value defined in regint.h.)
642
643  normal return: ONIG_NORMAL
644
645
646# int onig_end(void)
647
648  The use of this library is finished.
649
650  normal return: ONIG_NORMAL
651
652  It is not allowed to use regex objects which were created
653  before the onig_end() call.
654
655
656# const char* onig_version(void)
657
658  Return version string.  (ex. "5.0.3")
659
660// END
661