xref: /PHP-5.5/ext/mbstring/oniguruma/doc/API (revision fe92d64a)
1Oniguruma API  Version 5.9.2  2008/02/19
2
3#include <oniguruma.h>
4
5
6# int onig_init(void)
7
8  Initialize library.
9
10  You don't have to call it explicitly, because it is called in onig_new().
11
12
13# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
14
15  Get error message string.
16  If this function is used for onig_new(),
17  don't call this after the pattern argument of onig_new() is freed.
18
19  normal return: error message string length
20
21  arguments
22  1 err_buf:              error message string buffer.
23                          (required size: ONIG_MAX_ERROR_MESSAGE_LEN)
24  2 err_code:             error code returned by other API functions.
25  3 err_info (optional):  error info returned by onig_new().
26
27
28# void onig_set_warn_func(OnigWarnFunc func)
29
30  Set warning function.
31
32  WARNING:
33    '[', '-', ']' in character class without escape.
34    ']' in pattern without escape.
35
36  arguments
37  1 func:     function pointer.    void (*func)(char* warning_message)
38
39
40# void onig_set_verb_warn_func(OnigWarnFunc func)
41
42  Set verbose warning function.
43
44  WARNING:
45    redundant nested repeat operator.
46
47  arguments
48  1 func:     function pointer.    void (*func)(char* warning_message)
49
50
51# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
52            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
53            OnigErrorInfo* err_info)
54
55  Create a regex object.
56
57  normal return: ONIG_NORMAL
58
59  arguments
60  1 reg:         return regex object's address.
61  2 pattern:     regex pattern string.
62  3 pattern_end: terminate address of pattern. (pattern + pattern length)
63  4 option:      compile time options.
64
65      ONIG_OPTION_NONE               no option
66      ONIG_OPTION_SINGLELINE         '^' -> '\A', '$' -> '\Z'
67      ONIG_OPTION_MULTILINE          '.' match with newline
68      ONIG_OPTION_IGNORECASE         ambiguity match on
69      ONIG_OPTION_EXTEND             extended pattern form
70      ONIG_OPTION_FIND_LONGEST       find longest match
71      ONIG_OPTION_FIND_NOT_EMPTY     ignore empty match
72      ONIG_OPTION_NEGATE_SINGLELINE
73            clear ONIG_OPTION_SINGLELINE which is enabled on
74            ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
75            ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA
76
77      ONIG_OPTION_DONT_CAPTURE_GROUP only named groups are captured.
78      ONIG_OPTION_CAPTURE_GROUP      named and unnamed groups are captured.
79
80  5 enc:        character encoding.
81
82      ONIG_ENCODING_ASCII         ASCII
83      ONIG_ENCODING_ISO_8859_1    ISO 8859-1
84      ONIG_ENCODING_ISO_8859_2    ISO 8859-2
85      ONIG_ENCODING_ISO_8859_3    ISO 8859-3
86      ONIG_ENCODING_ISO_8859_4    ISO 8859-4
87      ONIG_ENCODING_ISO_8859_5    ISO 8859-5
88      ONIG_ENCODING_ISO_8859_6    ISO 8859-6
89      ONIG_ENCODING_ISO_8859_7    ISO 8859-7
90      ONIG_ENCODING_ISO_8859_8    ISO 8859-8
91      ONIG_ENCODING_ISO_8859_9    ISO 8859-9
92      ONIG_ENCODING_ISO_8859_10   ISO 8859-10
93      ONIG_ENCODING_ISO_8859_11   ISO 8859-11
94      ONIG_ENCODING_ISO_8859_13   ISO 8859-13
95      ONIG_ENCODING_ISO_8859_14   ISO 8859-14
96      ONIG_ENCODING_ISO_8859_15   ISO 8859-15
97      ONIG_ENCODING_ISO_8859_16   ISO 8859-16
98      ONIG_ENCODING_UTF8          UTF-8
99      ONIG_ENCODING_UTF16_BE      UTF-16BE
100      ONIG_ENCODING_UTF16_LE      UTF-16LE
101      ONIG_ENCODING_UTF32_BE      UTF-32BE
102      ONIG_ENCODING_UTF32_LE      UTF-32LE
103      ONIG_ENCODING_EUC_JP        EUC-JP
104      ONIG_ENCODING_EUC_TW        EUC-TW
105      ONIG_ENCODING_EUC_KR        EUC-KR
106      ONIG_ENCODING_EUC_CN        EUC-CN
107      ONIG_ENCODING_SJIS          Shift_JIS
108      ONIG_ENCODING_KOI8_R        KOI8-R
109      ONIG_ENCODING_CP1251        CP1251
110      ONIG_ENCODING_BIG5          Big5
111      ONIG_ENCODING_GB18030       GB18030
112
113      or any OnigEncodingType data address defined by user.
114
115  6 syntax:     address of pattern syntax definition.
116
117      ONIG_SYNTAX_ASIS              plain text
118      ONIG_SYNTAX_POSIX_BASIC       POSIX Basic RE
119      ONIG_SYNTAX_POSIX_EXTENDED    POSIX Extended RE
120      ONIG_SYNTAX_EMACS             Emacs
121      ONIG_SYNTAX_GREP              grep
122      ONIG_SYNTAX_GNU_REGEX         GNU regex
123      ONIG_SYNTAX_JAVA              Java (Sun java.util.regex)
124      ONIG_SYNTAX_PERL              Perl
125      ONIG_SYNTAX_PERL_NG           Perl + named group
126      ONIG_SYNTAX_RUBY              Ruby
127      ONIG_SYNTAX_DEFAULT           default (== Ruby)
128                                   onig_set_default_syntax()
129
130      or any OnigSyntaxType data address defined by user.
131
132  7 err_info: address for returning optional error info.
133              Use this value as 3rd argument of onig_error_code_to_str().
134
135
136
137# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
138            const UChar* pattern_end,
139            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
140            OnigErrorInfo* err_info)
141
142  Create a regex object.
143  reg object area is not allocated in this function.
144
145  normal return: ONIG_NORMAL
146
147
148
149# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
150                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
151
152  Create a regex object.
153  This function is a deluxe version of onig_new().
154
155  normal return: ONIG_NORMAL
156
157  arguments
158  1 reg:         return address of regex object.
159  2 pattern:     regex pattern string.
160  3 pattern_end: terminate address of pattern. (pattern + pattern length)
161  4 ci:          compile time info.
162
163    ci->num_of_elements: number of elements in ci. (current version: 5)
164    ci->pattern_enc:     pattern string character encoding.
165    ci->target_enc:      target string character encoding.
166    ci->syntax:          address of pattern syntax definition.
167    ci->option:          compile time option.
168    ci->case_fold_flag:  character matching case fold bit flag for
169                         ONIG_OPTION_IGNORECASE mode.
170
171       ONIGENC_CASE_FOLD_MIN:           minimum
172       ONIGENC_CASE_FOLD_DEFAULT:       minimum
173                                        onig_set_default_case_fold_flag()
174
175  5 einfo:       address for returning optional error info.
176                 Use this value as 3rd argument of onig_error_code_to_str().
177
178
179  Different character encoding combination is allowed for
180  the following cases only.
181
182    pattern_enc: ASCII, ISO_8859_1
183    target_enc:  UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
184
185    pattern_enc: UTF16_BE/LE
186    target_enc:  UTF16_LE/BE
187
188    pattern_enc: UTF32_BE/LE
189    target_enc:  UTF32_LE/BE
190
191
192# void onig_free(regex_t* reg)
193
194  Free memory used by regex object.
195
196  arguments
197  1 reg: regex object.
198
199
200# void onig_free_body(regex_t* reg)
201
202  Free memory used by regex object. (Except reg itself.)
203
204  arguments
205  1 reg: regex object.
206
207
208# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
209                   const UChar* range, OnigRegion* region, OnigOptionType option)
210
211  Search string and return search result and matching region.
212
213  normal return: match position offset (i.e.  p - str >= 0)
214  not found:     ONIG_MISMATCH (< 0)
215
216  arguments
217  1 reg:    regex object
218  2 str:    target string
219  3 end:    terminate address of target string
220  4 start:  search start address of target string
221  5 range:  search terminate address of target string
222    in forward search  (start <= searched string < range)
223    in backward search (range <= searched string <= start)
224  6 region: address for returning group match range info (NULL is allowed)
225  7 option: search time option
226
227    ONIG_OPTION_NOTBOL        string head(str) isn't considered as begin of line
228    ONIG_OPTION_NOTEOL        string end (end) isn't considered as end of line
229    ONIG_OPTION_POSIX_REGION  region argument is regmatch_t[] of POSIX API.
230
231
232# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
233		 OnigRegion* region, OnigOptionType option)
234
235  Match string and return result and matching region.
236
237  normal return: match length  (>= 0)
238  not match:     ONIG_MISMATCH ( < 0)
239
240  arguments
241  1 reg:    regex object
242  2 str:    target string
243  3 end:    terminate address of target string
244  4 at:     match address of target string
245  5 region: address for returning group match range info (NULL is allowed)
246  6 option: search time option
247
248    ONIG_OPTION_NOTBOL       string head(str) isn't considered as begin of line
249    ONIG_OPTION_NOTEOL       string end (end) isn't considered as end of line
250    ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
251
252
253# OnigRegion* onig_region_new(void)
254
255  Create a region.
256
257
258# void onig_region_free(OnigRegion* region, int free_self)
259
260  Free memory used by region.
261
262  arguments
263  1 region:    target region
264  2 free_self: [1: free all, 0: free memory used in region but not self]
265
266
267# void onig_region_copy(OnigRegion* to, OnigRegion* from)
268
269  Copy contents of region.
270
271  arguments
272  1 to:   target region
273  2 from: source region
274
275
276# void onig_region_clear(OnigRegion* region)
277
278  Clear contents of region.
279
280  arguments
281  1 region: target region
282
283
284# int onig_region_resize(OnigRegion* region, int n)
285
286  Resize group range area of region.
287
288  normal return: ONIG_NORMAL
289
290  arguments
291  1 region: target region
292  2 n:      new size
293
294
295# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
296                                  int** num_list)
297
298  Return the group number list of the name.
299  Named subexp is defined by (?<name>....).
300
301  normal return:  number of groups for the name.
302                  (ex. /(?<x>..)(?<x>..)/  ==>  2)
303  name not found: -1
304
305  arguments
306  1 reg:       regex object.
307  2 name:      group name.
308  3 name_end:  terminate address of group name.
309  4 num_list:  return list of group number.
310
311
312# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
313                                  OnigRegion *region)
314
315  Return the group number corresponding to the named backref (\k<name>).
316  If two or more regions for the groups of the name are effective,
317  the greatest group number among them is returned.
318
319  normal return: group number.
320
321  arguments
322  1 reg:      regex object.
323  2 name:     group name.
324  3 name_end: terminate address of group name.
325  4 region:   search/match result region.
326
327
328# int onig_foreach_name(regex_t* reg,
329                        int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
330                        void* arg)
331
332  Iterate function call for all names.
333
334  normal return: 0
335  error:         func's return value.
336
337  arguments
338  1 reg:     regex object.
339  2 func:    callback function.
340             func(name, name_end, <number of groups>, <group number's list>,
341                  reg, arg);
342             if func does not return 0, then iteration is stopped.
343  3 arg:     argument for func.
344
345
346# int onig_number_of_names(regex_t* reg)
347
348  Return the number of names defined in the pattern.
349  Multiple definitions of one name are counted as one.
350
351  arguments
352  1 reg:     regex object.
353
354
355# OnigEncoding     onig_get_encoding(regex_t* reg)
356# OnigOptionType   onig_get_options(regex_t* reg)
357# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
358# OnigSyntaxType*  onig_get_syntax(regex_t* reg)
359
360  Return a value of the regex object.
361
362  arguments
363  1 reg:     regex object.
364
365
366# int onig_number_of_captures(regex_t* reg)
367
368  Return the number of capture groups in the pattern.
369
370  arguments
371  1 reg:     regex object.
372
373
374# int onig_number_of_capture_histories(regex_t* reg)
375
376  Return the number of capture histories defined in the pattern.
377
378  You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
379  is disabled in the pattern syntax. (It is disabled in the default syntax.)
380
381  arguments
382  1 reg:     regex object.
383
384
385
386# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
387
388  Return the root node of capture history data tree.
389
390  This value is undefined if matching has failed.
391
392  arguments
393  1 region: matching result.
394
395
396# int onig_capture_tree_traverse(OnigRegion* region, int at,
397                  int(*func)(int,int,int,int,int,void*), void* arg)
398
399 Traverse and callback in capture history data tree.
400
401  normal return: 0
402  error:         callback func's return value.
403
404  arguments
405  1 region:  match region data.
406  2 at:      callback position.
407
408    ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
409    ONIG_TRAVERSE_CALLBACK_AT_LAST:  traverse children first, then callback.
410    ONIG_TRAVERSE_CALLBACK_AT_BOTH:  callback first, then traverse children,
411                                     and at last callback again.
412
413  3 func:    callback function.
414             if func does not return 0, then traverse is stopped.
415
416             int func(int group, int beg, int end, int level, int at,
417                      void* arg)
418
419               group: group number
420               beg:   capture start position
421               end:   capture end position
422               level: nest level (from 0)
423               at:    callback position
424                      ONIG_TRAVERSE_CALLBACK_AT_FIRST
425                      ONIG_TRAVERSE_CALLBACK_AT_LAST
426               arg:   optional callback argument
427
428  4 arg:     optional callback argument.
429
430
431# int onig_noname_group_capture_is_active(regex_t* reg)
432
433  Return noname group capture activity.
434
435  active:   1
436  inactive: 0
437
438  arguments
439  1 reg:     regex object.
440
441  if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
442    --> inactive
443
444  if the regex pattern has a named group
445     and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
446     and option ONIG_OPTION_CAPTURE_GROUP == OFF
447    --> inactive
448
449  else --> active
450
451
452# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
453
454  Return previous character head address.
455
456  arguments
457  1 enc:   character encoding
458  2 start: string address
459  3 s:     target address of string
460
461
462# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
463                                           const UChar* start, const UChar* s)
464
465  Return left-adjusted head address of a character.
466
467  arguments
468  1 enc:   character encoding
469  2 start: string address
470  3 s:     target address of string
471
472
473# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
474                                            const UChar* start, const UChar* s)
475
476  Return right-adjusted head address of a character.
477
478  arguments
479  1 enc:   character encoding
480  2 start: string address
481  3 s:     target address of string
482
483
484# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
485# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
486
487  Return number of characters in the string.
488
489
490# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
491
492  Return number of bytes in the string.
493
494
495# int onig_set_default_syntax(OnigSyntaxType* syntax)
496
497  Set default syntax.
498
499  arguments
500  1 syntax: address of pattern syntax definition.
501
502
503# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
504
505  Copy syntax.
506
507  arguments
508  1 to:   destination address.
509  2 from: source address.
510
511
512# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
513# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
514# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
515# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
516
517# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
518# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
519# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
520# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
521
522 Get/Set elements of the syntax.
523
524  arguments
525  1 syntax:  syntax
526  2 op, op2, behavior, options: value of element.
527
528
529# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
530
531  Copy encoding.
532
533  arguments
534  1 to:   destination address.
535  2 from: source address.
536
537
538# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
539                         OnigCodePoint code)
540
541  Set a variable meta character to the code point value.
542  Except for the escape character, this meta character specification
543  does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled
544  in the syntax. (It is not enabled in the built-in syntaxes.)
545
546  normal return: ONIG_NORMAL
547
548  arguments
549  1 syntax: target syntax
550  2 what:   specifies which meta character it is.
551
552          ONIG_META_CHAR_ESCAPE
553          ONIG_META_CHAR_ANYCHAR
554          ONIG_META_CHAR_ANYTIME
555          ONIG_META_CHAR_ZERO_OR_ONE_TIME
556          ONIG_META_CHAR_ONE_OR_MORE_TIME
557          ONIG_META_CHAR_ANYCHAR_ANYTIME
558
559  3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
560
561
562# OnigCaseFoldType onig_get_default_case_fold_flag()
563
564  Get default case fold flag.
565
566
567# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
568
569  Set default case fold flag.
570
571  1 case_fold_flag: case fold flag
572
573
574# unsigned int onig_get_match_stack_limit_size(void)
575
576  Return the maximum stack size.
577  (default: 0 == unlimited)
578
579
580# int onig_set_match_stack_limit_size(unsigned int size)
581
582  Set the maximum stack size.
583  (size = 0: unlimited)
584
585  normal return: ONIG_NORMAL
586
587
588# int onig_end(void)
589
590  The use of this library is finished.
591
592  normal return: ONIG_NORMAL
593
594  It is not allowed to use regex objects which were created
595  before the onig_end() call.
596
597
598# const char* onig_version(void)
599
600  Return version string.  (ex. "5.0.3")
601
602// END
603