xref: /PHP-5.3/ext/mbstring/oniguruma/regsyntax.c (revision 7aab46a2)
1 /**********************************************************************
2   regsyntax.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
32 OnigSyntaxType OnigSyntaxASIS = {
33     0
34   , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
35   , 0
36   , ONIG_OPTION_NONE
37 };
38 
39 OnigSyntaxType OnigSyntaxPosixBasic = {
40   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
41     ONIG_SYN_OP_ESC_BRACE_INTERVAL )
42   , 0
43   , 0
44   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
45 };
46 
47 OnigSyntaxType OnigSyntaxPosixExtended = {
48   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
49     ONIG_SYN_OP_BRACE_INTERVAL |
50     ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
51   , 0
52   , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
53       ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
54       ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
55       ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
56   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
57 };
58 
59 OnigSyntaxType OnigSyntaxEmacs = {
60   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
61     ONIG_SYN_OP_ESC_BRACE_INTERVAL |
62     ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
63     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
64     ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
65     ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
66   , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
67   , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
68   , ONIG_OPTION_NONE
69 };
70 
71 OnigSyntaxType OnigSyntaxGrep = {
72   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
73     ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
74     ONIG_SYN_OP_ESC_VBAR_ALT |
75     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
76     ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
77     ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
78     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
79   , 0
80   , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
81   , ONIG_OPTION_NONE
82 };
83 
84 OnigSyntaxType OnigSyntaxGnuRegex = {
85   SYN_GNU_REGEX_OP
86   , 0
87   , SYN_GNU_REGEX_BV
88   , ONIG_OPTION_NONE
89 };
90 
91 OnigSyntaxType OnigSyntaxJava = {
92   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
93      ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
94      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
95    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
96   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
97       ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
98       ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
99       ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
100       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
101   , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
102   , ONIG_OPTION_SINGLELINE
103 };
104 
105 OnigSyntaxType OnigSyntaxPerl = {
106   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
107      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
108      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
109      ONIG_SYN_OP_ESC_C_CONTROL )
110    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
111   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
112       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
113       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
114       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
115       ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
116   , SYN_GNU_REGEX_BV
117   , ONIG_OPTION_SINGLELINE
118 };
119 
120 /* Perl + named group */
121 OnigSyntaxType OnigSyntaxPerl_NG = {
122   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
123      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
124      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
125      ONIG_SYN_OP_ESC_C_CONTROL )
126    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
127   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
128       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
129       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
130       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
131       ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    |
132       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |
133       ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |
134       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
135   , ( SYN_GNU_REGEX_BV |
136       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
137       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
138   , ONIG_OPTION_SINGLELINE
139 };
140 
141 
142 
143 extern int
onig_set_default_syntax(OnigSyntaxType * syntax)144 onig_set_default_syntax(OnigSyntaxType* syntax)
145 {
146   if (IS_NULL(syntax))
147     syntax = ONIG_SYNTAX_RUBY;
148 
149   OnigDefaultSyntax = syntax;
150   return 0;
151 }
152 
153 extern void
onig_copy_syntax(OnigSyntaxType * to,OnigSyntaxType * from)154 onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
155 {
156   *to = *from;
157 }
158 
159 extern void
onig_set_syntax_op(OnigSyntaxType * syntax,unsigned int op)160 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
161 {
162   syntax->op = op;
163 }
164 
165 extern void
onig_set_syntax_op2(OnigSyntaxType * syntax,unsigned int op2)166 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
167 {
168   syntax->op2 = op2;
169 }
170 
171 extern void
onig_set_syntax_behavior(OnigSyntaxType * syntax,unsigned int behavior)172 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
173 {
174   syntax->behavior = behavior;
175 }
176 
177 extern void
onig_set_syntax_options(OnigSyntaxType * syntax,OnigOptionType options)178 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
179 {
180   syntax->options = options;
181 }
182 
183 extern unsigned int
onig_get_syntax_op(OnigSyntaxType * syntax)184 onig_get_syntax_op(OnigSyntaxType* syntax)
185 {
186   return syntax->op;
187 }
188 
189 extern unsigned int
onig_get_syntax_op2(OnigSyntaxType * syntax)190 onig_get_syntax_op2(OnigSyntaxType* syntax)
191 {
192   return syntax->op2;
193 }
194 
195 extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType * syntax)196 onig_get_syntax_behavior(OnigSyntaxType* syntax)
197 {
198   return syntax->behavior;
199 }
200 
201 extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType * syntax)202 onig_get_syntax_options(OnigSyntaxType* syntax)
203 {
204   return syntax->options;
205 }
206 
207 #ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` in one slot of `enc->meta_char_table`.
 *
 * enc:  encoding whose meta-character table is modified (not NULL-checked).
 * what: one of the ONIG_META_CHAR_* selectors handled below.
 * code: code point written to the selected slot.
 *
 * Returns 0 on success, or ONIGERR_INVALID_ARGUMENT when `what` is not a
 * recognized selector; in that case the table is left untouched.
 */
extern int
onig_set_meta_char(OnigEncoding enc, unsigned int what, OnigCodePoint code)
{
  int status = 0;

  switch (what) {
  case ONIG_META_CHAR_ESCAPE:
    enc->meta_char_table.esc = code;
    break;

  case ONIG_META_CHAR_ANYCHAR:
    enc->meta_char_table.anychar = code;
    break;

  case ONIG_META_CHAR_ANYTIME:
    enc->meta_char_table.anytime = code;
    break;

  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
    enc->meta_char_table.zero_or_one_time = code;
    break;

  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
    enc->meta_char_table.one_or_more_time = code;
    break;

  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
    enc->meta_char_table.anychar_anytime = code;
    break;

  default:
    /* Unknown selector: report the error without modifying the table. */
    status = ONIGERR_INVALID_ARGUMENT;
    break;
  }

  return status;
}
236 #endif /* USE_VARIABLE_META_CHARS */
237