1 /**********************************************************************
2 regsyntax.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 OnigSyntaxType OnigSyntaxASIS = {
33 0
34 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
35 , 0
36 , ONIG_OPTION_NONE
37 };
38
39 OnigSyntaxType OnigSyntaxPosixBasic = {
40 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
41 ONIG_SYN_OP_ESC_BRACE_INTERVAL )
42 , 0
43 , 0
44 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
45 };
46
47 OnigSyntaxType OnigSyntaxPosixExtended = {
48 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
49 ONIG_SYN_OP_BRACE_INTERVAL |
50 ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
51 , 0
52 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
53 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
54 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
55 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
56 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
57 };
58
59 OnigSyntaxType OnigSyntaxEmacs = {
60 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
61 ONIG_SYN_OP_ESC_BRACE_INTERVAL |
62 ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
63 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
64 ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
65 ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
66 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
67 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
68 , ONIG_OPTION_NONE
69 };
70
71 OnigSyntaxType OnigSyntaxGrep = {
72 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
73 ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
74 ONIG_SYN_OP_ESC_VBAR_ALT |
75 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
76 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
77 ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
78 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
79 , 0
80 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
81 , ONIG_OPTION_NONE
82 };
83
84 OnigSyntaxType OnigSyntaxGnuRegex = {
85 SYN_GNU_REGEX_OP
86 , 0
87 , SYN_GNU_REGEX_BV
88 , ONIG_OPTION_NONE
89 };
90
91 OnigSyntaxType OnigSyntaxJava = {
92 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
93 ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
94 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
95 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
96 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
97 ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
98 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
99 ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
100 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
101 , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
102 , ONIG_OPTION_SINGLELINE
103 };
104
105 OnigSyntaxType OnigSyntaxPerl = {
106 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
107 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
108 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
109 ONIG_SYN_OP_ESC_C_CONTROL )
110 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
111 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
112 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
113 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
114 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
115 ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
116 , SYN_GNU_REGEX_BV
117 , ONIG_OPTION_SINGLELINE
118 };
119
120 /* Perl + named group */
121 OnigSyntaxType OnigSyntaxPerl_NG = {
122 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
123 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
124 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
125 ONIG_SYN_OP_ESC_C_CONTROL )
126 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
127 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
128 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
129 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
130 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
131 ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS |
132 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
133 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
134 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
135 , ( SYN_GNU_REGEX_BV |
136 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
137 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
138 , ONIG_OPTION_SINGLELINE
139 };
140
141
142
143 extern int
onig_set_default_syntax(OnigSyntaxType * syntax)144 onig_set_default_syntax(OnigSyntaxType* syntax)
145 {
146 if (IS_NULL(syntax))
147 syntax = ONIG_SYNTAX_RUBY;
148
149 OnigDefaultSyntax = syntax;
150 return 0;
151 }
152
153 extern void
onig_copy_syntax(OnigSyntaxType * to,OnigSyntaxType * from)154 onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
155 {
156 *to = *from;
157 }
158
159 extern void
onig_set_syntax_op(OnigSyntaxType * syntax,unsigned int op)160 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
161 {
162 syntax->op = op;
163 }
164
165 extern void
onig_set_syntax_op2(OnigSyntaxType * syntax,unsigned int op2)166 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
167 {
168 syntax->op2 = op2;
169 }
170
171 extern void
onig_set_syntax_behavior(OnigSyntaxType * syntax,unsigned int behavior)172 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
173 {
174 syntax->behavior = behavior;
175 }
176
177 extern void
onig_set_syntax_options(OnigSyntaxType * syntax,OnigOptionType options)178 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
179 {
180 syntax->options = options;
181 }
182
183 extern unsigned int
onig_get_syntax_op(OnigSyntaxType * syntax)184 onig_get_syntax_op(OnigSyntaxType* syntax)
185 {
186 return syntax->op;
187 }
188
189 extern unsigned int
onig_get_syntax_op2(OnigSyntaxType * syntax)190 onig_get_syntax_op2(OnigSyntaxType* syntax)
191 {
192 return syntax->op2;
193 }
194
195 extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType * syntax)196 onig_get_syntax_behavior(OnigSyntaxType* syntax)
197 {
198 return syntax->behavior;
199 }
200
201 extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType * syntax)202 onig_get_syntax_options(OnigSyntaxType* syntax)
203 {
204 return syntax->options;
205 }
206
#ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` in the entry of enc->meta_char_table selected by `what`,
 * which must be one of the ONIG_META_CHAR_* selectors handled below.
 *
 * Returns 0 on success, or ONIGERR_INVALID_ARGUMENT when `what` matches
 * none of them; in that case the table is left unmodified.
 * `enc` is dereferenced without a null check.
 */
extern int
onig_set_meta_char(OnigEncoding enc, unsigned int what, OnigCodePoint code)
{
  if (what == ONIG_META_CHAR_ESCAPE) {
    enc->meta_char_table.esc = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR) {
    enc->meta_char_table.anychar = code;
  }
  else if (what == ONIG_META_CHAR_ANYTIME) {
    enc->meta_char_table.anytime = code;
  }
  else if (what == ONIG_META_CHAR_ZERO_OR_ONE_TIME) {
    enc->meta_char_table.zero_or_one_time = code;
  }
  else if (what == ONIG_META_CHAR_ONE_OR_MORE_TIME) {
    enc->meta_char_table.one_or_more_time = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR_ANYTIME) {
    enc->meta_char_table.anychar_anytime = code;
  }
  else {
    /* Unknown selector: report it without modifying the table. */
    return ONIGERR_INVALID_ARGUMENT;
  }

  return 0;
}
#endif /* USE_VARIABLE_META_CHARS */
237