1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
/* Default encoding shared by the whole library; returned by
 * onigenc_get_default_encoding() and overwritten by
 * onigenc_set_default_encoding(). */
32 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33
/*
 * Initialise the encoding layer.
 * There is nothing to set up, so the call always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
39
40 extern OnigEncoding
onigenc_get_default_encoding(void)41 onigenc_get_default_encoding(void)
42 {
43 return OnigEncDefaultCharEncoding;
44 }
45
46 extern int
onigenc_set_default_encoding(OnigEncoding enc)47 onigenc_set_default_encoding(OnigEncoding enc)
48 {
49 OnigEncDefaultCharEncoding = enc;
50 return 0;
51 }
52
53 extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)54 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
55 {
56 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
57 if (p < s) {
58 p += enc_len(enc, p);
59 }
60 return p;
61 }
62
63 extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,const UChar * start,const UChar * s,const UChar ** prev)64 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
65 const UChar* start, const UChar* s, const UChar** prev)
66 {
67 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
68
69 if (p < s) {
70 if (prev) *prev = (const UChar* )p;
71 p += enc_len(enc, p);
72 }
73 else {
74 if (prev) *prev = (const UChar* )NULL; /* Sorry */
75 }
76 return p;
77 }
78
79 extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc,const UChar * start,const UChar * s)80 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
81 {
82 if (s <= start)
83 return (UChar* )NULL;
84
85 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
86 }
87
88 extern UChar*
onigenc_step_back(OnigEncoding enc,const UChar * start,const UChar * s,int n)89 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
90 {
91 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
92 if (s <= start)
93 return (UChar* )NULL;
94
95 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
96 }
97 return (UChar* )s;
98 }
99
100 extern UChar*
onigenc_step(OnigEncoding enc,const UChar * p,const UChar * end,int n)101 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
102 {
103 UChar* q = (UChar* )p;
104 while (n-- > 0) {
105 q += ONIGENC_MBC_ENC_LEN(enc, q);
106 }
107 return (q <= end ? q : NULL);
108 }
109
110 extern int
onigenc_strlen(OnigEncoding enc,const UChar * p,const UChar * end)111 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
112 {
113 int n = 0;
114 UChar* q = (UChar* )p;
115
116 while (q < end) {
117 q += ONIGENC_MBC_ENC_LEN(enc, q);
118 n++;
119 }
120 return n;
121 }
122
123 extern int
onigenc_strlen_null(OnigEncoding enc,const UChar * s)124 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
125 {
126 int n = 0;
127 UChar* p = (UChar* )s;
128
129 while (1) {
130 if (*p == '\0') {
131 UChar* q;
132 int len = ONIGENC_MBC_MINLEN(enc);
133
134 if (len == 1) return n;
135 q = p + 1;
136 while (len > 1) {
137 if (*q != '\0') break;
138 q++;
139 len--;
140 }
141 if (len == 1) return n;
142 }
143 p += ONIGENC_MBC_ENC_LEN(enc, p);
144 n++;
145 }
146 }
147
148 extern int
onigenc_str_bytelen_null(OnigEncoding enc,const UChar * s)149 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
150 {
151 UChar* start = (UChar* )s;
152 UChar* p = (UChar* )s;
153
154 while (1) {
155 if (*p == '\0') {
156 UChar* q;
157 int len = ONIGENC_MBC_MINLEN(enc);
158
159 if (len == 1) return (int )(p - start);
160 q = p + 1;
161 while (len > 1) {
162 if (*q != '\0') break;
163 q++;
164 len--;
165 }
166 if (len == 1) return (int )(p - start);
167 }
168 p += ONIGENC_MBC_ENC_LEN(enc, p);
169 }
170 }
171
172 #ifndef ONIG_RUBY_M17N
173
174 #ifndef NOT_RUBY
175
176 #define USE_APPLICATION_TO_LOWER_CASE_TABLE
177
/*
 * 256-entry table of 16-bit values, one per entry index 0x00-0xff.
 * Only compiled when NOT_RUBY is not defined.
 * The first 128 entries match OnigEncAsciiCtypeTable except for indices
 * 0x09-0x0d, which carry an extra 0x0080 bit here; the upper 128 entries
 * are populated (they are all zero in the ASCII table).
 * NOTE(review): each value is presumably a bit set of ONIGENC_CTYPE_* flags
 * and the index a code point -- confirm against the ctype macros in regenc.h.
 */
178 const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
179 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
180 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
181 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
182 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
183 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
184 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
185 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
186 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
187 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
188 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
189 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
190 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
191 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
192 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
193 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
194 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
195 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
196 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
197 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
198 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
199 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
200 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
201 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
202 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
203 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
204 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
205 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
206 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
207 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
208 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
209 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
210 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
211 };
212 #endif
213
/* Pointer to the lower-casing table currently in use.  It starts out as a
 * null pointer and is replaced by onigenc_set_default_caseconv_table(). */
214 const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
215
216 #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
/*
 * Built-in lower-casing table, compiled only when
 * USE_APPLICATION_TO_LOWER_CASE_TABLE is not defined.
 * Entries '\101'-'\132' ('A'-'Z') hold '\141'-'\172' ('a'-'z'); every other
 * entry holds its own index (identity mapping).
 */
217 static const UChar BuiltInAsciiToLowerCaseTable[] = {
218 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
219 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
220 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
221 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
222 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
223 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
224 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
225 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
226 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
227 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
228 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
229 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
230 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
231 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
232 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
233 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
234 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
235 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
236 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
237 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
238 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
239 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
240 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
241 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
242 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
243 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
244 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
245 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
246 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
247 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
248 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
249 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
250 };
251 #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
252
253 #ifdef USE_UPPER_CASE_TABLE
/*
 * Upper-casing table, compiled only when USE_UPPER_CASE_TABLE is defined.
 * Entries '\141'-'\172' ('a'-'z') hold '\101'-'\132' ('A'-'Z'); every other
 * entry holds its own index (identity mapping).
 */
254 const UChar OnigEncAsciiToUpperCaseTable[256] = {
255 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
256 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
257 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
258 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
259 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
260 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
261 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
262 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
263 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
264 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
265 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
266 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
267 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
268 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
269 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
270 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
271 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
272 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
273 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
274 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
275 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
276 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
277 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
278 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
279 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
280 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
281 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
282 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
283 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
284 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
285 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
286 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
287 };
288 #endif
289
/*
 * 256-entry table of 16-bit values, one per entry index 0x00-0xff.
 * Only the first 128 entries are populated; entries 0x80-0xff are all zero.
 * NOTE(review): each value is presumably a bit set of ONIGENC_CTYPE_* flags
 * read through ONIGENC_IS_ASCII_CODE_CTYPE() -- confirm against regenc.h.
 */
290 const unsigned short OnigEncAsciiCtypeTable[256] = {
291 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
292 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
293 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
294 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
295 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
296 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
297 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
298 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
299 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
300 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
301 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
302 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
303 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
304 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
305 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
306 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
307
308 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
309 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
310 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
311 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
312 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
313 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
314 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
315 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
316 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
317 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
318 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
319 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
320 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
321 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
322 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
323 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
324 };
325
/*
 * Lower-casing table for ISO-8859-1 byte values.
 * Entries '\101'-'\132' hold '\141'-'\172', and entries '\300'-'\336' hold
 * '\340'-'\376', with one exception: entry '\327' holds itself.
 * Entry '\337' and all remaining entries also hold their own index.
 */
326 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
327 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
328 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
329 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
330 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
331 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
332 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
333 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
334 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
335 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
336 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
337 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
338 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
339 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
340 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
341 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
342 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
343 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
344 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
345 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
346 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
347 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
348 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
349 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
350 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
351 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
352 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
353 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
354 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
355 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
356 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
357 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
358 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
359 };
360
361 #ifdef USE_UPPER_CASE_TABLE
/*
 * Upper-casing table for ISO-8859-1 byte values, compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * Entries '\141'-'\172' hold '\101'-'\132', and entries '\340'-'\376' hold
 * '\300'-'\336', with one exception: entry '\367' holds itself.
 * Entry '\377' and all remaining entries also hold their own index.
 */
362 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
363 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
364 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
365 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
366 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
367 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
368 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
369 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
370 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
371 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
372 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
373 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
374 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
375 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
376 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
377 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
378 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
379 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
380 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
381 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
382 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
383 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
384 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
385 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
386 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
387 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
388 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
389 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
390 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
391 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
392 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
393 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
394 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
395 };
396 #endif
397
398 extern void
onigenc_set_default_caseconv_table(const UChar * table)399 onigenc_set_default_caseconv_table(const UChar* table)
400 {
401 if (table == (const UChar* )0) {
402 #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
403 table = BuiltInAsciiToLowerCaseTable;
404 #else
405 return ;
406 #endif
407 }
408
409 if (table != OnigEncAsciiToLowerCaseTable) {
410 OnigEncAsciiToLowerCaseTable = table;
411 }
412 }
413
414 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)415 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
416 {
417 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
418 }
419
/*
 * Case pairs for the ASCII letters, listed in both directions:
 * first 0x41-0x5a ('A'-'Z') paired with 0x61-0x7a ('a'-'z'),
 * then 0x61-0x7a paired with 0x41-0x5a.  52 entries in total.
 */
420 const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
421 { 0x41, 0x61 },
422 { 0x42, 0x62 },
423 { 0x43, 0x63 },
424 { 0x44, 0x64 },
425 { 0x45, 0x65 },
426 { 0x46, 0x66 },
427 { 0x47, 0x67 },
428 { 0x48, 0x68 },
429 { 0x49, 0x69 },
430 { 0x4a, 0x6a },
431 { 0x4b, 0x6b },
432 { 0x4c, 0x6c },
433 { 0x4d, 0x6d },
434 { 0x4e, 0x6e },
435 { 0x4f, 0x6f },
436 { 0x50, 0x70 },
437 { 0x51, 0x71 },
438 { 0x52, 0x72 },
439 { 0x53, 0x73 },
440 { 0x54, 0x74 },
441 { 0x55, 0x75 },
442 { 0x56, 0x76 },
443 { 0x57, 0x77 },
444 { 0x58, 0x78 },
445 { 0x59, 0x79 },
446 { 0x5a, 0x7a },
447
448 { 0x61, 0x41 },
449 { 0x62, 0x42 },
450 { 0x63, 0x43 },
451 { 0x64, 0x44 },
452 { 0x65, 0x45 },
453 { 0x66, 0x46 },
454 { 0x67, 0x47 },
455 { 0x68, 0x48 },
456 { 0x69, 0x49 },
457 { 0x6a, 0x4a },
458 { 0x6b, 0x4b },
459 { 0x6c, 0x4c },
460 { 0x6d, 0x4d },
461 { 0x6e, 0x4e },
462 { 0x6f, 0x4f },
463 { 0x70, 0x50 },
464 { 0x71, 0x51 },
465 { 0x72, 0x52 },
466 { 0x73, 0x53 },
467 { 0x74, 0x54 },
468 { 0x75, 0x55 },
469 { 0x76, 0x56 },
470 { 0x77, 0x57 },
471 { 0x78, 0x58 },
472 { 0x79, 0x59 },
473 { 0x7a, 0x5a }
474 };
475
476 extern int
onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,const OnigPairAmbigCodes ** ccs)477 onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
478 const OnigPairAmbigCodes** ccs)
479 {
480 if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
481 *ccs = OnigAsciiPairAmbigCodes;
482 return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
483 }
484 else {
485 return 0;
486 }
487 }
488
489 extern int
onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,const OnigCompAmbigCodes ** ccs)490 onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
491 const OnigCompAmbigCodes** ccs)
492 {
493 return 0;
494 }
495
496 extern int
onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,const OnigPairAmbigCodes ** ccs)497 onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
498 const OnigPairAmbigCodes** ccs)
499 {
500 static const OnigPairAmbigCodes cc[] = {
501 { 0xc0, 0xe0 },
502 { 0xc1, 0xe1 },
503 { 0xc2, 0xe2 },
504 { 0xc3, 0xe3 },
505 { 0xc4, 0xe4 },
506 { 0xc5, 0xe5 },
507 { 0xc6, 0xe6 },
508 { 0xc7, 0xe7 },
509 { 0xc8, 0xe8 },
510 { 0xc9, 0xe9 },
511 { 0xca, 0xea },
512 { 0xcb, 0xeb },
513 { 0xcc, 0xec },
514 { 0xcd, 0xed },
515 { 0xce, 0xee },
516 { 0xcf, 0xef },
517
518 { 0xd0, 0xf0 },
519 { 0xd1, 0xf1 },
520 { 0xd2, 0xf2 },
521 { 0xd3, 0xf3 },
522 { 0xd4, 0xf4 },
523 { 0xd5, 0xf5 },
524 { 0xd6, 0xf6 },
525 { 0xd8, 0xf8 },
526 { 0xd9, 0xf9 },
527 { 0xda, 0xfa },
528 { 0xdb, 0xfb },
529 { 0xdc, 0xfc },
530 { 0xdd, 0xfd },
531 { 0xde, 0xfe },
532
533 { 0xe0, 0xc0 },
534 { 0xe1, 0xc1 },
535 { 0xe2, 0xc2 },
536 { 0xe3, 0xc3 },
537 { 0xe4, 0xc4 },
538 { 0xe5, 0xc5 },
539 { 0xe6, 0xc6 },
540 { 0xe7, 0xc7 },
541 { 0xe8, 0xc8 },
542 { 0xe9, 0xc9 },
543 { 0xea, 0xca },
544 { 0xeb, 0xcb },
545 { 0xec, 0xcc },
546 { 0xed, 0xcd },
547 { 0xee, 0xce },
548 { 0xef, 0xcf },
549
550 { 0xf0, 0xd0 },
551 { 0xf1, 0xd1 },
552 { 0xf2, 0xd2 },
553 { 0xf3, 0xd3 },
554 { 0xf4, 0xd4 },
555 { 0xf5, 0xd5 },
556 { 0xf6, 0xd6 },
557 { 0xf8, 0xd8 },
558 { 0xf9, 0xd9 },
559 { 0xfa, 0xda },
560 { 0xfb, 0xdb },
561 { 0xfc, 0xdc },
562 { 0xfd, 0xdd },
563 { 0xfe, 0xde }
564 };
565
566 if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
567 *ccs = OnigAsciiPairAmbigCodes;
568 return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
569 }
570 else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
571 *ccs = cc;
572 return sizeof(cc) / sizeof(OnigPairAmbigCodes);
573 }
574 else
575 return 0;
576 }
577
578 extern int
onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,const OnigCompAmbigCodes ** ccs)579 onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
580 const OnigCompAmbigCodes** ccs)
581 {
582 static const OnigCompAmbigCodes folds[] = {
583 { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
584 };
585
586 if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
587 *ccs = folds;
588 return sizeof(folds) / sizeof(OnigCompAmbigCodes);
589 }
590 else
591 return 0;
592 }
593
594 extern int
onigenc_not_support_get_ctype_code_range(int ctype,const OnigCodePoint * sbr[],const OnigCodePoint * mbr[])595 onigenc_not_support_get_ctype_code_range(int ctype,
596 const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
597 {
598 return ONIG_NO_SUPPORT_CONFIG;
599 }
600
601 extern int
onigenc_is_mbc_newline_0x0a(const UChar * p,const UChar * end)602 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
603 {
604 if (p < end) {
605 if (*p == 0x0a) return 1;
606 }
607 return 0;
608 }
609
610 /* for single byte encodings */
611 extern int
onigenc_ascii_mbc_to_normalize(OnigAmbigType flag,const UChar ** p,const UChar * end,UChar * lower)612 onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
613 UChar* lower)
614 {
615 if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
616 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
617 }
618 else {
619 *lower = **p;
620 }
621
622 (*p)++;
623 return 1; /* return byte length of converted char to lower */
624 }
625
626 extern int
onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,const UChar ** pp,const UChar * end)627 onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
628 const UChar** pp, const UChar* end)
629 {
630 const UChar* p = *pp;
631
632 (*pp)++;
633 if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
634 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
635 }
636 else {
637 return FALSE;
638 }
639 }
640
641 extern int
onigenc_single_byte_mbc_enc_len(const UChar * p)642 onigenc_single_byte_mbc_enc_len(const UChar* p)
643 {
644 return 1;
645 }
646
647 extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar * p,const UChar * end)648 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
649 {
650 return (OnigCodePoint )(*p);
651 }
652
653 extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)654 onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
655 {
656 return 1;
657 }
658
659 extern int
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)660 onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
661 {
662 return (code & 0xff);
663 }
664
665 extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code,UChar * buf)666 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
667 {
668 *buf = (UChar )(code & 0xff);
669 return 1;
670 }
671
672 extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar * start,const UChar * s)673 onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
674 {
675 return (UChar* )s;
676 }
677
678 extern int
onigenc_always_true_is_allowed_reverse_match(const UChar * s,const UChar * end)679 onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
680 {
681 return TRUE;
682 }
683
684 extern int
onigenc_always_false_is_allowed_reverse_match(const UChar * s,const UChar * end)685 onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
686 {
687 return FALSE;
688 }
689
690 extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc,const UChar * p,const UChar * end)691 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
692 {
693 int c, i, len;
694 OnigCodePoint n;
695
696 len = enc_len(enc, p);
697 n = (OnigCodePoint )(*p++);
698 if (len == 1) return n;
699
700 for (i = 1; i < len; i++) {
701 if (p >= end) break;
702 c = *p++;
703 n <<= 8; n += c;
704 }
705 return n;
706 }
707
708 extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc,OnigAmbigType flag,const UChar ** pp,const UChar * end,UChar * lower)709 onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
710 const UChar** pp, const UChar* end, UChar* lower)
711 {
712 int len;
713 const UChar *p = *pp;
714
715 if (ONIGENC_IS_MBC_ASCII(p)) {
716 if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
717 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
718 }
719 else {
720 *lower = *p;
721 }
722 (*pp)++;
723 return 1;
724 }
725 else {
726 len = enc_len(enc, p);
727 if (lower != p) {
728 int i;
729 for (i = 0; i < len; i++) {
730 *lower++ = *p++;
731 }
732 }
733 (*pp) += len;
734 return len; /* return byte length of converted to lower char */
735 }
736 }
737
738 extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc,OnigAmbigType flag,const UChar ** pp,const UChar * end)739 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
740 const UChar** pp, const UChar* end)
741 {
742 const UChar* p = *pp;
743
744 if (ONIGENC_IS_MBC_ASCII(p)) {
745 (*pp)++;
746 if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
747 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
748 }
749 else {
750 return FALSE;
751 }
752 }
753
754 (*pp) += enc_len(enc, p);
755 return FALSE;
756 }
757
758 extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)759 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
760 {
761 if ((code & 0xff00) != 0) return 2;
762 else return 1;
763 }
764
765 extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)766 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
767 {
768 if ((code & 0xff000000) != 0) return 4;
769 else if ((code & 0xff0000) != 0) return 3;
770 else if ((code & 0xff00) != 0) return 2;
771 else return 1;
772 }
773
774 extern int
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)775 onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
776 {
777 int first;
778
779 if ((code & 0xff00) != 0) {
780 first = (code >> 8) & 0xff;
781 }
782 else {
783 return (int )code;
784 }
785 return first;
786 }
787
788 extern int
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)789 onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
790 {
791 int first;
792
793 if ((code & 0xff000000) != 0) {
794 first = (code >> 24) & 0xff;
795 }
796 else if ((code & 0xff0000) != 0) {
797 first = (code >> 16) & 0xff;
798 }
799 else if ((code & 0xff00) != 0) {
800 first = (code >> 8) & 0xff;
801 }
802 else {
803 return (int )code;
804 }
805 return first;
806 }
807
808 extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)809 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
810 {
811 UChar *p = buf;
812
813 if ((code & 0xff00) != 0) {
814 *p++ = (UChar )((code >> 8) & 0xff);
815 }
816 *p++ = (UChar )(code & 0xff);
817
818 #if 1
819 if (enc_len(enc, buf) != (p - buf))
820 return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
821 #endif
822 return p - buf;
823 }
824
825 extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)826 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
827 {
828 UChar *p = buf;
829
830 if ((code & 0xff000000) != 0) {
831 *p++ = (UChar )((code >> 24) & 0xff);
832 }
833 if ((code & 0xff0000) != 0 || p != buf) {
834 *p++ = (UChar )((code >> 16) & 0xff);
835 }
836 if ((code & 0xff00) != 0 || p != buf) {
837 *p++ = (UChar )((code >> 8) & 0xff);
838 }
839 *p++ = (UChar )(code & 0xff);
840
841 #if 1
842 if (enc_len(enc, buf) != (p - buf))
843 return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
844 #endif
845 return p - buf;
846 }
847
848 extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)849 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
850 unsigned int ctype)
851 {
852 if (code < 128)
853 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
854 else {
855 if ((ctype & (ONIGENC_CTYPE_WORD |
856 ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
857 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
858 }
859 }
860
861 return FALSE;
862 }
863
864 extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)865 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
866 unsigned int ctype)
867 {
868 if (code < 128)
869 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
870 else {
871 if ((ctype & (ONIGENC_CTYPE_WORD |
872 ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
873 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
874 }
875 }
876
877 return FALSE;
878 }
879
880 extern int
onigenc_with_ascii_strncmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)881 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
882 const UChar* sascii /* ascii */, int n)
883 {
884 int x, c;
885
886 while (n-- > 0) {
887 if (p >= end) return (int )(*sascii);
888
889 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
890 x = *sascii - c;
891 if (x) return x;
892
893 sascii++;
894 p += enc_len(enc, p);
895 }
896 return 0;
897 }
898
899 #else /* ONIG_RUBY_M17N */
900
901 extern int
onigenc_is_code_ctype(OnigEncoding enc,OnigCodePoint code,int ctype)902 onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
903 {
904 switch (ctype) {
905 case ONIGENC_CTYPE_NEWLINE:
906 if (code == 0x0a) return 1;
907 break;
908
909 case ONIGENC_CTYPE_ALPHA:
910 return m17n_isalpha(enc, code);
911 break;
912 case ONIGENC_CTYPE_BLANK:
913 return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
914 break;
915 case ONIGENC_CTYPE_CNTRL:
916 return m17n_iscntrl(enc, code);
917 break;
918 case ONIGENC_CTYPE_DIGIT:
919 return m17n_isdigit(enc, code);
920 break;
921 case ONIGENC_CTYPE_GRAPH:
922 return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
923 break;
924 case ONIGENC_CTYPE_LOWER:
925 return m17n_islower(enc, code);
926 break;
927 case ONIGENC_CTYPE_PRINT:
928 return m17n_isprint(enc, code);
929 break;
930 case ONIGENC_CTYPE_PUNCT:
931 return m17n_ispunct(enc, code);
932 break;
933 case ONIGENC_CTYPE_SPACE:
934 return m17n_isspace(enc, code);
935 break;
936 case ONIGENC_CTYPE_UPPER:
937 return m17n_isupper(enc, code);
938 break;
939 case ONIGENC_CTYPE_XDIGIT:
940 return m17n_isxdigit(enc, code);
941 break;
942 case ONIGENC_CTYPE_WORD:
943 return m17n_iswchar(enc, code);
944 break;
945 case ONIGENC_CTYPE_ASCII:
946 return (code < 128 ? TRUE : FALSE);
947 break;
948 case ONIGENC_CTYPE_ALNUM:
949 return m17n_isalnum(enc, code);
950 break;
951 default:
952 break;
953 }
954
955 return 0;
956 }
957
958 extern int
onigenc_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)959 onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
960 {
961 int c, len;
962
963 m17n_mbcput(enc, code, buf);
964 c = m17n_firstbyte(enc, code);
965 len = enc_len(enc, c);
966 return len;
967 }
968
969 extern int
onigenc_mbc_to_lower(OnigEncoding enc,UChar * p,UChar * buf)970 onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
971 {
972 unsigned int c, low;
973
974 c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
975 low = m17n_tolower(enc, c);
976 m17n_mbcput(enc, low, buf);
977
978 return m17n_codelen(enc, low);
979 }
980
981 extern int
onigenc_is_mbc_ambiguous(OnigEncoding enc,OnigAmbigType flag,UChar ** pp,UChar * end)982 onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
983 UChar** pp, UChar* end)
984 {
985 int len;
986 unsigned int c;
987 UChar* p = *pp;
988
989 len = enc_len(enc, *p);
990 (*pp) += len;
991 c = m17n_codepoint(enc, p, p + len);
992
993 if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
994 if (m17n_isupper(enc, c) || m17n_islower(enc, c))
995 return TRUE;
996 }
997
998 return FALSE;
999 }
1000
1001 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,UChar * start,UChar * s)1002 onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
1003 {
1004 UChar *p;
1005 int len;
1006
1007 if (s <= start) return s;
1008 p = s;
1009
1010 while (!m17n_islead(enc, *p) && p > start) p--;
1011 while (p + (len = enc_len(enc, *p)) < s) {
1012 p += len;
1013 }
1014 if (p + len == s) return s;
1015 return p;
1016 }
1017
1018 extern int
onigenc_is_allowed_reverse_match(OnigEncoding enc,const UChar * s,const UChar * end)1019 onigenc_is_allowed_reverse_match(OnigEncoding enc,
1020 const UChar* s, const UChar* end)
1021 {
1022 return ONIGENC_IS_SINGLEBYTE(enc);
1023 }
1024
1025 extern void
onigenc_set_default_caseconv_table(UChar * table)1026 onigenc_set_default_caseconv_table(UChar* table) { }
1027
1028 #endif /* ONIG_RUBY_M17N */
1029