xref: /PHP-5.3/ext/mbstring/oniguruma/regenc.c (revision 7aab46a2)
1 /**********************************************************************
2   regenc.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
32 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33 
/*
 * Initialisation hook for the encoding layer.
 * No work is performed; the call always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
39 
40 extern OnigEncoding
onigenc_get_default_encoding(void)41 onigenc_get_default_encoding(void)
42 {
43   return OnigEncDefaultCharEncoding;
44 }
45 
46 extern int
onigenc_set_default_encoding(OnigEncoding enc)47 onigenc_set_default_encoding(OnigEncoding enc)
48 {
49   OnigEncDefaultCharEncoding = enc;
50   return 0;
51 }
52 
53 extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)54 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
55 {
56   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
57   if (p < s) {
58     p += enc_len(enc, p);
59   }
60   return p;
61 }
62 
63 extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,const UChar * start,const UChar * s,const UChar ** prev)64 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
65 				   const UChar* start, const UChar* s, const UChar** prev)
66 {
67   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
68 
69   if (p < s) {
70     if (prev) *prev = (const UChar* )p;
71     p += enc_len(enc, p);
72   }
73   else {
74     if (prev) *prev = (const UChar* )NULL; /* Sorry */
75   }
76   return p;
77 }
78 
79 extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc,const UChar * start,const UChar * s)80 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
81 {
82   if (s <= start)
83     return (UChar* )NULL;
84 
85   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
86 }
87 
88 extern UChar*
onigenc_step_back(OnigEncoding enc,const UChar * start,const UChar * s,int n)89 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
90 {
91   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
92     if (s <= start)
93       return (UChar* )NULL;
94 
95     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
96   }
97   return (UChar* )s;
98 }
99 
100 extern UChar*
onigenc_step(OnigEncoding enc,const UChar * p,const UChar * end,int n)101 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
102 {
103   UChar* q = (UChar* )p;
104   while (n-- > 0) {
105     q += ONIGENC_MBC_ENC_LEN(enc, q);
106   }
107   return (q <= end ? q : NULL);
108 }
109 
110 extern int
onigenc_strlen(OnigEncoding enc,const UChar * p,const UChar * end)111 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
112 {
113   int n = 0;
114   UChar* q = (UChar* )p;
115 
116   while (q < end) {
117     q += ONIGENC_MBC_ENC_LEN(enc, q);
118     n++;
119   }
120   return n;
121 }
122 
123 extern int
onigenc_strlen_null(OnigEncoding enc,const UChar * s)124 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
125 {
126   int n = 0;
127   UChar* p = (UChar* )s;
128 
129   while (1) {
130     if (*p == '\0') {
131       UChar* q;
132       int len = ONIGENC_MBC_MINLEN(enc);
133 
134       if (len == 1) return n;
135       q = p + 1;
136       while (len > 1) {
137         if (*q != '\0') break;
138         q++;
139         len--;
140       }
141       if (len == 1) return n;
142     }
143     p += ONIGENC_MBC_ENC_LEN(enc, p);
144     n++;
145   }
146 }
147 
148 extern int
onigenc_str_bytelen_null(OnigEncoding enc,const UChar * s)149 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
150 {
151   UChar* start = (UChar* )s;
152   UChar* p = (UChar* )s;
153 
154   while (1) {
155     if (*p == '\0') {
156       UChar* q;
157       int len = ONIGENC_MBC_MINLEN(enc);
158 
159       if (len == 1) return (int )(p - start);
160       q = p + 1;
161       while (len > 1) {
162         if (*q != '\0') break;
163         q++;
164         len--;
165       }
166       if (len == 1) return (int )(p - start);
167     }
168     p += ONIGENC_MBC_ENC_LEN(enc, p);
169   }
170 }
171 
172 #ifndef ONIG_RUBY_M17N
173 
#ifndef NOT_RUBY

/* When set, no built-in ASCII lower-case table is compiled in; the
 * application supplies one via onigenc_set_default_caseconv_table(). */
#define USE_APPLICATION_TO_LOWER_CASE_TABLE

/*
 * Character-type bit masks for the 256 single-byte code points
 * 0x00-0xFF, one entry per code point.
 * NOTE(review): the meaning of the individual bits is defined by the
 * ONIGENC_CTYPE_* constants, which are not visible in this file.
 */
const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 - 0x3f */
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x40 - 0x7f */
  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
  /* 0x80 - 0xbf */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
  0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 - 0xff */
  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
#endif
213 
214 const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
215 
216 #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
217 static const UChar BuiltInAsciiToLowerCaseTable[] = {
218   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
219   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
220   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
221   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
222   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
223   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
224   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
225   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
226   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
227   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
228   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
229   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
230   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
231   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
232   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
233   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
234   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
235   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
236   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
237   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
238   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
239   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
240   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
241   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
242   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
243   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
244   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
245   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
246   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
247   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
248   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
249   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
250 };
251 #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
252 
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case map (octal literals).  Bytes 0141-0172 ('a'-'z') map
 * to 0101-0132 ('A'-'Z'); every other byte maps to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  /* 0000 - 0077 */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0100 - 0177 */
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0200 - 0277 */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0300 - 0377 */
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
289 
/*
 * Character-type bit masks for byte values 0x00-0xFF under ASCII, one
 * entry per byte.  All entries for 0x80-0xFF are zero (no type bits).
 * NOTE(review): the meaning of the individual bits is defined by the
 * ONIGENC_CTYPE_* constants, which are not visible in this file.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 - 0x3f */
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x40 - 0x7f */
  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,

  /* 0x80 - 0xff: outside ASCII, no type bits */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
325 
326 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
327   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
328   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
329   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
330   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
331   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
332   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
333   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
334   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
335   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
336   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
337   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
338   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
339   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
340   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
341   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
342   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
343   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
344   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
345   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
346   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
347   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
348   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
349   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
350   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
351   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
352   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
353   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
354   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
355   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
356   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
357   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
358   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
359 };
360 
#ifdef USE_UPPER_CASE_TABLE
/*
 * ISO-8859-1 byte -> upper-case map (octal literals).
 *   0141-0172 ('a'-'z') -> 0101-0132 ('A'-'Z')
 *   0340-0376           -> 0300-0336, except 0367 which maps to itself
 *   0377 and every other byte map to themselves.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  /* 0000 - 0077 */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0100 - 0177 */
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0200 - 0277 */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0300 - 0377 */
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
397 
398 extern void
onigenc_set_default_caseconv_table(const UChar * table)399 onigenc_set_default_caseconv_table(const UChar* table)
400 {
401   if (table == (const UChar* )0) {
402 #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
403     table = BuiltInAsciiToLowerCaseTable;
404 #else
405     return ;
406 #endif
407   }
408 
409   if (table != OnigEncAsciiToLowerCaseTable) {
410     OnigEncAsciiToLowerCaseTable = table;
411   }
412 }
413 
414 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)415 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
416 {
417   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
418 }
419 
420 const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
421   { 0x41, 0x61 },
422   { 0x42, 0x62 },
423   { 0x43, 0x63 },
424   { 0x44, 0x64 },
425   { 0x45, 0x65 },
426   { 0x46, 0x66 },
427   { 0x47, 0x67 },
428   { 0x48, 0x68 },
429   { 0x49, 0x69 },
430   { 0x4a, 0x6a },
431   { 0x4b, 0x6b },
432   { 0x4c, 0x6c },
433   { 0x4d, 0x6d },
434   { 0x4e, 0x6e },
435   { 0x4f, 0x6f },
436   { 0x50, 0x70 },
437   { 0x51, 0x71 },
438   { 0x52, 0x72 },
439   { 0x53, 0x73 },
440   { 0x54, 0x74 },
441   { 0x55, 0x75 },
442   { 0x56, 0x76 },
443   { 0x57, 0x77 },
444   { 0x58, 0x78 },
445   { 0x59, 0x79 },
446   { 0x5a, 0x7a },
447 
448   { 0x61, 0x41 },
449   { 0x62, 0x42 },
450   { 0x63, 0x43 },
451   { 0x64, 0x44 },
452   { 0x65, 0x45 },
453   { 0x66, 0x46 },
454   { 0x67, 0x47 },
455   { 0x68, 0x48 },
456   { 0x69, 0x49 },
457   { 0x6a, 0x4a },
458   { 0x6b, 0x4b },
459   { 0x6c, 0x4c },
460   { 0x6d, 0x4d },
461   { 0x6e, 0x4e },
462   { 0x6f, 0x4f },
463   { 0x70, 0x50 },
464   { 0x71, 0x51 },
465   { 0x72, 0x52 },
466   { 0x73, 0x53 },
467   { 0x74, 0x54 },
468   { 0x75, 0x55 },
469   { 0x76, 0x56 },
470   { 0x77, 0x57 },
471   { 0x78, 0x58 },
472   { 0x79, 0x59 },
473   { 0x7a, 0x5a }
474 };
475 
476 extern int
onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,const OnigPairAmbigCodes ** ccs)477 onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
478                                        const OnigPairAmbigCodes** ccs)
479 {
480   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
481     *ccs = OnigAsciiPairAmbigCodes;
482     return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
483   }
484   else {
485     return 0;
486   }
487 }
488 
489 extern int
onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,const OnigCompAmbigCodes ** ccs)490 onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
491                                          const OnigCompAmbigCodes** ccs)
492 {
493   return 0;
494 }
495 
496 extern int
onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,const OnigPairAmbigCodes ** ccs)497 onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
498                                             const OnigPairAmbigCodes** ccs)
499 {
500   static const OnigPairAmbigCodes cc[] = {
501     { 0xc0, 0xe0 },
502     { 0xc1, 0xe1 },
503     { 0xc2, 0xe2 },
504     { 0xc3, 0xe3 },
505     { 0xc4, 0xe4 },
506     { 0xc5, 0xe5 },
507     { 0xc6, 0xe6 },
508     { 0xc7, 0xe7 },
509     { 0xc8, 0xe8 },
510     { 0xc9, 0xe9 },
511     { 0xca, 0xea },
512     { 0xcb, 0xeb },
513     { 0xcc, 0xec },
514     { 0xcd, 0xed },
515     { 0xce, 0xee },
516     { 0xcf, 0xef },
517 
518     { 0xd0, 0xf0 },
519     { 0xd1, 0xf1 },
520     { 0xd2, 0xf2 },
521     { 0xd3, 0xf3 },
522     { 0xd4, 0xf4 },
523     { 0xd5, 0xf5 },
524     { 0xd6, 0xf6 },
525     { 0xd8, 0xf8 },
526     { 0xd9, 0xf9 },
527     { 0xda, 0xfa },
528     { 0xdb, 0xfb },
529     { 0xdc, 0xfc },
530     { 0xdd, 0xfd },
531     { 0xde, 0xfe },
532 
533     { 0xe0, 0xc0 },
534     { 0xe1, 0xc1 },
535     { 0xe2, 0xc2 },
536     { 0xe3, 0xc3 },
537     { 0xe4, 0xc4 },
538     { 0xe5, 0xc5 },
539     { 0xe6, 0xc6 },
540     { 0xe7, 0xc7 },
541     { 0xe8, 0xc8 },
542     { 0xe9, 0xc9 },
543     { 0xea, 0xca },
544     { 0xeb, 0xcb },
545     { 0xec, 0xcc },
546     { 0xed, 0xcd },
547     { 0xee, 0xce },
548     { 0xef, 0xcf },
549 
550     { 0xf0, 0xd0 },
551     { 0xf1, 0xd1 },
552     { 0xf2, 0xd2 },
553     { 0xf3, 0xd3 },
554     { 0xf4, 0xd4 },
555     { 0xf5, 0xd5 },
556     { 0xf6, 0xd6 },
557     { 0xf8, 0xd8 },
558     { 0xf9, 0xd9 },
559     { 0xfa, 0xda },
560     { 0xfb, 0xdb },
561     { 0xfc, 0xdc },
562     { 0xfd, 0xdd },
563     { 0xfe, 0xde }
564   };
565 
566   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
567     *ccs = OnigAsciiPairAmbigCodes;
568     return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
569   }
570   else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
571     *ccs = cc;
572     return sizeof(cc) / sizeof(OnigPairAmbigCodes);
573   }
574   else
575     return 0;
576 }
577 
578 extern int
onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,const OnigCompAmbigCodes ** ccs)579 onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
580                                            const OnigCompAmbigCodes** ccs)
581 {
582   static const OnigCompAmbigCodes folds[] = {
583     { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
584   };
585 
586   if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
587     *ccs = folds;
588     return sizeof(folds) / sizeof(OnigCompAmbigCodes);
589   }
590   else
591     return 0;
592 }
593 
594 extern int
onigenc_not_support_get_ctype_code_range(int ctype,const OnigCodePoint * sbr[],const OnigCodePoint * mbr[])595 onigenc_not_support_get_ctype_code_range(int ctype,
596                              const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
597 {
598   return ONIG_NO_SUPPORT_CONFIG;
599 }
600 
601 extern int
onigenc_is_mbc_newline_0x0a(const UChar * p,const UChar * end)602 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
603 {
604   if (p < end) {
605     if (*p == 0x0a) return 1;
606   }
607   return 0;
608 }
609 
610 /* for single byte encodings */
611 extern int
onigenc_ascii_mbc_to_normalize(OnigAmbigType flag,const UChar ** p,const UChar * end,UChar * lower)612 onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
613                                UChar* lower)
614 {
615   if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
616     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
617   }
618   else {
619     *lower = **p;
620   }
621 
622   (*p)++;
623   return 1; /* return byte length of converted char to lower */
624 }
625 
626 extern int
onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,const UChar ** pp,const UChar * end)627 onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
628 			       const UChar** pp, const UChar* end)
629 {
630   const UChar* p = *pp;
631 
632   (*pp)++;
633   if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
634     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
635   }
636   else {
637     return FALSE;
638   }
639 }
640 
641 extern int
onigenc_single_byte_mbc_enc_len(const UChar * p)642 onigenc_single_byte_mbc_enc_len(const UChar* p)
643 {
644   return 1;
645 }
646 
647 extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar * p,const UChar * end)648 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
649 {
650   return (OnigCodePoint )(*p);
651 }
652 
653 extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)654 onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
655 {
656   return 1;
657 }
658 
659 extern int
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)660 onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
661 {
662   return (code & 0xff);
663 }
664 
665 extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code,UChar * buf)666 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
667 {
668   *buf = (UChar )(code & 0xff);
669   return 1;
670 }
671 
672 extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar * start,const UChar * s)673 onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
674 {
675   return (UChar* )s;
676 }
677 
678 extern int
onigenc_always_true_is_allowed_reverse_match(const UChar * s,const UChar * end)679 onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
680 {
681   return TRUE;
682 }
683 
684 extern int
onigenc_always_false_is_allowed_reverse_match(const UChar * s,const UChar * end)685 onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
686 {
687   return FALSE;
688 }
689 
690 extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc,const UChar * p,const UChar * end)691 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
692 {
693   int c, i, len;
694   OnigCodePoint n;
695 
696   len = enc_len(enc, p);
697   n = (OnigCodePoint )(*p++);
698   if (len == 1) return n;
699 
700   for (i = 1; i < len; i++) {
701     if (p >= end) break;
702     c = *p++;
703     n <<= 8;  n += c;
704   }
705   return n;
706 }
707 
708 extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc,OnigAmbigType flag,const UChar ** pp,const UChar * end,UChar * lower)709 onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
710                              const UChar** pp, const UChar* end, UChar* lower)
711 {
712   int len;
713   const UChar *p = *pp;
714 
715   if (ONIGENC_IS_MBC_ASCII(p)) {
716     if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
717       *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
718     }
719     else {
720       *lower = *p;
721     }
722     (*pp)++;
723     return 1;
724   }
725   else {
726     len = enc_len(enc, p);
727     if (lower != p) {
728       int i;
729       for (i = 0; i < len; i++) {
730 	*lower++ = *p++;
731       }
732     }
733     (*pp) += len;
734     return len; /* return byte length of converted to lower char */
735   }
736 }
737 
738 extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc,OnigAmbigType flag,const UChar ** pp,const UChar * end)739 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
740                              const UChar** pp, const UChar* end)
741 {
742   const UChar* p = *pp;
743 
744   if (ONIGENC_IS_MBC_ASCII(p)) {
745     (*pp)++;
746     if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
747       return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
748     }
749     else {
750       return FALSE;
751     }
752   }
753 
754   (*pp) += enc_len(enc, p);
755   return FALSE;
756 }
757 
758 extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)759 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
760 {
761   if ((code & 0xff00) != 0) return 2;
762   else return 1;
763 }
764 
765 extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)766 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
767 {
768        if ((code & 0xff000000) != 0) return 4;
769   else if ((code & 0xff0000) != 0) return 3;
770   else if ((code & 0xff00) != 0) return 2;
771   else return 1;
772 }
773 
774 extern int
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)775 onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
776 {
777   int first;
778 
779   if ((code & 0xff00) != 0) {
780     first = (code >> 8) & 0xff;
781   }
782   else {
783     return (int )code;
784   }
785   return first;
786 }
787 
788 extern int
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)789 onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
790 {
791   int first;
792 
793   if ((code & 0xff000000) != 0) {
794     first = (code >> 24) & 0xff;
795   }
796   else if ((code & 0xff0000) != 0) {
797     first = (code >> 16) & 0xff;
798   }
799   else if ((code & 0xff00) != 0) {
800     first = (code >>  8) & 0xff;
801   }
802   else {
803     return (int )code;
804   }
805   return first;
806 }
807 
808 extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)809 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
810 {
811   UChar *p = buf;
812 
813   if ((code & 0xff00) != 0) {
814     *p++ = (UChar )((code >>  8) & 0xff);
815   }
816   *p++ = (UChar )(code & 0xff);
817 
818 #if 1
819   if (enc_len(enc, buf) != (p - buf))
820     return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
821 #endif
822   return p - buf;
823 }
824 
825 extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)826 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
827 {
828   UChar *p = buf;
829 
830   if ((code & 0xff000000) != 0) {
831     *p++ = (UChar )((code >> 24) & 0xff);
832   }
833   if ((code & 0xff0000) != 0 || p != buf) {
834     *p++ = (UChar )((code >> 16) & 0xff);
835   }
836   if ((code & 0xff00) != 0 || p != buf) {
837     *p++ = (UChar )((code >> 8) & 0xff);
838   }
839   *p++ = (UChar )(code & 0xff);
840 
841 #if 1
842   if (enc_len(enc, buf) != (p - buf))
843     return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
844 #endif
845   return p - buf;
846 }
847 
848 extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)849 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
850 			  unsigned int ctype)
851 {
852   if (code < 128)
853     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
854   else {
855     if ((ctype & (ONIGENC_CTYPE_WORD |
856                   ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
857       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
858     }
859   }
860 
861   return FALSE;
862 }
863 
864 extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)865 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
866 			  unsigned int ctype)
867 {
868   if (code < 128)
869     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
870   else {
871     if ((ctype & (ONIGENC_CTYPE_WORD |
872                   ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
873       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
874     }
875   }
876 
877   return FALSE;
878 }
879 
880 extern int
onigenc_with_ascii_strncmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)881 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
882                            const UChar* sascii /* ascii */, int n)
883 {
884   int x, c;
885 
886   while (n-- > 0) {
887     if (p >= end) return (int )(*sascii);
888 
889     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
890     x = *sascii - c;
891     if (x) return x;
892 
893     sascii++;
894     p += enc_len(enc, p);
895   }
896   return 0;
897 }
898 
899 #else /* ONIG_RUBY_M17N */
900 
901 extern int
onigenc_is_code_ctype(OnigEncoding enc,OnigCodePoint code,int ctype)902 onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
903 {
904   switch (ctype) {
905   case ONIGENC_CTYPE_NEWLINE:
906     if (code == 0x0a) return 1;
907     break;
908 
909   case ONIGENC_CTYPE_ALPHA:
910     return m17n_isalpha(enc, code);
911     break;
912   case ONIGENC_CTYPE_BLANK:
913     return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
914     break;
915   case ONIGENC_CTYPE_CNTRL:
916     return m17n_iscntrl(enc, code);
917     break;
918   case ONIGENC_CTYPE_DIGIT:
919     return m17n_isdigit(enc, code);
920     break;
921   case ONIGENC_CTYPE_GRAPH:
922     return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
923     break;
924   case ONIGENC_CTYPE_LOWER:
925     return m17n_islower(enc, code);
926     break;
927   case ONIGENC_CTYPE_PRINT:
928     return m17n_isprint(enc, code);
929     break;
930   case ONIGENC_CTYPE_PUNCT:
931     return m17n_ispunct(enc, code);
932     break;
933   case ONIGENC_CTYPE_SPACE:
934     return m17n_isspace(enc, code);
935     break;
936   case ONIGENC_CTYPE_UPPER:
937     return m17n_isupper(enc, code);
938     break;
939   case ONIGENC_CTYPE_XDIGIT:
940     return m17n_isxdigit(enc, code);
941     break;
942   case ONIGENC_CTYPE_WORD:
943     return m17n_iswchar(enc, code);
944     break;
945   case ONIGENC_CTYPE_ASCII:
946     return (code < 128 ? TRUE : FALSE);
947     break;
948   case ONIGENC_CTYPE_ALNUM:
949     return m17n_isalnum(enc, code);
950     break;
951   default:
952     break;
953   }
954 
955   return 0;
956 }
957 
958 extern int
onigenc_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)959 onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
960 {
961   int c, len;
962 
963   m17n_mbcput(enc, code, buf);
964   c = m17n_firstbyte(enc, code);
965   len = enc_len(enc, c);
966   return len;
967 }
968 
969 extern int
onigenc_mbc_to_lower(OnigEncoding enc,UChar * p,UChar * buf)970 onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
971 {
972   unsigned int c, low;
973 
974   c   = m17n_codepoint(enc, p, p + enc_len(enc, *p));
975   low = m17n_tolower(enc, c);
976   m17n_mbcput(enc, low, buf);
977 
978   return m17n_codelen(enc, low);
979 }
980 
981 extern int
onigenc_is_mbc_ambiguous(OnigEncoding enc,OnigAmbigType flag,UChar ** pp,UChar * end)982 onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
983                          UChar** pp, UChar* end)
984 {
985   int len;
986   unsigned int c;
987   UChar* p = *pp;
988 
989   len = enc_len(enc, *p);
990   (*pp) += len;
991   c = m17n_codepoint(enc, p, p + len);
992 
993   if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
994     if (m17n_isupper(enc, c) || m17n_islower(enc, c))
995       return TRUE;
996   }
997 
998   return FALSE;
999 }
1000 
1001 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,UChar * start,UChar * s)1002 onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
1003 {
1004   UChar *p;
1005   int len;
1006 
1007   if (s <= start) return s;
1008   p = s;
1009 
1010   while (!m17n_islead(enc, *p) && p > start) p--;
1011   while (p + (len = enc_len(enc, *p)) < s) {
1012     p += len;
1013   }
1014   if (p + len == s) return s;
1015   return p;
1016 }
1017 
1018 extern int
onigenc_is_allowed_reverse_match(OnigEncoding enc,const UChar * s,const UChar * end)1019 onigenc_is_allowed_reverse_match(OnigEncoding enc,
1020 				 const UChar* s, const UChar* end)
1021 {
1022   return ONIGENC_IS_SINGLEBYTE(enc);
1023 }
1024 
1025 extern void
onigenc_set_default_caseconv_table(UChar * table)1026 onigenc_set_default_caseconv_table(UChar* table) { }
1027 
1028 #endif /* ONIG_RUBY_M17N */
1029