xref: /PHP-7.2/ext/mbstring/oniguruma/src/regenc.c (revision 0ae2f95b)
1 /**********************************************************************
2   regenc.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
32 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33 
/*
 * Encoding-layer start-up hook.
 * Nothing needs to be prepared here, so the call always reports success.
 *
 * Returns 0.
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
39 
40 extern int
onig_initialize_encoding(OnigEncoding enc)41 onig_initialize_encoding(OnigEncoding enc)
42 {
43   if (enc->init != 0 && (enc->is_initialized() == 0)) {
44     int r = (enc->init)();
45     return r;
46   }
47 
48   return 0;
49 }
50 
51 extern OnigEncoding
onigenc_get_default_encoding(void)52 onigenc_get_default_encoding(void)
53 {
54   return OnigEncDefaultCharEncoding;
55 }
56 
57 extern int
onigenc_set_default_encoding(OnigEncoding enc)58 onigenc_set_default_encoding(OnigEncoding enc)
59 {
60   OnigEncDefaultCharEncoding = enc;
61   return 0;
62 }
63 
64 extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)65 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
66 {
67   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
68   if (p < s) {
69     p += enclen(enc, p);
70   }
71   return p;
72 }
73 
74 extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,const UChar * start,const UChar * s,const UChar ** prev)75 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
76 				   const UChar* start, const UChar* s, const UChar** prev)
77 {
78   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
79 
80   if (p < s) {
81     if (prev) *prev = (const UChar* )p;
82     p += enclen(enc, p);
83   }
84   else {
85     if (prev) *prev = (const UChar* )NULL; /* Sorry */
86   }
87   return p;
88 }
89 
90 extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc,const UChar * start,const UChar * s)91 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
92 {
93   if (s <= start)
94     return (UChar* )NULL;
95 
96   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
97 }
98 
99 extern UChar*
onigenc_step_back(OnigEncoding enc,const UChar * start,const UChar * s,int n)100 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
101 {
102   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
103     if (s <= start)
104       return (UChar* )NULL;
105 
106     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
107   }
108   return (UChar* )s;
109 }
110 
/* Disabled by "#if 0" (never compiled): returns the encoded length of the
   character at p, limited to the number of bytes between p and end. */
111 #if 0
112 extern int
113 onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
114 {
115   int len;
116   int n;
117 
118   len = ONIGENC_MBC_ENC_LEN(enc, p);
119   n = (int )(end - p);
120 
121   return (n < len ? n : len);
122 }
123 #endif
124 
125 extern UChar*
onigenc_step(OnigEncoding enc,const UChar * p,const UChar * end,int n)126 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
127 {
128   UChar* q = (UChar* )p;
129   while (n-- > 0) {
130     q += ONIGENC_MBC_ENC_LEN(enc, q);
131   }
132   return (q <= end ? q : NULL);
133 }
134 
135 extern int
onigenc_strlen(OnigEncoding enc,const UChar * p,const UChar * end)136 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
137 {
138   int n = 0;
139   UChar* q = (UChar* )p;
140 
141   while (q < end) {
142     q += ONIGENC_MBC_ENC_LEN(enc, q);
143     n++;
144   }
145   return n;
146 }
147 
148 extern int
onigenc_strlen_null(OnigEncoding enc,const UChar * s)149 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
150 {
151   int n = 0;
152   UChar* p = (UChar* )s;
153 
154   while (1) {
155     if (*p == '\0') {
156       UChar* q;
157       int len = ONIGENC_MBC_MINLEN(enc);
158 
159       if (len == 1) return n;
160       q = p + 1;
161       while (len > 1) {
162         if (*q != '\0') break;
163         q++;
164         len--;
165       }
166       if (len == 1) return n;
167     }
168     p += ONIGENC_MBC_ENC_LEN(enc, p);
169     n++;
170   }
171 }
172 
173 extern int
onigenc_str_bytelen_null(OnigEncoding enc,const UChar * s)174 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
175 {
176   UChar* start = (UChar* )s;
177   UChar* p = (UChar* )s;
178 
179   while (1) {
180     if (*p == '\0') {
181       UChar* q;
182       int len = ONIGENC_MBC_MINLEN(enc);
183 
184       if (len == 1) return (int )(p - start);
185       q = p + 1;
186       while (len > 1) {
187         if (*q != '\0') break;
188         q++;
189         len--;
190       }
191       if (len == 1) return (int )(p - start);
192     }
193     p += ONIGENC_MBC_ENC_LEN(enc, p);
194   }
195 }
196 
/* Lower-casing table with 256 entries in ascending value order: entries
   0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); every other entry holds
   its own index. */
197 const UChar OnigEncAsciiToLowerCaseTable[] = {
198   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
199   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
200   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
201   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
202   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
203   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
204   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
205   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
206   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
207   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
208   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
209   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
210   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
211   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
212   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
213   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
214   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
215   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
216   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
217   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
218   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
219   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
220   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
221   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
222   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
223   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
224   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
225   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
226   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
227   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
228   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
229   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
230 };
231 
/* Upper-casing table, compiled only when USE_UPPER_CASE_TABLE is defined:
   entries 0141-0172 ('a'-'z') hold 0101-0132 ('A'-'Z'); every other entry
   holds its own index. */
232 #ifdef USE_UPPER_CASE_TABLE
233 const UChar OnigEncAsciiToUpperCaseTable[256] = {
234   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
235   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
236   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
237   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
238   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
239   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
240   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
241   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
242   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
243   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
244   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
245   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
246   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
247   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
248   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
249   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
250   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
251   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
252   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
253   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
254   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
255   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
256   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
257   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
258   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
259   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
260   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
261   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
262   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
263   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
264   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
265   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
266 };
267 #endif
268 
/* Table of 256 16-bit flag words. The first 128 entries carry non-zero
   flag sets; entries 128-255 are all 0x0000.
   NOTE(review): presumably indexed by byte value with one bit per
   ONIGENC_CTYPE_* class -- confirm the bit layout against regenc.h. */
269 const unsigned short OnigEncAsciiCtypeTable[256] = {
270   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
271   0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
272   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
273   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
274   0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
275   0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
276   0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
277   0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
278   0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
279   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
280   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
281   0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
282   0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
283   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
284   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
285   0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
286   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
287   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
288   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
289   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
290   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
291   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
292   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
293   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
294   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
295   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
296   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
297   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
298   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
299   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
300   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
301   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
302 };
303 
/* Lower-casing table: entries 0101-0132 ('A'-'Z') hold 0141-0172, and
   entries 0300-0336 hold the value 040 higher (0340-0376), except 0327
   which holds itself. Every other entry, including 0337, holds its own
   index. */
304 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
305   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
306   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
307   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
308   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
309   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
310   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
311   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
312   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
313   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
314   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
315   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
316   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
317   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
318   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
319   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
320   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
321   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
322   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
323   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
324   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
325   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
326   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
327   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
328   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
329   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
330   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
331   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
332   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
333   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
334   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
335   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
336   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
337 };
338 
/* Upper-casing table, compiled only when USE_UPPER_CASE_TABLE is defined:
   entries 0141-0172 ('a'-'z') hold 0101-0132, and entries 0340-0376 hold
   the value 040 lower (0300-0336), except 0367 which holds itself. Every
   other entry, including 0377, holds its own index. */
339 #ifdef USE_UPPER_CASE_TABLE
340 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
341   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
342   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
343   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
344   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
345   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
346   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
347   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
348   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
349   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
350   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
351   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
352   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
353   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
354   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
355   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
356   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
357   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
358   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
359   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
360   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
361   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
362   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
363   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
364   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
365   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
366   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
367   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
368   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
369   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
370   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
371   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
372   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
373 };
374 #endif
375 
376 extern void
onigenc_set_default_caseconv_table(const UChar * table ARG_UNUSED)377 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
378 {
379   /* nothing */
380   /* obsoleted. */
381 }
382 
383 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)384 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
385 {
386   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
387 }
388 
389 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
390   { 0x41, 0x61 },
391   { 0x42, 0x62 },
392   { 0x43, 0x63 },
393   { 0x44, 0x64 },
394   { 0x45, 0x65 },
395   { 0x46, 0x66 },
396   { 0x47, 0x67 },
397   { 0x48, 0x68 },
398   { 0x49, 0x69 },
399   { 0x4a, 0x6a },
400   { 0x4b, 0x6b },
401   { 0x4c, 0x6c },
402   { 0x4d, 0x6d },
403   { 0x4e, 0x6e },
404   { 0x4f, 0x6f },
405   { 0x50, 0x70 },
406   { 0x51, 0x71 },
407   { 0x52, 0x72 },
408   { 0x53, 0x73 },
409   { 0x54, 0x74 },
410   { 0x55, 0x75 },
411   { 0x56, 0x76 },
412   { 0x57, 0x77 },
413   { 0x58, 0x78 },
414   { 0x59, 0x79 },
415   { 0x5a, 0x7a }
416 };
417 
418 extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg)419 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
420 				  OnigApplyAllCaseFoldFunc f, void* arg)
421 {
422   OnigCodePoint code;
423   int i, r;
424 
425   for (i = 0;
426        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
427        i++) {
428     code = OnigAsciiLowerMap[i].to;
429     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
430     if (r != 0) return r;
431 
432     code = OnigAsciiLowerMap[i].from;
433     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
434     if (r != 0) return r;
435   }
436 
437   return 0;
438 }
439 
440 extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end ARG_UNUSED,OnigCaseFoldCodeItem items[])441 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
442 	 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
443 	 OnigCaseFoldCodeItem items[])
444 {
445   if (0x41 <= *p && *p <= 0x5a) {
446     items[0].byte_len = 1;
447     items[0].code_len = 1;
448     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
449     return 1;
450   }
451   else if (0x61 <= *p && *p <= 0x7a) {
452     items[0].byte_len = 1;
453     items[0].code_len = 1;
454     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
455     return 1;
456   }
457   else
458     return 0;
459 }
460 
461 static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg)462 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
463 		       OnigApplyAllCaseFoldFunc f, void* arg)
464 {
465   static OnigCodePoint ss[] = { 0x73, 0x73 };
466 
467   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
468 }
469 
470 extern int
onigenc_apply_all_case_fold_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)471 onigenc_apply_all_case_fold_with_map(int map_size,
472     const OnigPairCaseFoldCodes map[],
473     int ess_tsett_flag, OnigCaseFoldType flag,
474     OnigApplyAllCaseFoldFunc f, void* arg)
475 {
476   OnigCodePoint code;
477   int i, r;
478 
479   r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
480   if (r != 0) return r;
481 
482   for (i = 0; i < map_size; i++) {
483     code = map[i].to;
484     r = (*f)(map[i].from, &code, 1, arg);
485     if (r != 0) return r;
486 
487     code = map[i].from;
488     r = (*f)(map[i].to, &code, 1, arg);
489     if (r != 0) return r;
490   }
491 
492   if (ess_tsett_flag != 0)
493     return ss_apply_all_case_fold(flag, f, arg);
494 
495   return 0;
496 }
497 
498 extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])499 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
500     const OnigPairCaseFoldCodes map[],
501     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
502     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
503 {
504   if (0x41 <= *p && *p <= 0x5a) {
505     items[0].byte_len = 1;
506     items[0].code_len = 1;
507     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
508     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
509 	&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
510       /* SS */
511       items[1].byte_len = 2;
512       items[1].code_len = 1;
513       items[1].code[0] = (OnigCodePoint )0xdf;
514       return 2;
515     }
516     else
517       return 1;
518   }
519   else if (0x61 <= *p && *p <= 0x7a) {
520     items[0].byte_len = 1;
521     items[0].code_len = 1;
522     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
523     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
524 	&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
525       /* ss */
526       items[1].byte_len = 2;
527       items[1].code_len = 1;
528       items[1].code[0] = (OnigCodePoint )0xdf;
529       return 2;
530     }
531     else
532       return 1;
533   }
534   else if (*p == 0xdf && ess_tsett_flag != 0) {
535     items[0].byte_len = 1;
536     items[0].code_len = 2;
537     items[0].code[0] = (OnigCodePoint )'s';
538     items[0].code[1] = (OnigCodePoint )'s';
539 
540     items[1].byte_len = 1;
541     items[1].code_len = 2;
542     items[1].code[0] = (OnigCodePoint )'S';
543     items[1].code[1] = (OnigCodePoint )'S';
544 
545     items[2].byte_len = 1;
546     items[2].code_len = 2;
547     items[2].code[0] = (OnigCodePoint )'s';
548     items[2].code[1] = (OnigCodePoint )'S';
549 
550     items[3].byte_len = 1;
551     items[3].code_len = 2;
552     items[3].code[0] = (OnigCodePoint )'S';
553     items[3].code[1] = (OnigCodePoint )'s';
554 
555     return 4;
556   }
557   else {
558     int i;
559 
560     for (i = 0; i < map_size; i++) {
561       if (*p == map[i].from) {
562 	items[0].byte_len = 1;
563 	items[0].code_len = 1;
564 	items[0].code[0] = map[i].to;
565 	return 1;
566       }
567       else if (*p == map[i].to) {
568 	items[0].byte_len = 1;
569 	items[0].code_len = 1;
570 	items[0].code[0] = map[i].from;
571 	return 1;
572       }
573     }
574   }
575 
576   return 0;
577 }
578 
579 
580 extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,OnigCodePoint * sb_out ARG_UNUSED,const OnigCodePoint * ranges[]ARG_UNUSED)581 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
582 	 OnigCodePoint* sb_out ARG_UNUSED,
583 	 const OnigCodePoint* ranges[] ARG_UNUSED)
584 {
585   return ONIG_NO_SUPPORT_CONFIG;
586 }
587 
588 extern int
onigenc_is_mbc_newline_0x0a(const UChar * p,const UChar * end)589 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
590 {
591   if (p < end) {
592     if (*p == 0x0a) return 1;
593   }
594   return 0;
595 }
596 
597 /* for single byte encodings */
598 extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,const UChar ** p,const UChar * end ARG_UNUSED,UChar * lower)599 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
600 	    const UChar*end ARG_UNUSED, UChar* lower)
601 {
602   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
603 
604   (*p)++;
605   return 1; /* return byte length of converted char to lower */
606 }
607 
/* Disabled by "#if 0" (never compiled): advances *pp by one byte and
   returns ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte it skipped. */
608 #if 0
609 extern int
610 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
611 			       const UChar** pp, const UChar* end)
612 {
613   const UChar* p = *pp;
614 
615   (*pp)++;
616   return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
617 }
618 #endif
619 
620 extern int
onigenc_single_byte_mbc_enc_len(const UChar * p ARG_UNUSED)621 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
622 {
623   return 1;
624 }
625 
626 extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar * p,const UChar * end ARG_UNUSED)627 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
628 {
629   return (OnigCodePoint )(*p);
630 }
631 
632 extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)633 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
634 {
635   return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
636 }
637 
638 extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code,UChar * buf)639 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
640 {
641   *buf = (UChar )(code & 0xff);
642   return 1;
643 }
644 
645 extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar * start ARG_UNUSED,const UChar * s)646 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
647 					  const UChar* s)
648 {
649   return (UChar* )s;
650 }
651 
652 extern int
onigenc_always_true_is_allowed_reverse_match(const UChar * s ARG_UNUSED,const UChar * end ARG_UNUSED)653 onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
654 					     const UChar* end ARG_UNUSED)
655 {
656   return TRUE;
657 }
658 
659 extern int
onigenc_always_false_is_allowed_reverse_match(const UChar * s ARG_UNUSED,const UChar * end ARG_UNUSED)660 onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
661 					      const UChar* end ARG_UNUSED)
662 {
663   return FALSE;
664 }
665 
666 extern int
onigenc_always_true_is_valid_mbc_string(const UChar * s ARG_UNUSED,const UChar * end ARG_UNUSED)667 onigenc_always_true_is_valid_mbc_string(const UChar* s   ARG_UNUSED,
668 					const UChar* end ARG_UNUSED)
669 {
670   return TRUE;
671 }
672 
673 extern int
onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,const UChar * p,const UChar * end)674 onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
675 					 const UChar* p, const UChar* end)
676 {
677   while (p < end) {
678     p += enclen(enc, p);
679   }
680 
681   if (p != end)
682     return FALSE;
683   else
684     return TRUE;
685 }
686 
687 extern int
onigenc_is_valid_mbc_string(OnigEncoding enc,const UChar * s,const UChar * end)688 onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
689 {
690   return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
691 }
692 
693 extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc,const UChar * p,const UChar * end)694 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
695 {
696   int c, i, len;
697   OnigCodePoint n;
698 
699   len = enclen(enc, p);
700   n = (OnigCodePoint )(*p++);
701   if (len == 1) return n;
702 
703   for (i = 1; i < len; i++) {
704     if (p >= end) break;
705     c = *p++;
706     n <<= 8;  n += c;
707   }
708   return n;
709 }
710 
711 extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end ARG_UNUSED,UChar * lower)712 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
713                           const UChar** pp, const UChar* end ARG_UNUSED,
714 			  UChar* lower)
715 {
716   int len;
717   const UChar *p = *pp;
718 
719   if (ONIGENC_IS_MBC_ASCII(p)) {
720     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
721     (*pp)++;
722     return 1;
723   }
724   else {
725     int i;
726 
727     len = enclen(enc, p);
728     for (i = 0; i < len; i++) {
729       *lower++ = *p++;
730     }
731     (*pp) += len;
732     return len; /* return byte length of converted to lower char */
733   }
734 }
735 
/* Disabled by "#if 0" (never compiled): advances *pp past one character;
   returns ONIGENC_IS_ASCII_CODE_CASE_AMBIG for an ASCII character and
   FALSE for anything else. */
736 #if 0
737 extern int
738 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
739                              const UChar** pp, const UChar* end)
740 {
741   const UChar* p = *pp;
742 
743   if (ONIGENC_IS_MBC_ASCII(p)) {
744     (*pp)++;
745     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
746   }
747 
748   (*pp) += enclen(enc, p);
749   return FALSE;
750 }
751 #endif
752 
753 extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)754 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
755 {
756   if ((code & 0xff00) != 0) return 2;
757   else return 1;
758 }
759 
760 extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)761 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
762 {
763        if ((code & 0xff000000) != 0) return 4;
764   else if ((code & 0xff0000) != 0) return 3;
765   else if ((code & 0xff00) != 0) return 2;
766   else return 1;
767 }
768 
769 extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)770 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
771 {
772   UChar *p = buf;
773 
774   if ((code & 0xff00) != 0) {
775     *p++ = (UChar )((code >>  8) & 0xff);
776   }
777   *p++ = (UChar )(code & 0xff);
778 
779 #if 1
780   if (enclen(enc, buf) != (p - buf))
781     return ONIGERR_INVALID_CODE_POINT_VALUE;
782 #endif
783   return p - buf;
784 }
785 
786 extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)787 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
788 {
789   UChar *p = buf;
790 
791   if ((code & 0xff000000) != 0) {
792     *p++ = (UChar )((code >> 24) & 0xff);
793   }
794   if ((code & 0xff0000) != 0 || p != buf) {
795     *p++ = (UChar )((code >> 16) & 0xff);
796   }
797   if ((code & 0xff00) != 0 || p != buf) {
798     *p++ = (UChar )((code >> 8) & 0xff);
799   }
800   *p++ = (UChar )(code & 0xff);
801 
802 #if 1
803   if (enclen(enc, buf) != (p - buf))
804     return ONIGERR_INVALID_CODE_POINT_VALUE;
805 #endif
806   return p - buf;
807 }
808 
809 extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc,UChar * p,UChar * end)810 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
811 {
812   static PosixBracketEntryType PBS[] = {
813     { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
814     { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
815     { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
816     { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
817     { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
818     { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
819     { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
820     { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
821     { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
822     { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
823     { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
824     { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
825     { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
826     { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
827     { (UChar* )NULL, -1, 0 }
828   };
829 
830   PosixBracketEntryType *pb;
831   int len;
832 
833   len = onigenc_strlen(enc, p, end);
834   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
835     if (len == pb->len &&
836         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
837       return pb->ctype;
838   }
839 
840   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
841 }
842 
843 extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)844 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
845 			  unsigned int ctype)
846 {
847   if (code < 128)
848     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
849   else {
850     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
851       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
852     }
853   }
854 
855   return FALSE;
856 }
857 
858 extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)859 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
860 			  unsigned int ctype)
861 {
862   if (code < 128)
863     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
864   else {
865     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
866       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
867     }
868   }
869 
870   return FALSE;
871 }
872 
873 extern int
onigenc_with_ascii_strncmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)874 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
875                            const UChar* sascii /* ascii */, int n)
876 {
877   int x, c;
878 
879   while (n-- > 0) {
880     if (p >= end) return (int )(*sascii);
881 
882     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
883     x = *sascii - c;
884     if (x) return x;
885 
886     sascii++;
887     p += enclen(enc, p);
888   }
889   return 0;
890 }
891 
892 extern int
onig_codes_cmp(OnigCodePoint a[],OnigCodePoint b[],int n)893 onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
894 {
895   int i;
896 
897   for (i = 0; i < n; i++) {
898     if (a[i] != b[i])
899       return -1;
900   }
901 
902   return 0;
903 }
904 
905 extern int
onig_codes_byte_at(OnigCodePoint codes[],int at)906 onig_codes_byte_at(OnigCodePoint codes[], int at)
907 {
908   int index;
909   int b;
910   OnigCodePoint code;
911 
912   index = at / 3;
913   b     = at % 3;
914   code = codes[index];
915 
916   return ((code >> ((2 - b) * 8)) & 0xff);
917 }
918