1 /**********************************************************************
2 unicode.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
/* Non-zero when the table entry for `code` has the CTYPE_TO_BIT(ctype) bit
   set.  `code` indexes the 256-entry EncUNICODE_ISO_8859_1_CtypeTable
   directly, so callers must only pass values below 256. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
34
/* Ctype bit sets for code points 0x00-0xFF, one entry per code point
   (eight entries per row, so row k starts at code point 8*k).
   Each entry is tested against CTYPE_TO_BIT(ctype) by
   ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
69
70 #ifdef USE_UNICODE_PROPERTIES
71 #include "unicode_property_data.c"
72 #else
73 #include "unicode_property_data_posix.c"
74 #endif
75
76 #include "st.h"
77
/* Maximum number of properties that onig_unicode_define_user_property()
   accepts before it reports ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS. */
#define USER_DEFINED_PROPERTY_MAX_NUM  20

/* One registered user-defined property. */
typedef struct {
  int ctype;              /* assigned id: CODE_RANGES_NUM + slot index */
  OnigCodePoint* ranges;  /* caller-supplied range data; the pointer is stored, not copied */
} UserDefinedPropertyValue;

/* Number of slots of UserDefinedPropertyRanges currently in use. */
static int UserDefinedPropertyNum;
/* Slot storage; the values kept in UserDefinedPropertyTable point into it. */
static UserDefinedPropertyValue
UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
/* Normalized property name -> UserDefinedPropertyValue*.
   Stays 0 until the first property is registered. */
static st_table* UserDefinedPropertyTable;
89
90 extern int
onig_unicode_define_user_property(const char * name,OnigCodePoint * ranges)91 onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
92 {
93 UserDefinedPropertyValue* e;
94 int i;
95 int n;
96 int len;
97 int c;
98 char* s;
99
100 if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
101 return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
102
103 len = strlen(name);
104 if (len >= PROPERTY_NAME_MAX_SIZE)
105 return ONIGERR_TOO_LONG_PROPERTY_NAME;
106
107 s = (char* )xmalloc(len + 1);
108 if (s == 0)
109 return ONIGERR_MEMORY;
110
111 n = 0;
112 for (i = 0; i < len; i++) {
113 c = name[i];
114 if (c <= 0 || c >= 0x80) {
115 xfree(s);
116 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
117 }
118
119 if (c != ' ' && c != '-' && c != '_') {
120 s[n] = c;
121 n++;
122 }
123 }
124 s[n] = '\0';
125
126 if (UserDefinedPropertyTable == 0) {
127 UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
128 }
129
130 e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
131 e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
132 e->ranges = ranges;
133 onig_st_insert_strend(UserDefinedPropertyTable,
134 (const UChar* )s, (const UChar* )s + n,
135 (hash_data_type )((void* )e));
136
137 UserDefinedPropertyNum++;
138 return 0;
139 }
140
141 extern int
onigenc_unicode_is_code_ctype(OnigCodePoint code,unsigned int ctype)142 onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
143 {
144 if (
145 #ifdef USE_UNICODE_PROPERTIES
146 ctype <= ONIGENC_MAX_STD_CTYPE &&
147 #endif
148 code < 256) {
149 return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
150 }
151
152 if (ctype >= CODE_RANGES_NUM) {
153 int index = ctype - CODE_RANGES_NUM;
154 if (index < UserDefinedPropertyNum)
155 return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
156 else
157 return ONIGERR_TYPE_BUG;
158 }
159
160 return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
161 }
162
163
164 extern int
onigenc_unicode_ctype_code_range(int ctype,const OnigCodePoint * ranges[])165 onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
166 {
167 if (ctype >= CODE_RANGES_NUM) {
168 int index = ctype - CODE_RANGES_NUM;
169 if (index < UserDefinedPropertyNum) {
170 *ranges = UserDefinedPropertyRanges[index].ranges;
171 return 0;
172 }
173 else
174 return ONIGERR_TYPE_BUG;
175 }
176
177 *ranges = CodeRanges[ctype];
178 return 0;
179 }
180
181 extern int
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype,OnigCodePoint * sb_out,const OnigCodePoint * ranges[])182 onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
183 const OnigCodePoint* ranges[])
184 {
185 *sb_out = 0x00;
186 return onigenc_unicode_ctype_code_range(ctype, ranges);
187 }
188
189 extern int
onigenc_unicode_property_name_to_ctype(OnigEncoding enc,UChar * name,UChar * end)190 onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
191 {
192 int len;
193 UChar *p;
194 OnigCodePoint code;
195 const struct PropertyNameCtype* pc;
196 char buf[PROPERTY_NAME_MAX_SIZE];
197
198 p = name;
199 len = 0;
200 while (p < end) {
201 code = ONIGENC_MBC_TO_CODE(enc, p, end);
202 if (code >= 0x80)
203 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
204
205 if (code != ' ' && code != '-' && code != '_') {
206 buf[len++] = (char )code;
207 if (len >= PROPERTY_NAME_MAX_SIZE)
208 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
209 }
210
211 p += enclen(enc, p);
212 }
213
214 buf[len] = 0;
215
216 if (UserDefinedPropertyTable != 0) {
217 UserDefinedPropertyValue* e;
218 e = (UserDefinedPropertyValue* )NULL;
219 onig_st_lookup_strend(UserDefinedPropertyTable,
220 (const UChar* )buf, (const UChar* )buf + len,
221 (hash_data_type* )((void* )(&e)));
222 if (e != 0) {
223 return e->ctype;
224 }
225 }
226
227 pc = unicode_lookup_property_name(buf, len);
228 if (pc != 0) {
229 /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
230 #ifndef USE_UNICODE_PROPERTIES
231 if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
232 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
233 #endif
234
235 return pc->ctype;
236 }
237
238 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
239 }
240
241 /* for use macros in unicode_fold_data.c */
242 #include "unicode_fold_data.c"
243
244
245 extern int
onigenc_unicode_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end,UChar * fold)246 onigenc_unicode_mbc_case_fold(OnigEncoding enc,
247 OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
248 UChar* fold)
249 {
250 const struct ByUnfoldKey* buk;
251
252 OnigCodePoint code;
253 int i, len, rlen;
254 const UChar *p = *pp;
255
256 code = ONIGENC_MBC_TO_CODE(enc, p, end);
257 len = enclen(enc, p);
258 *pp += len;
259
260 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
261 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
262 if (code == 0x0130) {
263 return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
264 }
265 #if 0
266 if (code == 0x0049) {
267 return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
268 }
269 #endif
270 }
271 #endif
272
273 buk = unicode_unfold_key(code);
274 if (buk != 0) {
275 if (buk->fold_len == 1) {
276 return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
277 }
278 else {
279 OnigCodePoint* addr;
280
281 FOLDS_FOLD_ADDR_BUK(buk, addr);
282 rlen = 0;
283 for (i = 0; i < buk->fold_len; i++) {
284 OnigCodePoint c = addr[i];
285 len = ONIGENC_CODE_TO_MBC(enc, c, fold);
286 fold += len;
287 rlen += len;
288 }
289 return rlen;
290 }
291 }
292
293 for (i = 0; i < len; i++) {
294 *fold++ = *p++;
295 }
296 return len;
297 }
298
299 static int
apply_case_fold1(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)300 apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
301 {
302 int i, j, k, n, r;
303
304 for (i = from; i < to; ) {
305 OnigCodePoint fold = *FOLDS1_FOLD(i);
306 n = FOLDS1_UNFOLDS_NUM(i);
307 for (j = 0; j < n; j++) {
308 OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
309
310 r = (*f)(fold, &unfold, 1, arg);
311 if (r != 0) return r;
312 r = (*f)(unfold, &fold, 1, arg);
313 if (r != 0) return r;
314
315 for (k = 0; k < j; k++) {
316 OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
317 r = (*f)(unfold, &unfold2, 1, arg);
318 if (r != 0) return r;
319 r = (*f)(unfold2, &unfold, 1, arg);
320 if (r != 0) return r;
321 }
322 }
323
324 i = FOLDS1_NEXT_INDEX(i);
325 }
326
327 return 0;
328 }
329
330 static int
apply_case_fold2(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)331 apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
332 {
333 int i, j, k, n, r;
334
335 for (i = from; i < to; ) {
336 OnigCodePoint* fold = FOLDS2_FOLD(i);
337 n = FOLDS2_UNFOLDS_NUM(i);
338 for (j = 0; j < n; j++) {
339 OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
340
341 r = (*f)(unfold, fold, 2, arg);
342 if (r != 0) return r;
343
344 for (k = 0; k < j; k++) {
345 OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
346 r = (*f)(unfold, &unfold2, 1, arg);
347 if (r != 0) return r;
348 r = (*f)(unfold2, &unfold, 1, arg);
349 if (r != 0) return r;
350 }
351 }
352
353 i = FOLDS2_NEXT_INDEX(i);
354 }
355
356 return 0;
357 }
358
359 static int
apply_case_fold3(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)360 apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
361 {
362 int i, j, k, n, r;
363
364 for (i = from; i < to; ) {
365 OnigCodePoint* fold = FOLDS3_FOLD(i);
366 n = FOLDS3_UNFOLDS_NUM(i);
367 for (j = 0; j < n; j++) {
368 OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
369
370 r = (*f)(unfold, fold, 3, arg);
371 if (r != 0) return r;
372
373 for (k = 0; k < j; k++) {
374 OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
375 r = (*f)(unfold, &unfold2, 1, arg);
376 if (r != 0) return r;
377 r = (*f)(unfold2, &unfold, 1, arg);
378 if (r != 0) return r;
379 }
380 }
381
382 i = FOLDS3_NEXT_INDEX(i);
383 }
384
385 return 0;
386 }
387
388 extern int
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)389 onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
390 OnigApplyAllCaseFoldFunc f, void* arg)
391 {
392 int r;
393
394 r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
395 if (r != 0) return r;
396
397 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
398 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
399 code = 0x0131;
400 r = (*f)(0x0049, &code, 1, arg);
401 if (r != 0) return r;
402 code = 0x0049;
403 r = (*f)(0x0131, &code, 1, arg);
404 if (r != 0) return r;
405
406 code = 0x0130;
407 r = (*f)(0x0069, &code, 1, arg);
408 if (r != 0) return r;
409 code = 0x0069;
410 r = (*f)(0x0130, &code, 1, arg);
411 if (r != 0) return r;
412 }
413 else {
414 #endif
415 r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
416 if (r != 0) return r;
417 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
418 }
419 #endif
420
421 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
422 return 0;
423
424 r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
425 if (r != 0) return r;
426
427 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
428 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
429 #endif
430 r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
431 if (r != 0) return r;
432 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
433 }
434 #endif
435
436 r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
437 if (r != 0) return r;
438
439 return 0;
440 }
441
442 extern int
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,OnigCaseFoldType flag,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])443 onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
444 OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
445 OnigCaseFoldCodeItem items[])
446 {
447 int n, m, i, j, k, len;
448 OnigCodePoint code, codes[3];
449 const struct ByUnfoldKey* buk;
450
451 n = 0;
452
453 code = ONIGENC_MBC_TO_CODE(enc, p, end);
454 len = enclen(enc, p);
455
456 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
457 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
458 if (code == 0x0049) {
459 items[0].byte_len = len;
460 items[0].code_len = 1;
461 items[0].code[0] = 0x0131;
462 return 1;
463 }
464 else if (code == 0x0130) {
465 items[0].byte_len = len;
466 items[0].code_len = 1;
467 items[0].code[0] = 0x0069;
468 return 1;
469 }
470 else if (code == 0x0131) {
471 items[0].byte_len = len;
472 items[0].code_len = 1;
473 items[0].code[0] = 0x0049;
474 return 1;
475 }
476 else if (code == 0x0069) {
477 items[0].byte_len = len;
478 items[0].code_len = 1;
479 items[0].code[0] = 0x0130;
480 return 1;
481 }
482 }
483 #endif
484
485 buk = unicode_unfold_key(code);
486 if (buk != 0) {
487 if (buk->fold_len == 1) {
488 int un;
489 items[0].byte_len = len;
490 items[0].code_len = 1;
491 items[0].code[0] = *FOLDS1_FOLD(buk->index);
492 n++;
493
494 un = FOLDS1_UNFOLDS_NUM(buk->index);
495 for (i = 0; i < un; i++) {
496 OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
497 if (unfold != code) {
498 items[n].byte_len = len;
499 items[n].code_len = 1;
500 items[n].code[0] = unfold;
501 n++;
502 }
503 }
504 code = items[0].code[0]; // for multi-code to unfold search.
505 }
506 else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
507 OnigCodePoint cs[3][4];
508 int fn, ncs[3];
509
510 if (buk->fold_len == 2) {
511 m = FOLDS2_UNFOLDS_NUM(buk->index);
512 for (i = 0; i < m; i++) {
513 OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
514 if (unfold == code) continue;
515
516 items[n].byte_len = len;
517 items[n].code_len = 1;
518 items[n].code[0] = unfold;
519 n++;
520 }
521
522 for (fn = 0; fn < 2; fn++) {
523 int index;
524 cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
525 index = unicode_fold1_key(&cs[fn][0]);
526 if (index >= 0) {
527 int m = FOLDS1_UNFOLDS_NUM(index);
528 for (i = 0; i < m; i++) {
529 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
530 }
531 ncs[fn] = m + 1;
532 }
533 else
534 ncs[fn] = 1;
535 }
536
537 for (i = 0; i < ncs[0]; i++) {
538 for (j = 0; j < ncs[1]; j++) {
539 items[n].byte_len = len;
540 items[n].code_len = 2;
541 items[n].code[0] = cs[0][i];
542 items[n].code[1] = cs[1][j];
543 n++;
544 }
545 }
546 }
547 else { /* fold_len == 3 */
548 m = FOLDS3_UNFOLDS_NUM(buk->index);
549 for (i = 0; i < m; i++) {
550 OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
551 if (unfold == code) continue;
552
553 items[n].byte_len = len;
554 items[n].code_len = 1;
555 items[n].code[0] = unfold;
556 n++;
557 }
558
559 for (fn = 0; fn < 3; fn++) {
560 int index;
561 cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
562 index = unicode_fold1_key(&cs[fn][0]);
563 if (index >= 0) {
564 int m = FOLDS1_UNFOLDS_NUM(index);
565 for (i = 0; i < m; i++) {
566 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
567 }
568 ncs[fn] = m + 1;
569 }
570 else
571 ncs[fn] = 1;
572 }
573
574 for (i = 0; i < ncs[0]; i++) {
575 for (j = 0; j < ncs[1]; j++) {
576 for (k = 0; k < ncs[2]; k++) {
577 items[n].byte_len = len;
578 items[n].code_len = 3;
579 items[n].code[0] = cs[0][i];
580 items[n].code[1] = cs[1][j];
581 items[n].code[2] = cs[2][k];
582 n++;
583 }
584 }
585 }
586 }
587
588 /* multi char folded code is not head of another folded multi char */
589 return n;
590 }
591 }
592 else {
593 int index = unicode_fold1_key(&code);
594 if (index >= 0) {
595 int m = FOLDS1_UNFOLDS_NUM(index);
596 for (i = 0; i < m; i++) {
597 items[n].byte_len = len;
598 items[n].code_len = 1;
599 items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
600 n++;
601 }
602 }
603 }
604
605 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
606 return n;
607
608 p += len;
609 if (p < end) {
610 int clen;
611 int index;
612
613 codes[0] = code;
614 code = ONIGENC_MBC_TO_CODE(enc, p, end);
615
616 buk = unicode_unfold_key(code);
617 if (buk != 0 && buk->fold_len == 1) {
618 codes[1] = *FOLDS1_FOLD(buk->index);
619 }
620 else
621 codes[1] = code;
622
623 clen = enclen(enc, p);
624 len += clen;
625
626 index = unicode_fold2_key(codes);
627 if (index >= 0) {
628 m = FOLDS2_UNFOLDS_NUM(index);
629 for (i = 0; i < m; i++) {
630 items[n].byte_len = len;
631 items[n].code_len = 1;
632 items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
633 n++;
634 }
635 }
636
637 p += clen;
638 if (p < end) {
639 code = ONIGENC_MBC_TO_CODE(enc, p, end);
640 buk = unicode_unfold_key(code);
641 if (buk != 0 && buk->fold_len == 1) {
642 codes[2] = *FOLDS1_FOLD(buk->index);
643 }
644 else
645 codes[2] = code;
646
647 clen = enclen(enc, p);
648 len += clen;
649
650 index = unicode_fold3_key(codes);
651 if (index >= 0) {
652 m = FOLDS3_UNFOLDS_NUM(index);
653 for (i = 0; i < m; i++) {
654 items[n].byte_len = len;
655 items[n].code_len = 1;
656 items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
657 n++;
658 }
659 }
660 }
661 }
662
663 return n;
664 }
665