xref: /PHP-7.2/ext/mbstring/oniguruma/src/unicode.c (revision 0ae2f95b)
1 /**********************************************************************
2   unicode.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
/*
 * Test whether the table entry for `code` has the bit selected by
 * CTYPE_TO_BIT(ctype) set.  `code` is used directly as an index into the
 * 256-entry table defined below, so callers must only pass code < 256.
 */
32 #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
33   ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
34 
/*
 * Per-code-point ctype bitmasks for code points 0x00..0xFF, eight entries
 * per row.  Each entry is a set of bits that the macro above tests with
 * CTYPE_TO_BIT(ctype).
 */
35 static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
36   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
37   0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
38   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
39   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
40   0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
41   0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
42   0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
43   0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
44   0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
45   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
46   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
47   0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
48   0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
49   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
50   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
51   0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
52   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
53   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
54   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
55   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
56   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
57   0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
58   0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
59   0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
60   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
61   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
62   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
63   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
64   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
65   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
66   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
67   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
68 };
69 
70 #ifdef USE_UNICODE_PROPERTIES
71 #include "unicode_property_data.c"
72 #else
73 #include "unicode_property_data_posix.c"
74 #endif
75 
76 #include "st.h"
77 
/*
 * Capacity of UserDefinedPropertyRanges[].  Registration is refused with
 * ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS once this many entries exist.
 */
78 #define USER_DEFINED_PROPERTY_MAX_NUM  20
79 
/* One registered user-defined property. */
80 typedef struct {
81   int ctype;              /* assigned as CODE_RANGES_NUM + slot index */
82   OnigCodePoint* ranges;  /* caller-supplied range data; the pointer is stored, not copied */
83 } UserDefinedPropertyValue;
84 
/* Number of slots of UserDefinedPropertyRanges[] currently in use. */
85 static int UserDefinedPropertyNum;
/* Storage for the registered properties, filled in registration order. */
86 static UserDefinedPropertyValue
87 UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
/*
 * Maps a normalized property name to its UserDefinedPropertyValue entry.
 * Stays 0 until the first property is registered.
 */
88 static st_table* UserDefinedPropertyTable;
89 
90 extern int
onig_unicode_define_user_property(const char * name,OnigCodePoint * ranges)91 onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
92 {
93   UserDefinedPropertyValue* e;
94   int i;
95   int n;
96   int len;
97   int c;
98   char* s;
99 
100   if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
101     return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
102 
103   len = strlen(name);
104   if (len >= PROPERTY_NAME_MAX_SIZE)
105     return ONIGERR_TOO_LONG_PROPERTY_NAME;
106 
107   s = (char* )xmalloc(len + 1);
108   if (s == 0)
109     return ONIGERR_MEMORY;
110 
111   n = 0;
112   for (i = 0; i < len; i++) {
113     c = name[i];
114     if (c <= 0 || c >= 0x80) {
115       xfree(s);
116       return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
117     }
118 
119     if (c != ' ' && c != '-' && c != '_') {
120       s[n] = c;
121       n++;
122     }
123   }
124   s[n] = '\0';
125 
126   if (UserDefinedPropertyTable == 0) {
127     UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
128   }
129 
130   e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
131   e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
132   e->ranges = ranges;
133   onig_st_insert_strend(UserDefinedPropertyTable,
134                         (const UChar* )s, (const UChar* )s + n,
135                         (hash_data_type )((void* )e));
136 
137   UserDefinedPropertyNum++;
138   return 0;
139 }
140 
141 extern int
onigenc_unicode_is_code_ctype(OnigCodePoint code,unsigned int ctype)142 onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
143 {
144   if (
145 #ifdef USE_UNICODE_PROPERTIES
146       ctype <= ONIGENC_MAX_STD_CTYPE &&
147 #endif
148       code < 256) {
149     return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
150   }
151 
152   if (ctype >= CODE_RANGES_NUM) {
153     int index = ctype - CODE_RANGES_NUM;
154     if (index < UserDefinedPropertyNum)
155       return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
156     else
157       return ONIGERR_TYPE_BUG;
158   }
159 
160   return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
161 }
162 
163 
164 extern int
onigenc_unicode_ctype_code_range(int ctype,const OnigCodePoint * ranges[])165 onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
166 {
167   if (ctype >= CODE_RANGES_NUM) {
168     int index = ctype - CODE_RANGES_NUM;
169     if (index < UserDefinedPropertyNum) {
170       *ranges = UserDefinedPropertyRanges[index].ranges;
171       return 0;
172     }
173     else
174       return ONIGERR_TYPE_BUG;
175   }
176 
177   *ranges = CodeRanges[ctype];
178   return 0;
179 }
180 
181 extern int
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype,OnigCodePoint * sb_out,const OnigCodePoint * ranges[])182 onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
183                                       const OnigCodePoint* ranges[])
184 {
185   *sb_out = 0x00;
186   return onigenc_unicode_ctype_code_range(ctype, ranges);
187 }
188 
189 extern int
onigenc_unicode_property_name_to_ctype(OnigEncoding enc,UChar * name,UChar * end)190 onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
191 {
192   int len;
193   UChar *p;
194   OnigCodePoint code;
195   const struct PropertyNameCtype* pc;
196   char buf[PROPERTY_NAME_MAX_SIZE];
197 
198   p = name;
199   len = 0;
200   while (p < end) {
201     code = ONIGENC_MBC_TO_CODE(enc, p, end);
202     if (code >= 0x80)
203       return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
204 
205     if (code != ' ' && code != '-' && code != '_') {
206       buf[len++] = (char )code;
207       if (len >= PROPERTY_NAME_MAX_SIZE)
208         return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
209     }
210 
211     p += enclen(enc, p);
212   }
213 
214   buf[len] = 0;
215 
216   if (UserDefinedPropertyTable != 0) {
217     UserDefinedPropertyValue* e;
218     e = (UserDefinedPropertyValue* )NULL;
219     onig_st_lookup_strend(UserDefinedPropertyTable,
220 			  (const UChar* )buf, (const UChar* )buf + len,
221 			  (hash_data_type* )((void* )(&e)));
222     if (e != 0) {
223       return e->ctype;
224     }
225   }
226 
227   pc = unicode_lookup_property_name(buf, len);
228   if (pc != 0) {
229     /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
230 #ifndef USE_UNICODE_PROPERTIES
231     if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
232       return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
233 #endif
234 
235     return pc->ctype;
236   }
237 
238   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
239 }
240 
241 /* for use macros in unicode_fold_data.c */
242 #include "unicode_fold_data.c"
243 
244 
245 extern int
onigenc_unicode_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end,UChar * fold)246 onigenc_unicode_mbc_case_fold(OnigEncoding enc,
247     OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
248     UChar* fold)
249 {
250   const struct ByUnfoldKey* buk;
251 
252   OnigCodePoint code;
253   int i, len, rlen;
254   const UChar *p = *pp;
255 
256   code = ONIGENC_MBC_TO_CODE(enc, p, end);
257   len = enclen(enc, p);
258   *pp += len;
259 
260 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
261   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
262     if (code == 0x0130) {
263       return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
264     }
265 #if 0
266     if (code == 0x0049) {
267       return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
268     }
269 #endif
270   }
271 #endif
272 
273   buk = unicode_unfold_key(code);
274   if (buk != 0) {
275     if (buk->fold_len == 1) {
276       return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
277     }
278     else {
279       OnigCodePoint* addr;
280 
281       FOLDS_FOLD_ADDR_BUK(buk, addr);
282       rlen = 0;
283       for (i = 0; i < buk->fold_len; i++) {
284         OnigCodePoint c = addr[i];
285         len = ONIGENC_CODE_TO_MBC(enc, c, fold);
286         fold += len;
287         rlen += len;
288       }
289       return rlen;
290     }
291   }
292 
293   for (i = 0; i < len; i++) {
294     *fold++ = *p++;
295   }
296   return len;
297 }
298 
299 static int
apply_case_fold1(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)300 apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
301 {
302   int i, j, k, n, r;
303 
304   for (i = from; i < to; ) {
305     OnigCodePoint fold = *FOLDS1_FOLD(i);
306     n = FOLDS1_UNFOLDS_NUM(i);
307     for (j = 0; j < n; j++) {
308       OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
309 
310       r = (*f)(fold, &unfold, 1, arg);
311       if (r != 0) return r;
312       r = (*f)(unfold, &fold, 1, arg);
313       if (r != 0) return r;
314 
315       for (k = 0; k < j; k++) {
316         OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
317         r = (*f)(unfold, &unfold2, 1, arg);
318         if (r != 0) return r;
319         r = (*f)(unfold2, &unfold, 1, arg);
320         if (r != 0) return r;
321       }
322     }
323 
324     i = FOLDS1_NEXT_INDEX(i);
325   }
326 
327   return 0;
328 }
329 
330 static int
apply_case_fold2(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)331 apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
332 {
333   int i, j, k, n, r;
334 
335   for (i = from; i < to; ) {
336     OnigCodePoint* fold = FOLDS2_FOLD(i);
337     n = FOLDS2_UNFOLDS_NUM(i);
338     for (j = 0; j < n; j++) {
339       OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
340 
341       r = (*f)(unfold, fold, 2, arg);
342       if (r != 0) return r;
343 
344       for (k = 0; k < j; k++) {
345         OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
346         r = (*f)(unfold, &unfold2, 1, arg);
347         if (r != 0) return r;
348         r = (*f)(unfold2, &unfold, 1, arg);
349         if (r != 0) return r;
350       }
351     }
352 
353     i = FOLDS2_NEXT_INDEX(i);
354   }
355 
356   return 0;
357 }
358 
359 static int
apply_case_fold3(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)360 apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
361 {
362   int i, j, k, n, r;
363 
364   for (i = from; i < to; ) {
365     OnigCodePoint* fold = FOLDS3_FOLD(i);
366     n = FOLDS3_UNFOLDS_NUM(i);
367     for (j = 0; j < n; j++) {
368       OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
369 
370       r = (*f)(unfold, fold, 3, arg);
371       if (r != 0) return r;
372 
373       for (k = 0; k < j; k++) {
374         OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
375         r = (*f)(unfold, &unfold2, 1, arg);
376         if (r != 0) return r;
377         r = (*f)(unfold2, &unfold, 1, arg);
378         if (r != 0) return r;
379       }
380     }
381 
382     i = FOLDS3_NEXT_INDEX(i);
383   }
384 
385   return 0;
386 }
387 
388 extern int
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)389 onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
390 				    OnigApplyAllCaseFoldFunc f, void* arg)
391 {
392   int r;
393 
394   r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
395   if (r != 0) return r;
396 
397 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
398   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
399     code = 0x0131;
400     r = (*f)(0x0049, &code, 1, arg);
401     if (r != 0) return r;
402     code = 0x0049;
403     r = (*f)(0x0131, &code, 1, arg);
404     if (r != 0) return r;
405 
406     code = 0x0130;
407     r = (*f)(0x0069, &code, 1, arg);
408     if (r != 0) return r;
409     code = 0x0069;
410     r = (*f)(0x0130, &code, 1, arg);
411     if (r != 0) return r;
412   }
413   else {
414 #endif
415     r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
416     if (r != 0) return r;
417 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
418   }
419 #endif
420 
421   if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
422     return 0;
423 
424   r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
425   if (r != 0) return r;
426 
427 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
428   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
429 #endif
430     r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
431     if (r != 0) return r;
432 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
433   }
434 #endif
435 
436   r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
437   if (r != 0) return r;
438 
439   return 0;
440 }
441 
442 extern int
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,OnigCaseFoldType flag,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])443 onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
444     OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
445     OnigCaseFoldCodeItem items[])
446 {
447   int n, m, i, j, k, len;
448   OnigCodePoint code, codes[3];
449   const struct ByUnfoldKey* buk;
450 
451   n = 0;
452 
453   code = ONIGENC_MBC_TO_CODE(enc, p, end);
454   len = enclen(enc, p);
455 
456 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
457   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
458     if (code == 0x0049) {
459       items[0].byte_len = len;
460       items[0].code_len = 1;
461       items[0].code[0]  = 0x0131;
462       return 1;
463     }
464     else if (code == 0x0130) {
465       items[0].byte_len = len;
466       items[0].code_len = 1;
467       items[0].code[0]  = 0x0069;
468       return 1;
469     }
470     else if (code == 0x0131) {
471       items[0].byte_len = len;
472       items[0].code_len = 1;
473       items[0].code[0]  = 0x0049;
474       return 1;
475     }
476     else if (code == 0x0069) {
477       items[0].byte_len = len;
478       items[0].code_len = 1;
479       items[0].code[0]  = 0x0130;
480       return 1;
481     }
482   }
483 #endif
484 
485   buk = unicode_unfold_key(code);
486   if (buk != 0) {
487     if (buk->fold_len == 1) {
488       int un;
489       items[0].byte_len = len;
490       items[0].code_len = 1;
491       items[0].code[0]  = *FOLDS1_FOLD(buk->index);
492       n++;
493 
494       un = FOLDS1_UNFOLDS_NUM(buk->index);
495       for (i = 0; i < un; i++) {
496         OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
497         if (unfold != code) {
498           items[n].byte_len = len;
499           items[n].code_len = 1;
500           items[n].code[0]  = unfold;
501           n++;
502         }
503       }
504       code = items[0].code[0]; // for multi-code to unfold search.
505     }
506     else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
507       OnigCodePoint cs[3][4];
508       int fn, ncs[3];
509 
510       if (buk->fold_len == 2) {
511         m = FOLDS2_UNFOLDS_NUM(buk->index);
512         for (i = 0; i < m; i++) {
513           OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
514           if (unfold == code) continue;
515 
516           items[n].byte_len = len;
517           items[n].code_len = 1;
518           items[n].code[0]  = unfold;
519           n++;
520         }
521 
522         for (fn = 0; fn < 2; fn++) {
523           int index;
524           cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
525           index = unicode_fold1_key(&cs[fn][0]);
526           if (index >= 0) {
527             int m = FOLDS1_UNFOLDS_NUM(index);
528             for (i = 0; i < m; i++) {
529               cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
530             }
531             ncs[fn] = m + 1;
532           }
533           else
534             ncs[fn] = 1;
535         }
536 
537         for (i = 0; i < ncs[0]; i++) {
538           for (j = 0; j < ncs[1]; j++) {
539             items[n].byte_len = len;
540             items[n].code_len = 2;
541             items[n].code[0]  = cs[0][i];
542             items[n].code[1]  = cs[1][j];
543             n++;
544           }
545         }
546       }
547       else { /* fold_len == 3 */
548         m = FOLDS3_UNFOLDS_NUM(buk->index);
549         for (i = 0; i < m; i++) {
550           OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
551           if (unfold == code) continue;
552 
553           items[n].byte_len = len;
554           items[n].code_len = 1;
555           items[n].code[0]  = unfold;
556           n++;
557         }
558 
559         for (fn = 0; fn < 3; fn++) {
560           int index;
561           cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
562           index = unicode_fold1_key(&cs[fn][0]);
563           if (index >= 0) {
564             int m = FOLDS1_UNFOLDS_NUM(index);
565             for (i = 0; i < m; i++) {
566               cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
567             }
568             ncs[fn] = m + 1;
569           }
570           else
571             ncs[fn] = 1;
572         }
573 
574         for (i = 0; i < ncs[0]; i++) {
575           for (j = 0; j < ncs[1]; j++) {
576             for (k = 0; k < ncs[2]; k++) {
577               items[n].byte_len = len;
578               items[n].code_len = 3;
579               items[n].code[0]  = cs[0][i];
580               items[n].code[1]  = cs[1][j];
581               items[n].code[2]  = cs[2][k];
582               n++;
583             }
584           }
585         }
586       }
587 
588       /* multi char folded code is not head of another folded multi char */
589       return n;
590     }
591   }
592   else {
593     int index = unicode_fold1_key(&code);
594     if (index >= 0) {
595       int m = FOLDS1_UNFOLDS_NUM(index);
596       for (i = 0; i < m; i++) {
597         items[n].byte_len = len;
598         items[n].code_len = 1;
599         items[n].code[0]  = FOLDS1_UNFOLDS(index)[i];
600         n++;
601       }
602     }
603   }
604 
605   if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
606     return n;
607 
608   p += len;
609   if (p < end) {
610     int clen;
611     int index;
612 
613     codes[0] = code;
614     code = ONIGENC_MBC_TO_CODE(enc, p, end);
615 
616     buk = unicode_unfold_key(code);
617     if (buk != 0 && buk->fold_len == 1) {
618       codes[1] = *FOLDS1_FOLD(buk->index);
619     }
620     else
621       codes[1] = code;
622 
623     clen = enclen(enc, p);
624     len += clen;
625 
626     index = unicode_fold2_key(codes);
627     if (index >= 0) {
628       m = FOLDS2_UNFOLDS_NUM(index);
629       for (i = 0; i < m; i++) {
630         items[n].byte_len = len;
631         items[n].code_len = 1;
632         items[n].code[0]  = FOLDS2_UNFOLDS(index)[i];
633         n++;
634       }
635     }
636 
637     p += clen;
638     if (p < end) {
639       code = ONIGENC_MBC_TO_CODE(enc, p, end);
640       buk = unicode_unfold_key(code);
641       if (buk != 0 && buk->fold_len == 1) {
642         codes[2] = *FOLDS1_FOLD(buk->index);
643       }
644       else
645         codes[2] = code;
646 
647       clen = enclen(enc, p);
648       len += clen;
649 
650       index = unicode_fold3_key(codes);
651       if (index >= 0) {
652         m = FOLDS3_UNFOLDS_NUM(index);
653         for (i = 0; i < m; i++) {
654           items[n].byte_len = len;
655           items[n].code_len = 1;
656           items[n].code[0]  = FOLDS3_UNFOLDS(index)[i];
657           n++;
658         }
659       }
660     }
661   }
662 
663   return n;
664 }
665