1 /**********************************************************************
2 mktable.c
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <stdlib.h>
31 #include <stdio.h>
32
33 #define NOT_RUBY
34 #include "regenc.h"
35
/*
 * Encoding identifiers.  These values are stored in ENC_INFO.num and are
 * passed as the `enc` argument of the Is*() classifiers below, which
 * switch on them.
 *
 * The values are consecutive ids, not ISO 8859 part numbers: there is no
 * ISO_8859_12 entry, so ISO_8859_13 is 12, ISO_8859_14 is 13, and so on.
 */
#define UNICODE_ISO_8859_1 0
#define ISO_8859_1 1
#define ISO_8859_2 2
#define ISO_8859_3 3
#define ISO_8859_4 4
#define ISO_8859_5 5
#define ISO_8859_6 6
#define ISO_8859_7 7
#define ISO_8859_8 8
#define ISO_8859_9 9
#define ISO_8859_10 10
#define ISO_8859_11 11
#define ISO_8859_13 12
#define ISO_8859_14 13
#define ISO_8859_15 14
#define ISO_8859_16 15
#define KOI8 16
#define KOI8_R 17
54
/*
 * Describes one encoding for which a ctype table is generated.
 *
 * `name` is const-qualified because every entry of Info[] initialises it
 * with a string literal, which must never be written through.
 */
typedef struct {
  int num;           /* encoding id (one of the encoding #defines above) */
  const char* name;  /* text inserted into "Enc%s_CtypeTable" by exec() */
} ENC_INFO;
59
/*
 * The encodings processed by main(), in the order their tables are
 * written.  Each entry pairs an encoding id with the name that exec()
 * embeds in the generated array identifier ("Enc<name>_CtypeTable").
 */
static ENC_INFO Info[] = {
  { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
  { ISO_8859_1, "ISO_8859_1" },
  { ISO_8859_2, "ISO_8859_2" },
  { ISO_8859_3, "ISO_8859_3" },
  { ISO_8859_4, "ISO_8859_4" },
  { ISO_8859_5, "ISO_8859_5" },
  { ISO_8859_6, "ISO_8859_6" },
  { ISO_8859_7, "ISO_8859_7" },
  { ISO_8859_8, "ISO_8859_8" },
  { ISO_8859_9, "ISO_8859_9" },
  { ISO_8859_10, "ISO_8859_10" },
  { ISO_8859_11, "ISO_8859_11" },
  { ISO_8859_13, "ISO_8859_13" },
  { ISO_8859_14, "ISO_8859_14" },
  { ISO_8859_15, "ISO_8859_15" },
  { ISO_8859_16, "ISO_8859_16" },
  { KOI8, "KOI8" },
  { KOI8_R, "KOI8_R" }
};
80
81
IsAlpha(int enc,int c)82 static int IsAlpha(int enc, int c)
83 {
84 if (c >= 0x41 && c <= 0x5a) return 1;
85 if (c >= 0x61 && c <= 0x7a) return 1;
86
87 switch (enc) {
88 case UNICODE_ISO_8859_1:
89 case ISO_8859_1:
90 case ISO_8859_9:
91 if (c == 0xaa) return 1;
92 if (c == 0xb5) return 1;
93 if (c == 0xba) return 1;
94 if (c >= 0xc0 && c <= 0xd6) return 1;
95 if (c >= 0xd8 && c <= 0xf6) return 1;
96 if (c >= 0xf8 && c <= 0xff) return 1;
97 break;
98
99 case ISO_8859_2:
100 if (c == 0xa1 || c == 0xa3) return 1;
101 if (c == 0xa5 || c == 0xa6) return 1;
102 if (c >= 0xa9 && c <= 0xac) return 1;
103 if (c >= 0xae && c <= 0xaf) return 1;
104 if (c == 0xb1 || c == 0xb3) return 1;
105 if (c == 0xb5 || c == 0xb6) return 1;
106 if (c >= 0xb9 && c <= 0xbc) return 1;
107 if (c >= 0xbe && c <= 0xbf) return 1;
108 if (c >= 0xc0 && c <= 0xd6) return 1;
109 if (c >= 0xd8 && c <= 0xf6) return 1;
110 if (c >= 0xf8 && c <= 0xfe) return 1;
111 break;
112
113 case ISO_8859_3:
114 if (c == 0xa1) return 1;
115 if (c == 0xa6) return 1;
116 if (c >= 0xa9 && c <= 0xac) return 1;
117 if (c == 0xaf) return 1;
118 if (c == 0xb1) return 1;
119 if (c == 0xb5 || c == 0xb6) return 1;
120 if (c >= 0xb9 && c <= 0xbc) return 1;
121 if (c == 0xbf) return 1;
122 if (c >= 0xc0 && c <= 0xc2) return 1;
123 if (c >= 0xc4 && c <= 0xcf) return 1;
124 if (c >= 0xd1 && c <= 0xd6) return 1;
125 if (c >= 0xd8 && c <= 0xe2) return 1;
126 if (c >= 0xe4 && c <= 0xef) return 1;
127 if (c >= 0xf1 && c <= 0xf6) return 1;
128 if (c >= 0xf8 && c <= 0xfe) return 1;
129 break;
130
131 case ISO_8859_4:
132 if (c >= 0xa1 && c <= 0xa3) return 1;
133 if (c == 0xa5 || c == 0xa6) return 1;
134 if (c >= 0xa9 && c <= 0xac) return 1;
135 if (c == 0xae) return 1;
136 if (c == 0xb1 || c == 0xb3) return 1;
137 if (c == 0xb5 || c == 0xb6) return 1;
138 if (c >= 0xb9 && c <= 0xbf) return 1;
139 if (c >= 0xc0 && c <= 0xd6) return 1;
140 if (c >= 0xd8 && c <= 0xf6) return 1;
141 if (c >= 0xf8 && c <= 0xfe) return 1;
142 break;
143
144 case ISO_8859_5:
145 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
146 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
147 break;
148
149 case ISO_8859_6:
150 if (c >= 0xc1 && c <= 0xda) return 1;
151 if (c >= 0xe0 && c <= 0xf2) return 1;
152 break;
153
154 case ISO_8859_7:
155 if (c == 0xb6) return 1;
156 if (c >= 0xb8 && c <= 0xba) return 1;
157 if (c == 0xbc) return 1;
158 if (c >= 0xbe && c <= 0xbf) return 1;
159 if (c == 0xc0) return 1;
160 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
161 if (c >= 0xdc && c <= 0xfe) return 1;
162 break;
163
164 case ISO_8859_8:
165 if (c == 0xb5) return 1;
166 if (c >= 0xe0 && c <= 0xfa) return 1;
167 break;
168
169 case ISO_8859_10:
170 if (c >= 0xa1 && c <= 0xa6) return 1;
171 if (c >= 0xa8 && c <= 0xac) return 1;
172 if (c == 0xae || c == 0xaf) return 1;
173 if (c >= 0xb1 && c <= 0xb6) return 1;
174 if (c >= 0xb8 && c <= 0xbc) return 1;
175 if (c >= 0xbe && c <= 0xff) return 1;
176 break;
177
178 case ISO_8859_11:
179 if (c >= 0xa1 && c <= 0xda) return 1;
180 if (c >= 0xdf && c <= 0xfb) return 1;
181 break;
182
183 case ISO_8859_13:
184 if (c == 0xa8) return 1;
185 if (c == 0xaa) return 1;
186 if (c == 0xaf) return 1;
187 if (c == 0xb5) return 1;
188 if (c == 0xb8) return 1;
189 if (c == 0xba) return 1;
190 if (c >= 0xbf && c <= 0xd6) return 1;
191 if (c >= 0xd8 && c <= 0xf6) return 1;
192 if (c >= 0xf8 && c <= 0xfe) return 1;
193 break;
194
195 case ISO_8859_14:
196 if (c == 0xa1 || c == 0xa2) return 1;
197 if (c == 0xa4 || c == 0xa5) return 1;
198 if (c == 0xa6 || c == 0xa8) return 1;
199 if (c >= 0xaa && c <= 0xac) return 1;
200 if (c >= 0xaf && c <= 0xb5) return 1;
201 if (c >= 0xb7 && c <= 0xff) return 1;
202 break;
203
204 case ISO_8859_15:
205 if (c == 0xaa) return 1;
206 if (c == 0xb5) return 1;
207 if (c == 0xba) return 1;
208 if (c >= 0xc0 && c <= 0xd6) return 1;
209 if (c >= 0xd8 && c <= 0xf6) return 1;
210 if (c >= 0xf8 && c <= 0xff) return 1;
211 if (c == 0xa6) return 1;
212 if (c == 0xa8) return 1;
213 if (c == 0xb4) return 1;
214 if (c == 0xb8) return 1;
215 if (c == 0xbc) return 1;
216 if (c == 0xbd) return 1;
217 if (c == 0xbe) return 1;
218 break;
219
220 case ISO_8859_16:
221 if (c == 0xa1) return 1;
222 if (c == 0xa2) return 1;
223 if (c == 0xa3) return 1;
224 if (c == 0xa6) return 1;
225 if (c == 0xa8) return 1;
226 if (c == 0xaa) return 1;
227 if (c == 0xac) return 1;
228 if (c == 0xae) return 1;
229 if (c == 0xaf) return 1;
230 if (c == 0xb2) return 1;
231 if (c == 0xb3) return 1;
232 if (c == 0xb4) return 1;
233 if (c >= 0xb8 && c <= 0xba) return 1;
234 if (c == 0xbc) return 1;
235 if (c == 0xbd) return 1;
236 if (c == 0xbe) return 1;
237 if (c == 0xbf) return 1;
238 if (c >= 0xc0 && c <= 0xde) return 1;
239 if (c >= 0xdf && c <= 0xff) return 1;
240 break;
241
242 case KOI8_R:
243 if (c == 0xa3 || c == 0xb3) return 1;
244 /* fall */
245 case KOI8:
246 if (c >= 0xc0 && c <= 0xff) return 1;
247 break;
248
249 default:
250 exit(-1);
251 }
252
253 return 0;
254 }
255
IsBlank(int enc,int c)256 static int IsBlank(int enc, int c)
257 {
258 if (c == 0x09 || c == 0x20) return 1;
259
260 switch (enc) {
261 case UNICODE_ISO_8859_1:
262 case ISO_8859_1:
263 case ISO_8859_2:
264 case ISO_8859_3:
265 case ISO_8859_4:
266 case ISO_8859_5:
267 case ISO_8859_6:
268 case ISO_8859_7:
269 case ISO_8859_8:
270 case ISO_8859_9:
271 case ISO_8859_10:
272 case ISO_8859_11:
273 case ISO_8859_13:
274 case ISO_8859_14:
275 case ISO_8859_15:
276 case ISO_8859_16:
277 case KOI8:
278 if (c == 0xa0) return 1;
279 break;
280
281 case KOI8_R:
282 if (c == 0x9a) return 1;
283 break;
284
285 default:
286 exit(-1);
287 }
288
289 return 0;
290 }
291
IsCntrl(int enc,int c)292 static int IsCntrl(int enc, int c)
293 {
294 if (c >= 0x00 && c <= 0x1F) return 1;
295
296 switch (enc) {
297 case UNICODE_ISO_8859_1:
298 if (c == 0xad) return 1;
299 /* fall */
300 case ISO_8859_1:
301 case ISO_8859_2:
302 case ISO_8859_3:
303 case ISO_8859_4:
304 case ISO_8859_5:
305 case ISO_8859_6:
306 case ISO_8859_7:
307 case ISO_8859_8:
308 case ISO_8859_9:
309 case ISO_8859_10:
310 case ISO_8859_11:
311 case ISO_8859_13:
312 case ISO_8859_14:
313 case ISO_8859_15:
314 case ISO_8859_16:
315 case KOI8:
316 if (c >= 0x7f && c <= 0x9F) return 1;
317 break;
318
319
320 case KOI8_R:
321 if (c == 0x7f) return 1;
322 break;
323
324 default:
325 exit(-1);
326 }
327
328 return 0;
329 }
330
/*
 * IsDigit: returns 1 when c is one of the ASCII digits 0x30-0x39,
 * otherwise 0.  The result does not depend on enc.
 */
static int IsDigit(int enc, int c)
{
  (void)enc;
  return c >= 0x30 && c <= 0x39;
}
336
IsGraph(int enc,int c)337 static int IsGraph(int enc, int c)
338 {
339 if (c >= 0x21 && c <= 0x7e) return 1;
340
341 switch (enc) {
342 case UNICODE_ISO_8859_1:
343 case ISO_8859_1:
344 case ISO_8859_2:
345 case ISO_8859_4:
346 case ISO_8859_5:
347 case ISO_8859_9:
348 case ISO_8859_10:
349 case ISO_8859_13:
350 case ISO_8859_14:
351 case ISO_8859_15:
352 case ISO_8859_16:
353 if (c >= 0xa1 && c <= 0xff) return 1;
354 break;
355
356 case ISO_8859_3:
357 if (c >= 0xa1) {
358 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
359 c == 0xe3 || c == 0xf0)
360 return 0;
361 else
362 return 1;
363 }
364 break;
365
366 case ISO_8859_6:
367 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
368 return 1;
369 if (c >= 0xc1 && c <= 0xda) return 1;
370 if (c >= 0xe0 && c <= 0xf2) return 1;
371 break;
372
373 case ISO_8859_7:
374 if (c >= 0xa1 && c <= 0xfe &&
375 c != 0xa4 && c != 0xa5 && c != 0xaa &&
376 c != 0xae && c != 0xd2) return 1;
377 break;
378
379 case ISO_8859_8:
380 if (c >= 0xa2 && c <= 0xfa) {
381 if (c >= 0xbf && c <= 0xde) return 0;
382 return 1;
383 }
384 break;
385
386 case ISO_8859_11:
387 if (c >= 0xa1 && c <= 0xda) return 1;
388 if (c >= 0xdf && c <= 0xfb) return 1;
389 break;
390
391 case KOI8:
392 if (c >= 0xc0 && c <= 0xff) return 1;
393 break;
394
395 case KOI8_R:
396 if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
397 break;
398
399 default:
400 exit(-1);
401 }
402
403 return 0;
404 }
405
IsLower(int enc,int c)406 static int IsLower(int enc, int c)
407 {
408 if (c >= 0x61 && c <= 0x7a) return 1;
409
410 switch (enc) {
411 case UNICODE_ISO_8859_1:
412 case ISO_8859_1:
413 case ISO_8859_9:
414 if (c == 0xaa) return 1;
415 if (c == 0xb5) return 1;
416 if (c == 0xba) return 1;
417 if (c >= 0xdf && c <= 0xf6) return 1;
418 if (c >= 0xf8 && c <= 0xff) return 1;
419 break;
420
421 case ISO_8859_2:
422 if (c == 0xb1 || c == 0xb3) return 1;
423 if (c == 0xb5 || c == 0xb6) return 1;
424 if (c >= 0xb9 && c <= 0xbc) return 1;
425 if (c >= 0xbe && c <= 0xbf) return 1;
426 if (c >= 0xdf && c <= 0xf6) return 1;
427 if (c >= 0xf8 && c <= 0xfe) return 1;
428 break;
429
430 case ISO_8859_3:
431 if (c == 0xb1) return 1;
432 if (c == 0xb5 || c == 0xb6) return 1;
433 if (c >= 0xb9 && c <= 0xbc) return 1;
434 if (c == 0xbf) return 1;
435 if (c == 0xdf) return 1;
436 if (c >= 0xe0 && c <= 0xe2) return 1;
437 if (c >= 0xe4 && c <= 0xef) return 1;
438 if (c >= 0xf1 && c <= 0xf6) return 1;
439 if (c >= 0xf8 && c <= 0xfe) return 1;
440 break;
441
442 case ISO_8859_4:
443 if (c == 0xa2) return 1;
444 if (c == 0xb1 || c == 0xb3) return 1;
445 if (c == 0xb5 || c == 0xb6) return 1;
446 if (c >= 0xb9 && c <= 0xbc) return 1;
447 if (c >= 0xbe && c <= 0xbf) return 1;
448 if (c == 0xdf) return 1;
449 if (c >= 0xe0 && c <= 0xf6) return 1;
450 if (c >= 0xf8 && c <= 0xfe) return 1;
451 break;
452
453 case ISO_8859_5:
454 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
455 break;
456
457 case ISO_8859_6:
458 break;
459
460 case ISO_8859_7:
461 if (c == 0xc0) return 1;
462 if (c >= 0xdc && c <= 0xfe) return 1;
463 break;
464
465 case ISO_8859_8:
466 if (c == 0xb5) return 1;
467 break;
468
469 case ISO_8859_10:
470 if (c >= 0xb1 && c <= 0xb6) return 1;
471 if (c >= 0xb8 && c <= 0xbc) return 1;
472 if (c == 0xbe || c == 0xbf) return 1;
473 if (c >= 0xdf && c <= 0xff) return 1;
474 break;
475
476 case ISO_8859_11:
477 break;
478
479 case ISO_8859_13:
480 if (c == 0xb5) return 1;
481 if (c == 0xb8) return 1;
482 if (c == 0xba) return 1;
483 if (c == 0xbf) return 1;
484 if (c >= 0xdf && c <= 0xf6) return 1;
485 if (c >= 0xf8 && c <= 0xfe) return 1;
486 break;
487
488 case ISO_8859_14:
489 if (c == 0xa2) return 1;
490 if (c == 0xa5) return 1;
491 if (c == 0xab) return 1;
492 if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
493 if (c >= 0xb8 && c <= 0xba) return 1;
494 if (c == 0xbc) return 1;
495 if (c == 0xbe || c == 0xbf) return 1;
496 if (c >= 0xdf && c <= 0xff) return 1;
497 break;
498
499 case ISO_8859_15:
500 if (c == 0xaa) return 1;
501 if (c == 0xb5) return 1;
502 if (c == 0xba) return 1;
503 if (c >= 0xdf && c <= 0xf6) return 1;
504 if (c >= 0xf8 && c <= 0xff) return 1;
505 if (c == 0xa8) return 1;
506 if (c == 0xb8) return 1;
507 if (c == 0xbd) return 1;
508 break;
509
510 case ISO_8859_16:
511 if (c == 0xa2) return 1;
512 if (c == 0xa8) return 1;
513 if (c == 0xae) return 1;
514 if (c == 0xb3) return 1;
515 if (c >= 0xb8 && c <= 0xba) return 1;
516 if (c == 0xbd) return 1;
517 if (c == 0xbf) return 1;
518 if (c >= 0xdf && c <= 0xff) return 1;
519 break;
520
521 case KOI8_R:
522 if (c == 0xa3) return 1;
523 /* fall */
524 case KOI8:
525 if (c >= 0xc0 && c <= 0xdf) return 1;
526 break;
527
528 default:
529 exit(-1);
530 }
531
532 return 0;
533 }
534
IsPrint(int enc,int c)535 static int IsPrint(int enc, int c)
536 {
537 if (c >= 0x20 && c <= 0x7e) return 1;
538
539 switch (enc) {
540 case UNICODE_ISO_8859_1:
541 if (c >= 0x09 && c <= 0x0d) return 1;
542 if (c == 0x85) return 1;
543 /* fall */
544 case ISO_8859_1:
545 case ISO_8859_2:
546 case ISO_8859_4:
547 case ISO_8859_5:
548 case ISO_8859_9:
549 case ISO_8859_10:
550 case ISO_8859_13:
551 case ISO_8859_14:
552 case ISO_8859_15:
553 case ISO_8859_16:
554 if (c >= 0xa0 && c <= 0xff) return 1;
555 break;
556
557 case ISO_8859_3:
558 if (c >= 0xa0) {
559 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
560 c == 0xe3 || c == 0xf0)
561 return 0;
562 else
563 return 1;
564 }
565 break;
566
567 case ISO_8859_6:
568 if (c == 0xa0) return 1;
569 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
570 return 1;
571 if (c >= 0xc1 && c <= 0xda) return 1;
572 if (c >= 0xe0 && c <= 0xf2) return 1;
573 break;
574
575 case ISO_8859_7:
576 if (c >= 0xa0 && c <= 0xfe &&
577 c != 0xa4 && c != 0xa5 && c != 0xaa &&
578 c != 0xae && c != 0xd2) return 1;
579 break;
580
581 case ISO_8859_8:
582 if (c >= 0xa0 && c <= 0xfa) {
583 if (c >= 0xbf && c <= 0xde) return 0;
584 if (c == 0xa1) return 0;
585 return 1;
586 }
587 break;
588
589 case ISO_8859_11:
590 if (c >= 0xa0 && c <= 0xda) return 1;
591 if (c >= 0xdf && c <= 0xfb) return 1;
592 break;
593
594 case KOI8:
595 if (c == 0xa0) return 1;
596 if (c >= 0xc0 && c <= 0xff) return 1;
597 break;
598
599 case KOI8_R:
600 if (c >= 0x80 && c <= 0xff) return 1;
601 break;
602
603 default:
604 exit(-1);
605 }
606
607 return 0;
608 }
609
IsPunct(int enc,int c)610 static int IsPunct(int enc, int c)
611 {
612 if (enc == UNICODE_ISO_8859_1) {
613 if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
614 c == 0x7c || c == 0x7e) return 1;
615 if (c >= 0x3c && c <= 0x3e) return 1;
616 }
617
618 if (c >= 0x21 && c <= 0x2f) return 1;
619 if (c >= 0x3a && c <= 0x40) return 1;
620 if (c >= 0x5b && c <= 0x60) return 1;
621 if (c >= 0x7b && c <= 0x7e) return 1;
622
623 switch (enc) {
624 case ISO_8859_1:
625 case ISO_8859_9:
626 case ISO_8859_15:
627 if (c == 0xad) return 1;
628 /* fall */
629 case UNICODE_ISO_8859_1:
630 if (c == 0xa1) return 1;
631 if (c == 0xab) return 1;
632 if (c == 0xb7) return 1;
633 if (c == 0xbb) return 1;
634 if (c == 0xbf) return 1;
635 break;
636
637 case ISO_8859_2:
638 case ISO_8859_4:
639 case ISO_8859_5:
640 case ISO_8859_14:
641 if (c == 0xad) return 1;
642 break;
643
644 case ISO_8859_3:
645 case ISO_8859_10:
646 if (c == 0xad) return 1;
647 if (c == 0xb7) return 1;
648 if (c == 0xbd) return 1;
649 break;
650
651 case ISO_8859_6:
652 if (c == 0xac) return 1;
653 if (c == 0xad) return 1;
654 if (c == 0xbb) return 1;
655 if (c == 0xbf) return 1;
656 break;
657
658 case ISO_8859_7:
659 if (c == 0xa1 || c == 0xa2) return 1;
660 if (c == 0xab) return 1;
661 if (c == 0xaf) return 1;
662 if (c == 0xad) return 1;
663 if (c == 0xb7 || c == 0xbb) return 1;
664 break;
665
666 case ISO_8859_8:
667 if (c == 0xab) return 1;
668 if (c == 0xad) return 1;
669 if (c == 0xb7) return 1;
670 if (c == 0xbb) return 1;
671 if (c == 0xdf) return 1;
672 break;
673
674 case ISO_8859_13:
675 if (c == 0xa1 || c == 0xa5) return 1;
676 if (c == 0xab || c == 0xad) return 1;
677 if (c == 0xb4 || c == 0xb7) return 1;
678 if (c == 0xbb) return 1;
679 if (c == 0xff) return 1;
680 break;
681
682 case ISO_8859_16:
683 if (c == 0xa5) return 1;
684 if (c == 0xab) return 1;
685 if (c == 0xad) return 1;
686 if (c == 0xb5) return 1;
687 if (c == 0xb7) return 1;
688 if (c == 0xbb) return 1;
689 break;
690
691 case KOI8_R:
692 if (c == 0x9e) return 1;
693 break;
694
695 case ISO_8859_11:
696 case KOI8:
697 break;
698
699 default:
700 exit(-1);
701 }
702
703 return 0;
704 }
705
IsSpace(int enc,int c)706 static int IsSpace(int enc, int c)
707 {
708 if (c >= 0x09 && c <= 0x0d) return 1;
709 if (c == 0x20) return 1;
710
711 switch (enc) {
712 case UNICODE_ISO_8859_1:
713 if (c == 0x85) return 1;
714 /* fall */
715 case ISO_8859_1:
716 case ISO_8859_2:
717 case ISO_8859_3:
718 case ISO_8859_4:
719 case ISO_8859_5:
720 case ISO_8859_6:
721 case ISO_8859_7:
722 case ISO_8859_8:
723 case ISO_8859_9:
724 case ISO_8859_10:
725 case ISO_8859_11:
726 case ISO_8859_13:
727 case ISO_8859_14:
728 case ISO_8859_15:
729 case ISO_8859_16:
730 case KOI8:
731 if (c == 0xa0) return 1;
732 break;
733
734 case KOI8_R:
735 if (c == 0x9a) return 1;
736 break;
737
738 default:
739 exit(-1);
740 }
741
742 return 0;
743 }
744
IsUpper(int enc,int c)745 static int IsUpper(int enc, int c)
746 {
747 if (c >= 0x41 && c <= 0x5a) return 1;
748
749 switch (enc) {
750 case UNICODE_ISO_8859_1:
751 case ISO_8859_1:
752 case ISO_8859_9:
753 if (c >= 0xc0 && c <= 0xd6) return 1;
754 if (c >= 0xd8 && c <= 0xde) return 1;
755 break;
756
757 case ISO_8859_2:
758 if (c == 0xa1 || c == 0xa3) return 1;
759 if (c == 0xa5 || c == 0xa6) return 1;
760 if (c >= 0xa9 && c <= 0xac) return 1;
761 if (c >= 0xae && c <= 0xaf) return 1;
762 if (c >= 0xc0 && c <= 0xd6) return 1;
763 if (c >= 0xd8 && c <= 0xde) return 1;
764 break;
765
766 case ISO_8859_3:
767 if (c == 0xa1) return 1;
768 if (c == 0xa6) return 1;
769 if (c >= 0xa9 && c <= 0xac) return 1;
770 if (c == 0xaf) return 1;
771 if (c >= 0xc0 && c <= 0xc2) return 1;
772 if (c >= 0xc4 && c <= 0xcf) return 1;
773 if (c >= 0xd1 && c <= 0xd6) return 1;
774 if (c >= 0xd8 && c <= 0xde) return 1;
775 break;
776
777 case ISO_8859_4:
778 if (c == 0xa1 || c == 0xa3) return 1;
779 if (c == 0xa5 || c == 0xa6) return 1;
780 if (c >= 0xa9 && c <= 0xac) return 1;
781 if (c == 0xae) return 1;
782 if (c == 0xbd) return 1;
783 if (c >= 0xc0 && c <= 0xd6) return 1;
784 if (c >= 0xd8 && c <= 0xde) return 1;
785 break;
786
787 case ISO_8859_5:
788 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
789 break;
790
791 case ISO_8859_6:
792 break;
793
794 case ISO_8859_7:
795 if (c == 0xb6) return 1;
796 if (c >= 0xb8 && c <= 0xba) return 1;
797 if (c == 0xbc) return 1;
798 if (c >= 0xbe && c <= 0xbf) return 1;
799 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
800 break;
801
802 case ISO_8859_8:
803 case ISO_8859_11:
804 break;
805
806 case ISO_8859_10:
807 if (c >= 0xa1 && c <= 0xa6) return 1;
808 if (c >= 0xa8 && c <= 0xac) return 1;
809 if (c == 0xae || c == 0xaf) return 1;
810 if (c >= 0xc0 && c <= 0xde) return 1;
811 break;
812
813 case ISO_8859_13:
814 if (c == 0xa8) return 1;
815 if (c == 0xaa) return 1;
816 if (c == 0xaf) return 1;
817 if (c >= 0xc0 && c <= 0xd6) return 1;
818 if (c >= 0xd8 && c <= 0xde) return 1;
819 break;
820
821 case ISO_8859_14:
822 if (c == 0xa1) return 1;
823 if (c == 0xa4 || c == 0xa6) return 1;
824 if (c == 0xa8) return 1;
825 if (c == 0xaa || c == 0xac) return 1;
826 if (c == 0xaf || c == 0xb0) return 1;
827 if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
828 if (c == 0xbb || c == 0xbd) return 1;
829 if (c >= 0xc0 && c <= 0xde) return 1;
830 break;
831
832 case ISO_8859_15:
833 if (c >= 0xc0 && c <= 0xd6) return 1;
834 if (c >= 0xd8 && c <= 0xde) return 1;
835 if (c == 0xa6) return 1;
836 if (c == 0xb4) return 1;
837 if (c == 0xbc) return 1;
838 if (c == 0xbe) return 1;
839 break;
840
841 case ISO_8859_16:
842 if (c == 0xa1) return 1;
843 if (c == 0xa3) return 1;
844 if (c == 0xa6) return 1;
845 if (c == 0xaa) return 1;
846 if (c == 0xac) return 1;
847 if (c == 0xaf) return 1;
848 if (c == 0xb2) return 1;
849 if (c == 0xb4) return 1;
850 if (c == 0xbc) return 1;
851 if (c == 0xbe) return 1;
852 if (c >= 0xc0 && c <= 0xde) return 1;
853 break;
854
855 case KOI8_R:
856 if (c == 0xb3) return 1;
857 /* fall */
858 case KOI8:
859 if (c >= 0xe0 && c <= 0xff) return 1;
860 break;
861
862 default:
863 exit(-1);
864 }
865
866 return 0;
867 }
868
/*
 * IsXDigit: returns 1 when c is an ASCII hexadecimal digit
 * (0x30-0x39, 0x41-0x46 or 0x61-0x66), otherwise 0.  The result does not
 * depend on enc.
 */
static int IsXDigit(int enc, int c)
{
  (void)enc;
  return (c >= 0x30 && c <= 0x39) ||
         (c >= 0x41 && c <= 0x46) ||
         (c >= 0x61 && c <= 0x66);
}
876
IsWord(int enc,int c)877 static int IsWord(int enc, int c)
878 {
879 if (c >= 0x30 && c <= 0x39) return 1;
880 if (c >= 0x41 && c <= 0x5a) return 1;
881 if (c == 0x5f) return 1;
882 if (c >= 0x61 && c <= 0x7a) return 1;
883
884 switch (enc) {
885 case UNICODE_ISO_8859_1:
886 case ISO_8859_1:
887 case ISO_8859_9:
888 if (c == 0xaa) return 1;
889 if (c >= 0xb2 && c <= 0xb3) return 1;
890 if (c == 0xb5) return 1;
891 if (c >= 0xb9 && c <= 0xba) return 1;
892 if (c >= 0xbc && c <= 0xbe) return 1;
893 if (c >= 0xc0 && c <= 0xd6) return 1;
894 if (c >= 0xd8 && c <= 0xf6) return 1;
895 if (c >= 0xf8 && c <= 0xff) return 1;
896 break;
897
898 case ISO_8859_2:
899 if (c == 0xa1 || c == 0xa3) return 1;
900 if (c == 0xa5 || c == 0xa6) return 1;
901 if (c >= 0xa9 && c <= 0xac) return 1;
902 if (c >= 0xae && c <= 0xaf) return 1;
903 if (c == 0xb1 || c == 0xb3) return 1;
904 if (c == 0xb5 || c == 0xb6) return 1;
905 if (c >= 0xb9 && c <= 0xbc) return 1;
906 if (c >= 0xbe && c <= 0xbf) return 1;
907 if (c >= 0xc0 && c <= 0xd6) return 1;
908 if (c >= 0xd8 && c <= 0xf6) return 1;
909 if (c >= 0xf8 && c <= 0xfe) return 1;
910 break;
911
912 case ISO_8859_3:
913 if (c == 0xa1) return 1;
914 if (c == 0xa6) return 1;
915 if (c >= 0xa9 && c <= 0xac) return 1;
916 if (c == 0xaf) return 1;
917 if (c >= 0xb1 && c <= 0xb3) return 1;
918 if (c == 0xb5 || c == 0xb6) return 1;
919 if (c >= 0xb9 && c <= 0xbd) return 1;
920 if (c == 0xbf) return 1;
921 if (c >= 0xc0 && c <= 0xc2) return 1;
922 if (c >= 0xc4 && c <= 0xcf) return 1;
923 if (c >= 0xd1 && c <= 0xd6) return 1;
924 if (c >= 0xd8 && c <= 0xe2) return 1;
925 if (c >= 0xe4 && c <= 0xef) return 1;
926 if (c >= 0xf1 && c <= 0xf6) return 1;
927 if (c >= 0xf8 && c <= 0xfe) return 1;
928 break;
929
930 case ISO_8859_4:
931 if (c >= 0xa1 && c <= 0xa3) return 1;
932 if (c == 0xa5 || c == 0xa6) return 1;
933 if (c >= 0xa9 && c <= 0xac) return 1;
934 if (c == 0xae) return 1;
935 if (c == 0xb1 || c == 0xb3) return 1;
936 if (c == 0xb5 || c == 0xb6) return 1;
937 if (c >= 0xb9 && c <= 0xbf) return 1;
938 if (c >= 0xc0 && c <= 0xd6) return 1;
939 if (c >= 0xd8 && c <= 0xf6) return 1;
940 if (c >= 0xf8 && c <= 0xfe) return 1;
941 break;
942
943 case ISO_8859_5:
944 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
945 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
946 break;
947
948 case ISO_8859_6:
949 if (c >= 0xc1 && c <= 0xda) return 1;
950 if (c >= 0xe0 && c <= 0xea) return 1;
951 if (c >= 0xeb && c <= 0xf2) return 1;
952 break;
953
954 case ISO_8859_7:
955 if (c == 0xb2 || c == 0xb3) return 1;
956 if (c == 0xb6) return 1;
957 if (c >= 0xb8 && c <= 0xba) return 1;
958 if (c >= 0xbc && c <= 0xbf) return 1;
959 if (c == 0xc0) return 1;
960 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
961 if (c >= 0xdc && c <= 0xfe) return 1;
962 break;
963
964 case ISO_8859_8:
965 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
966 if (c >= 0xbc && c <= 0xbe) return 1;
967 if (c >= 0xe0 && c <= 0xfa) return 1;
968 break;
969
970 case ISO_8859_10:
971 if (c >= 0xa1 && c <= 0xff) {
972 if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
973 return 1;
974 }
975 break;
976
977 case ISO_8859_11:
978 if (c >= 0xa1 && c <= 0xda) return 1;
979 if (c >= 0xdf && c <= 0xfb) return 1;
980 break;
981
982 case ISO_8859_13:
983 if (c == 0xa8) return 1;
984 if (c == 0xaa) return 1;
985 if (c == 0xaf) return 1;
986 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
987 if (c >= 0xbc && c <= 0xbe) return 1;
988 if (c == 0xb8) return 1;
989 if (c == 0xba) return 1;
990 if (c >= 0xbf && c <= 0xd6) return 1;
991 if (c >= 0xd8 && c <= 0xf6) return 1;
992 if (c >= 0xf8 && c <= 0xfe) return 1;
993 break;
994
995 case ISO_8859_14:
996 if (c >= 0xa1 && c <= 0xff) {
997 if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
998 c == 0xb6) return 0;
999 return 1;
1000 }
1001 break;
1002
1003 case ISO_8859_15:
1004 if (c == 0xaa) return 1;
1005 if (c >= 0xb2 && c <= 0xb3) return 1;
1006 if (c == 0xb5) return 1;
1007 if (c >= 0xb9 && c <= 0xba) return 1;
1008 if (c >= 0xbc && c <= 0xbe) return 1;
1009 if (c >= 0xc0 && c <= 0xd6) return 1;
1010 if (c >= 0xd8 && c <= 0xf6) return 1;
1011 if (c >= 0xf8 && c <= 0xff) return 1;
1012 if (c == 0xa6) return 1;
1013 if (c == 0xa8) return 1;
1014 if (c == 0xb4) return 1;
1015 if (c == 0xb8) return 1;
1016 break;
1017
1018 case ISO_8859_16:
1019 if (c == 0xa1) return 1;
1020 if (c == 0xa2) return 1;
1021 if (c == 0xa3) return 1;
1022 if (c == 0xa6) return 1;
1023 if (c == 0xa8) return 1;
1024 if (c == 0xaa) return 1;
1025 if (c == 0xac) return 1;
1026 if (c == 0xae) return 1;
1027 if (c == 0xaf) return 1;
1028 if (c == 0xb2) return 1;
1029 if (c == 0xb3) return 1;
1030 if (c == 0xb4) return 1;
1031 if (c >= 0xb8 && c <= 0xba) return 1;
1032 if (c == 0xbc) return 1;
1033 if (c == 0xbd) return 1;
1034 if (c == 0xbe) return 1;
1035 if (c == 0xbf) return 1;
1036 if (c >= 0xc0 && c <= 0xde) return 1;
1037 if (c >= 0xdf && c <= 0xff) return 1;
1038 break;
1039
1040 case KOI8_R:
1041 if (c == 0x9d) return 1;
1042 if (c == 0xa3 || c == 0xb3) return 1;
1043 /* fall */
1044 case KOI8:
1045 if (c >= 0xc0 && c <= 0xff) return 1;
1046 break;
1047
1048 default:
1049 exit(-1);
1050 }
1051
1052 return 0;
1053 }
1054
/*
 * IsAscii: returns 1 when c lies in 0x00-0x7f, otherwise 0.  The result
 * does not depend on enc.
 */
static int IsAscii(int enc, int c)
{
  (void)enc;
  return c >= 0x00 && c <= 0x7f;
}
1060
/*
 * IsNewline: returns 1 only for 0x0a (line feed), otherwise 0.  The
 * result does not depend on enc.
 */
static int IsNewline(int enc, int c)
{
  (void)enc;
  return c == 0x0a;
}
1066
/*
 * exec: write the ctype table for one encoding as C source.
 *
 *   fp     stream the generated text is written to
 *   einfo  encoding to process; `num` selects the classification rules
 *          and `name` is embedded in the array identifier
 *
 * Emits "static unsigned short Enc<name>_CtypeTable[256] = { ... };" with
 * one 4-digit hex entry per byte value 0..255, NCOL entries per line.
 * Each entry is the OR of the ONIGENC_CTYPE_* flags whose classifier
 * accepts that byte value.  Always returns 0.
 */
static int exec(FILE* fp, ENC_INFO* einfo)
{
#define NCOL 8

  const int enc = einfo->num;
  int c;

  fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
          einfo->name);

  for (c = 0; c < 256; c++) {
    const int col = c % NCOL;
    int val = 0;

    /* The classifiers are queried in a fixed order. */
    val |= IsNewline(enc, c) ? ONIGENC_CTYPE_NEWLINE : 0;
    val |= IsAlpha(enc, c)   ? ONIGENC_CTYPE_ALPHA   : 0;
    val |= IsBlank(enc, c)   ? ONIGENC_CTYPE_BLANK   : 0;
    val |= IsCntrl(enc, c)   ? ONIGENC_CTYPE_CNTRL   : 0;
    val |= IsDigit(enc, c)   ? ONIGENC_CTYPE_DIGIT   : 0;
    val |= IsGraph(enc, c)   ? ONIGENC_CTYPE_GRAPH   : 0;
    val |= IsLower(enc, c)   ? ONIGENC_CTYPE_LOWER   : 0;
    val |= IsPrint(enc, c)   ? ONIGENC_CTYPE_PRINT   : 0;
    val |= IsPunct(enc, c)   ? ONIGENC_CTYPE_PUNCT   : 0;
    val |= IsSpace(enc, c)   ? ONIGENC_CTYPE_SPACE   : 0;
    val |= IsUpper(enc, c)   ? ONIGENC_CTYPE_UPPER   : 0;
    val |= IsXDigit(enc, c)  ? ONIGENC_CTYPE_XDIGIT  : 0;
    val |= IsWord(enc, c)    ? ONIGENC_CTYPE_WORD    : 0;
    val |= IsAscii(enc, c)   ? ONIGENC_CTYPE_ASCII   : 0;

    /*
     * One entry: a line-leading pad in the first column, the value, a
     * comma after all but the final entry, then a newline at the end of
     * a row or a separating space otherwise.
     */
    fprintf(fp, "%s0x%04x%s%s",
            (col == 0) ? " " : "",
            val,
            (c != 255) ? "," : "",
            (c != 0 && col == (NCOL-1)) ? "\n" : " ");
  }

  fprintf(fp, "};\n");
  return 0;
}
1106
main(int argc,char * argv[])1107 extern int main(int argc, char* argv[])
1108 {
1109 int i;
1110 FILE* fp = stdout;
1111
1112 for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) {
1113 exec(fp, &Info[i]);
1114 }
1115 }
1116