1 /**********************************************************************
2 mktable.c
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <locale.h>
33
34 #ifndef __USE_ISOC99
35 #define __USE_ISOC99
36 #endif
37
38 #include <ctype.h>
39
40 #include "regenc.h"
41
/*
 * Encoding identifiers.  Each value is stored in ENC_INFO.num and is the
 * `enc` argument handed to the Is*() classification functions below.
 */
enum {
  ASCII              = 0,
  UNICODE_ISO_8859_1 = 1,
  ISO_8859_1         = 2,
  ISO_8859_2         = 3,
  ISO_8859_3         = 4,
  ISO_8859_4         = 5,
  ISO_8859_5         = 6,
  ISO_8859_6         = 7,
  ISO_8859_7         = 8,
  ISO_8859_8         = 9,
  ISO_8859_9         = 10,
  ISO_8859_10        = 11,
  ISO_8859_11        = 12,
  ISO_8859_13        = 13,
  ISO_8859_14        = 14,
  ISO_8859_15        = 15,
  ISO_8859_16        = 16,
  KOI8               = 17,
  KOI8_R             = 18
};
61
/*
 * Describes one encoding for which a ctype table is generated.
 *
 *   num   encoding identifier (ASCII, ISO_8859_1, ..., KOI8_R)
 *   name  text spliced into the generated table's identifier
 *         ("Enc<name>_CtypeTable"); it is only ever read, and is
 *         initialised from string literals, hence const.
 */
typedef struct {
  int num;
  const char* name;
} ENC_INFO;
66
67 static ENC_INFO Info[] = {
68 { ASCII, "ASCII" },
69 { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
70 { ISO_8859_1, "ISO_8859_1" },
71 { ISO_8859_2, "ISO_8859_2" },
72 { ISO_8859_3, "ISO_8859_3" },
73 { ISO_8859_4, "ISO_8859_4" },
74 { ISO_8859_5, "ISO_8859_5" },
75 { ISO_8859_6, "ISO_8859_6" },
76 { ISO_8859_7, "ISO_8859_7" },
77 { ISO_8859_8, "ISO_8859_8" },
78 { ISO_8859_9, "ISO_8859_9" },
79 { ISO_8859_10, "ISO_8859_10" },
80 { ISO_8859_11, "ISO_8859_11" },
81 { ISO_8859_13, "ISO_8859_13" },
82 { ISO_8859_14, "ISO_8859_14" },
83 { ISO_8859_15, "ISO_8859_15" },
84 { ISO_8859_16, "ISO_8859_16" },
85 { KOI8, "KOI8" },
86 { KOI8_R, "KOI8_R" }
87 };
88
89
IsAlpha(int enc,int c)90 static int IsAlpha(int enc, int c)
91 {
92 if (enc == ASCII)
93 return isalpha(c);
94
95 if (c >= 0x41 && c <= 0x5a) return 1;
96 if (c >= 0x61 && c <= 0x7a) return 1;
97
98 switch (enc) {
99 case UNICODE_ISO_8859_1:
100 case ISO_8859_1:
101 case ISO_8859_9:
102 if (c == 0xaa) return 1;
103 if (c == 0xb5) return 1;
104 if (c == 0xba) return 1;
105 if (c >= 0xc0 && c <= 0xd6) return 1;
106 if (c >= 0xd8 && c <= 0xf6) return 1;
107 if (c >= 0xf8 && c <= 0xff) return 1;
108 break;
109
110 case ISO_8859_2:
111 if (c == 0xa1 || c == 0xa3) return 1;
112 if (c == 0xa5 || c == 0xa6) return 1;
113 if (c >= 0xa9 && c <= 0xac) return 1;
114 if (c >= 0xae && c <= 0xaf) return 1;
115 if (c == 0xb1 || c == 0xb3) return 1;
116 if (c == 0xb5 || c == 0xb6) return 1;
117 if (c >= 0xb9 && c <= 0xbc) return 1;
118 if (c >= 0xbe && c <= 0xbf) return 1;
119 if (c >= 0xc0 && c <= 0xd6) return 1;
120 if (c >= 0xd8 && c <= 0xf6) return 1;
121 if (c >= 0xf8 && c <= 0xfe) return 1;
122 break;
123
124 case ISO_8859_3:
125 if (c == 0xa1) return 1;
126 if (c == 0xa6) return 1;
127 if (c >= 0xa9 && c <= 0xac) return 1;
128 if (c == 0xaf) return 1;
129 if (c == 0xb1) return 1;
130 if (c == 0xb5 || c == 0xb6) return 1;
131 if (c >= 0xb9 && c <= 0xbc) return 1;
132 if (c == 0xbf) return 1;
133 if (c >= 0xc0 && c <= 0xc2) return 1;
134 if (c >= 0xc4 && c <= 0xcf) return 1;
135 if (c >= 0xd1 && c <= 0xd6) return 1;
136 if (c >= 0xd8 && c <= 0xe2) return 1;
137 if (c >= 0xe4 && c <= 0xef) return 1;
138 if (c >= 0xf1 && c <= 0xf6) return 1;
139 if (c >= 0xf8 && c <= 0xfe) return 1;
140 break;
141
142 case ISO_8859_4:
143 if (c >= 0xa1 && c <= 0xa3) return 1;
144 if (c == 0xa5 || c == 0xa6) return 1;
145 if (c >= 0xa9 && c <= 0xac) return 1;
146 if (c == 0xae) return 1;
147 if (c == 0xb1 || c == 0xb3) return 1;
148 if (c == 0xb5 || c == 0xb6) return 1;
149 if (c >= 0xb9 && c <= 0xbf) return 1;
150 if (c >= 0xc0 && c <= 0xd6) return 1;
151 if (c >= 0xd8 && c <= 0xf6) return 1;
152 if (c >= 0xf8 && c <= 0xfe) return 1;
153 break;
154
155 case ISO_8859_5:
156 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
157 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
158 break;
159
160 case ISO_8859_6:
161 if (c >= 0xc1 && c <= 0xda) return 1;
162 if (c >= 0xe0 && c <= 0xf2) return 1;
163 break;
164
165 case ISO_8859_7:
166 if (c == 0xb6) return 1;
167 if (c >= 0xb8 && c <= 0xba) return 1;
168 if (c == 0xbc) return 1;
169 if (c >= 0xbe && c <= 0xbf) return 1;
170 if (c == 0xc0) return 1;
171 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
172 if (c >= 0xdc && c <= 0xfe) return 1;
173 break;
174
175 case ISO_8859_8:
176 if (c == 0xb5) return 1;
177 if (c >= 0xe0 && c <= 0xfa) return 1;
178 break;
179
180 case ISO_8859_10:
181 if (c >= 0xa1 && c <= 0xa6) return 1;
182 if (c >= 0xa8 && c <= 0xac) return 1;
183 if (c == 0xae || c == 0xaf) return 1;
184 if (c >= 0xb1 && c <= 0xb6) return 1;
185 if (c >= 0xb8 && c <= 0xbc) return 1;
186 if (c >= 0xbe && c <= 0xff) return 1;
187 break;
188
189 case ISO_8859_11:
190 if (c >= 0xa1 && c <= 0xda) return 1;
191 if (c >= 0xdf && c <= 0xfb) return 1;
192 break;
193
194 case ISO_8859_13:
195 if (c == 0xa8) return 1;
196 if (c == 0xaa) return 1;
197 if (c == 0xaf) return 1;
198 if (c == 0xb5) return 1;
199 if (c == 0xb8) return 1;
200 if (c == 0xba) return 1;
201 if (c >= 0xbf && c <= 0xd6) return 1;
202 if (c >= 0xd8 && c <= 0xf6) return 1;
203 if (c >= 0xf8 && c <= 0xfe) return 1;
204 break;
205
206 case ISO_8859_14:
207 if (c == 0xa1 || c == 0xa2) return 1;
208 if (c == 0xa4 || c == 0xa5) return 1;
209 if (c == 0xa6 || c == 0xa8) return 1;
210 if (c >= 0xaa && c <= 0xac) return 1;
211 if (c >= 0xaf && c <= 0xb5) return 1;
212 if (c >= 0xb7 && c <= 0xff) return 1;
213 break;
214
215 case ISO_8859_15:
216 if (c == 0xaa) return 1;
217 if (c == 0xb5) return 1;
218 if (c == 0xba) return 1;
219 if (c >= 0xc0 && c <= 0xd6) return 1;
220 if (c >= 0xd8 && c <= 0xf6) return 1;
221 if (c >= 0xf8 && c <= 0xff) return 1;
222 if (c == 0xa6) return 1;
223 if (c == 0xa8) return 1;
224 if (c == 0xb4) return 1;
225 if (c == 0xb8) return 1;
226 if (c == 0xbc) return 1;
227 if (c == 0xbd) return 1;
228 if (c == 0xbe) return 1;
229 break;
230
231 case ISO_8859_16:
232 if (c == 0xa1) return 1;
233 if (c == 0xa2) return 1;
234 if (c == 0xa3) return 1;
235 if (c == 0xa6) return 1;
236 if (c == 0xa8) return 1;
237 if (c == 0xaa) return 1;
238 if (c == 0xac) return 1;
239 if (c == 0xae) return 1;
240 if (c == 0xaf) return 1;
241 if (c == 0xb2) return 1;
242 if (c == 0xb3) return 1;
243 if (c == 0xb4) return 1;
244 if (c >= 0xb8 && c <= 0xba) return 1;
245 if (c == 0xbc) return 1;
246 if (c == 0xbd) return 1;
247 if (c == 0xbe) return 1;
248 if (c == 0xbf) return 1;
249 if (c >= 0xc0 && c <= 0xde) return 1;
250 if (c >= 0xdf && c <= 0xff) return 1;
251 break;
252
253 case KOI8_R:
254 if (c == 0xa3 || c == 0xb3) return 1;
255 /* fall */
256 case KOI8:
257 if (c >= 0xc0 && c <= 0xff) return 1;
258 break;
259
260 default:
261 exit(-1);
262 }
263
264 return 0;
265 }
266
IsBlank(int enc,int c)267 static int IsBlank(int enc, int c)
268 {
269 if (enc == ASCII)
270 return isblank(c);
271
272 if (c == 0x09 || c == 0x20) return 1;
273
274 switch (enc) {
275 case UNICODE_ISO_8859_1:
276 case ISO_8859_1:
277 case ISO_8859_2:
278 case ISO_8859_3:
279 case ISO_8859_4:
280 case ISO_8859_5:
281 case ISO_8859_6:
282 case ISO_8859_7:
283 case ISO_8859_8:
284 case ISO_8859_9:
285 case ISO_8859_10:
286 case ISO_8859_11:
287 case ISO_8859_13:
288 case ISO_8859_14:
289 case ISO_8859_15:
290 case ISO_8859_16:
291 case KOI8:
292 if (c == 0xa0) return 1;
293 break;
294
295 case KOI8_R:
296 if (c == 0x9a) return 1;
297 break;
298
299 default:
300 exit(-1);
301 }
302
303 return 0;
304 }
305
IsCntrl(int enc,int c)306 static int IsCntrl(int enc, int c)
307 {
308 if (enc == ASCII)
309 return iscntrl(c);
310
311 if (c >= 0x00 && c <= 0x1F) return 1;
312
313 switch (enc) {
314 case UNICODE_ISO_8859_1:
315 if (c == 0xad) return 1;
316 /* fall */
317 case ISO_8859_1:
318 case ISO_8859_2:
319 case ISO_8859_3:
320 case ISO_8859_4:
321 case ISO_8859_5:
322 case ISO_8859_6:
323 case ISO_8859_7:
324 case ISO_8859_8:
325 case ISO_8859_9:
326 case ISO_8859_10:
327 case ISO_8859_11:
328 case ISO_8859_13:
329 case ISO_8859_14:
330 case ISO_8859_15:
331 case ISO_8859_16:
332 case KOI8:
333 if (c >= 0x7f && c <= 0x9F) return 1;
334 break;
335
336
337 case KOI8_R:
338 if (c == 0x7f) return 1;
339 break;
340
341 default:
342 exit(-1);
343 }
344
345 return 0;
346 }
347
/*
 * IsDigit: 1 when c lies in 0x30-0x39, 0 otherwise.
 * The encoding argument is ignored; the answer is the same for all of them.
 */
static int IsDigit(int enc ARG_UNUSED, int c)
{
  return (0x30 <= c && c <= 0x39) ? 1 : 0;
}
353
IsGraph(int enc,int c)354 static int IsGraph(int enc, int c)
355 {
356 if (enc == ASCII)
357 return isgraph(c);
358
359 if (c >= 0x21 && c <= 0x7e) return 1;
360
361 switch (enc) {
362 case UNICODE_ISO_8859_1:
363 case ISO_8859_1:
364 case ISO_8859_2:
365 case ISO_8859_4:
366 case ISO_8859_5:
367 case ISO_8859_9:
368 case ISO_8859_10:
369 case ISO_8859_13:
370 case ISO_8859_14:
371 case ISO_8859_15:
372 case ISO_8859_16:
373 if (c >= 0xa1 && c <= 0xff) return 1;
374 break;
375
376 case ISO_8859_3:
377 if (c >= 0xa1) {
378 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
379 c == 0xe3 || c == 0xf0)
380 return 0;
381 else
382 return 1;
383 }
384 break;
385
386 case ISO_8859_6:
387 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
388 return 1;
389 if (c >= 0xc1 && c <= 0xda) return 1;
390 if (c >= 0xe0 && c <= 0xf2) return 1;
391 break;
392
393 case ISO_8859_7:
394 if (c >= 0xa1 && c <= 0xfe &&
395 c != 0xa4 && c != 0xa5 && c != 0xaa &&
396 c != 0xae && c != 0xd2) return 1;
397 break;
398
399 case ISO_8859_8:
400 if (c >= 0xa2 && c <= 0xfa) {
401 if (c >= 0xbf && c <= 0xde) return 0;
402 return 1;
403 }
404 break;
405
406 case ISO_8859_11:
407 if (c >= 0xa1 && c <= 0xda) return 1;
408 if (c >= 0xdf && c <= 0xfb) return 1;
409 break;
410
411 case KOI8:
412 if (c >= 0xc0 && c <= 0xff) return 1;
413 break;
414
415 case KOI8_R:
416 if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
417 break;
418
419 default:
420 exit(-1);
421 }
422
423 return 0;
424 }
425
IsLower(int enc,int c)426 static int IsLower(int enc, int c)
427 {
428 if (enc == ASCII)
429 return islower(c);
430
431 if (c >= 0x61 && c <= 0x7a) return 1;
432
433 switch (enc) {
434 case UNICODE_ISO_8859_1:
435 case ISO_8859_1:
436 case ISO_8859_9:
437 if (c == 0xaa) return 1;
438 if (c == 0xb5) return 1;
439 if (c == 0xba) return 1;
440 if (c >= 0xdf && c <= 0xf6) return 1;
441 if (c >= 0xf8 && c <= 0xff) return 1;
442 break;
443
444 case ISO_8859_2:
445 if (c == 0xb1 || c == 0xb3) return 1;
446 if (c == 0xb5 || c == 0xb6) return 1;
447 if (c >= 0xb9 && c <= 0xbc) return 1;
448 if (c >= 0xbe && c <= 0xbf) return 1;
449 if (c >= 0xdf && c <= 0xf6) return 1;
450 if (c >= 0xf8 && c <= 0xfe) return 1;
451 break;
452
453 case ISO_8859_3:
454 if (c == 0xb1) return 1;
455 if (c == 0xb5 || c == 0xb6) return 1;
456 if (c >= 0xb9 && c <= 0xbc) return 1;
457 if (c == 0xbf) return 1;
458 if (c == 0xdf) return 1;
459 if (c >= 0xe0 && c <= 0xe2) return 1;
460 if (c >= 0xe4 && c <= 0xef) return 1;
461 if (c >= 0xf1 && c <= 0xf6) return 1;
462 if (c >= 0xf8 && c <= 0xfe) return 1;
463 break;
464
465 case ISO_8859_4:
466 if (c == 0xa2) return 1;
467 if (c == 0xb1 || c == 0xb3) return 1;
468 if (c == 0xb5 || c == 0xb6) return 1;
469 if (c >= 0xb9 && c <= 0xbc) return 1;
470 if (c >= 0xbe && c <= 0xbf) return 1;
471 if (c == 0xdf) return 1;
472 if (c >= 0xe0 && c <= 0xf6) return 1;
473 if (c >= 0xf8 && c <= 0xfe) return 1;
474 break;
475
476 case ISO_8859_5:
477 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
478 break;
479
480 case ISO_8859_6:
481 break;
482
483 case ISO_8859_7:
484 if (c == 0xc0) return 1;
485 if (c >= 0xdc && c <= 0xfe) return 1;
486 break;
487
488 case ISO_8859_8:
489 if (c == 0xb5) return 1;
490 break;
491
492 case ISO_8859_10:
493 if (c >= 0xb1 && c <= 0xb6) return 1;
494 if (c >= 0xb8 && c <= 0xbc) return 1;
495 if (c == 0xbe || c == 0xbf) return 1;
496 if (c >= 0xdf && c <= 0xff) return 1;
497 break;
498
499 case ISO_8859_11:
500 break;
501
502 case ISO_8859_13:
503 if (c == 0xb5) return 1;
504 if (c == 0xb8) return 1;
505 if (c == 0xba) return 1;
506 if (c == 0xbf) return 1;
507 if (c >= 0xdf && c <= 0xf6) return 1;
508 if (c >= 0xf8 && c <= 0xfe) return 1;
509 break;
510
511 case ISO_8859_14:
512 if (c == 0xa2) return 1;
513 if (c == 0xa5) return 1;
514 if (c == 0xab) return 1;
515 if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
516 if (c >= 0xb8 && c <= 0xba) return 1;
517 if (c == 0xbc) return 1;
518 if (c == 0xbe || c == 0xbf) return 1;
519 if (c >= 0xdf && c <= 0xff) return 1;
520 break;
521
522 case ISO_8859_15:
523 if (c == 0xaa) return 1;
524 if (c == 0xb5) return 1;
525 if (c == 0xba) return 1;
526 if (c >= 0xdf && c <= 0xf6) return 1;
527 if (c >= 0xf8 && c <= 0xff) return 1;
528 if (c == 0xa8) return 1;
529 if (c == 0xb8) return 1;
530 if (c == 0xbd) return 1;
531 break;
532
533 case ISO_8859_16:
534 if (c == 0xa2) return 1;
535 if (c == 0xa8) return 1;
536 if (c == 0xae) return 1;
537 if (c == 0xb3) return 1;
538 if (c >= 0xb8 && c <= 0xba) return 1;
539 if (c == 0xbd) return 1;
540 if (c == 0xbf) return 1;
541 if (c >= 0xdf && c <= 0xff) return 1;
542 break;
543
544 case KOI8_R:
545 if (c == 0xa3) return 1;
546 /* fall */
547 case KOI8:
548 if (c >= 0xc0 && c <= 0xdf) return 1;
549 break;
550
551 default:
552 exit(-1);
553 }
554
555 return 0;
556 }
557
IsPrint(int enc,int c)558 static int IsPrint(int enc, int c)
559 {
560 if (enc == ASCII)
561 return isprint(c);
562
563 if (c >= 0x20 && c <= 0x7e) return 1;
564
565 switch (enc) {
566 case UNICODE_ISO_8859_1:
567 if (c >= 0x09 && c <= 0x0d) return 1;
568 if (c == 0x85) return 1;
569 /* fall */
570 case ISO_8859_1:
571 case ISO_8859_2:
572 case ISO_8859_4:
573 case ISO_8859_5:
574 case ISO_8859_9:
575 case ISO_8859_10:
576 case ISO_8859_13:
577 case ISO_8859_14:
578 case ISO_8859_15:
579 case ISO_8859_16:
580 if (c >= 0xa0 && c <= 0xff) return 1;
581 break;
582
583 case ISO_8859_3:
584 if (c >= 0xa0) {
585 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
586 c == 0xe3 || c == 0xf0)
587 return 0;
588 else
589 return 1;
590 }
591 break;
592
593 case ISO_8859_6:
594 if (c == 0xa0) return 1;
595 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
596 return 1;
597 if (c >= 0xc1 && c <= 0xda) return 1;
598 if (c >= 0xe0 && c <= 0xf2) return 1;
599 break;
600
601 case ISO_8859_7:
602 if (c >= 0xa0 && c <= 0xfe &&
603 c != 0xa4 && c != 0xa5 && c != 0xaa &&
604 c != 0xae && c != 0xd2) return 1;
605 break;
606
607 case ISO_8859_8:
608 if (c >= 0xa0 && c <= 0xfa) {
609 if (c >= 0xbf && c <= 0xde) return 0;
610 if (c == 0xa1) return 0;
611 return 1;
612 }
613 break;
614
615 case ISO_8859_11:
616 if (c >= 0xa0 && c <= 0xda) return 1;
617 if (c >= 0xdf && c <= 0xfb) return 1;
618 break;
619
620 case KOI8:
621 if (c == 0xa0) return 1;
622 if (c >= 0xc0 && c <= 0xff) return 1;
623 break;
624
625 case KOI8_R:
626 if (c >= 0x80 && c <= 0xff) return 1;
627 break;
628
629 default:
630 exit(-1);
631 }
632
633 return 0;
634 }
635
IsPunct(int enc,int c)636 static int IsPunct(int enc, int c)
637 {
638 if (enc == ASCII)
639 return ispunct(c);
640
641 if (enc == UNICODE_ISO_8859_1) {
642 if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
643 c == 0x7c || c == 0x7e) return 1;
644 if (c >= 0x3c && c <= 0x3e) return 1;
645 }
646
647 if (c >= 0x21 && c <= 0x2f) return 1;
648 if (c >= 0x3a && c <= 0x40) return 1;
649 if (c >= 0x5b && c <= 0x60) return 1;
650 if (c >= 0x7b && c <= 0x7e) return 1;
651
652 switch (enc) {
653 case ISO_8859_1:
654 case ISO_8859_9:
655 case ISO_8859_15:
656 if (c == 0xad) return 1;
657 /* fall */
658 case UNICODE_ISO_8859_1:
659 if (c == 0xa1) return 1;
660 if (c == 0xab) return 1;
661 if (c == 0xb7) return 1;
662 if (c == 0xbb) return 1;
663 if (c == 0xbf) return 1;
664 break;
665
666 case ISO_8859_2:
667 case ISO_8859_4:
668 case ISO_8859_5:
669 case ISO_8859_14:
670 if (c == 0xad) return 1;
671 break;
672
673 case ISO_8859_3:
674 case ISO_8859_10:
675 if (c == 0xad) return 1;
676 if (c == 0xb7) return 1;
677 if (c == 0xbd) return 1;
678 break;
679
680 case ISO_8859_6:
681 if (c == 0xac) return 1;
682 if (c == 0xad) return 1;
683 if (c == 0xbb) return 1;
684 if (c == 0xbf) return 1;
685 break;
686
687 case ISO_8859_7:
688 if (c == 0xa1 || c == 0xa2) return 1;
689 if (c == 0xab) return 1;
690 if (c == 0xaf) return 1;
691 if (c == 0xad) return 1;
692 if (c == 0xb7 || c == 0xbb) return 1;
693 break;
694
695 case ISO_8859_8:
696 if (c == 0xab) return 1;
697 if (c == 0xad) return 1;
698 if (c == 0xb7) return 1;
699 if (c == 0xbb) return 1;
700 if (c == 0xdf) return 1;
701 break;
702
703 case ISO_8859_13:
704 if (c == 0xa1 || c == 0xa5) return 1;
705 if (c == 0xab || c == 0xad) return 1;
706 if (c == 0xb4 || c == 0xb7) return 1;
707 if (c == 0xbb) return 1;
708 if (c == 0xff) return 1;
709 break;
710
711 case ISO_8859_16:
712 if (c == 0xa5) return 1;
713 if (c == 0xab) return 1;
714 if (c == 0xad) return 1;
715 if (c == 0xb5) return 1;
716 if (c == 0xb7) return 1;
717 if (c == 0xbb) return 1;
718 break;
719
720 case KOI8_R:
721 if (c == 0x9e) return 1;
722 break;
723
724 case ISO_8859_11:
725 case KOI8:
726 break;
727
728 default:
729 exit(-1);
730 }
731
732 return 0;
733 }
734
IsSpace(int enc,int c)735 static int IsSpace(int enc, int c)
736 {
737 if (enc == ASCII)
738 return isspace(c);
739
740 if (c >= 0x09 && c <= 0x0d) return 1;
741 if (c == 0x20) return 1;
742
743 switch (enc) {
744 case UNICODE_ISO_8859_1:
745 if (c == 0x85) return 1;
746 /* fall */
747 case ISO_8859_1:
748 case ISO_8859_2:
749 case ISO_8859_3:
750 case ISO_8859_4:
751 case ISO_8859_5:
752 case ISO_8859_6:
753 case ISO_8859_7:
754 case ISO_8859_8:
755 case ISO_8859_9:
756 case ISO_8859_10:
757 case ISO_8859_11:
758 case ISO_8859_13:
759 case ISO_8859_14:
760 case ISO_8859_15:
761 case ISO_8859_16:
762 case KOI8:
763 if (c == 0xa0) return 1;
764 break;
765
766 case KOI8_R:
767 if (c == 0x9a) return 1;
768 break;
769
770 default:
771 exit(-1);
772 }
773
774 return 0;
775 }
776
IsUpper(int enc,int c)777 static int IsUpper(int enc, int c)
778 {
779 if (enc == ASCII)
780 return isupper(c);
781
782 if (c >= 0x41 && c <= 0x5a) return 1;
783
784 switch (enc) {
785 case UNICODE_ISO_8859_1:
786 case ISO_8859_1:
787 case ISO_8859_9:
788 if (c >= 0xc0 && c <= 0xd6) return 1;
789 if (c >= 0xd8 && c <= 0xde) return 1;
790 break;
791
792 case ISO_8859_2:
793 if (c == 0xa1 || c == 0xa3) return 1;
794 if (c == 0xa5 || c == 0xa6) return 1;
795 if (c >= 0xa9 && c <= 0xac) return 1;
796 if (c >= 0xae && c <= 0xaf) return 1;
797 if (c >= 0xc0 && c <= 0xd6) return 1;
798 if (c >= 0xd8 && c <= 0xde) return 1;
799 break;
800
801 case ISO_8859_3:
802 if (c == 0xa1) return 1;
803 if (c == 0xa6) return 1;
804 if (c >= 0xa9 && c <= 0xac) return 1;
805 if (c == 0xaf) return 1;
806 if (c >= 0xc0 && c <= 0xc2) return 1;
807 if (c >= 0xc4 && c <= 0xcf) return 1;
808 if (c >= 0xd1 && c <= 0xd6) return 1;
809 if (c >= 0xd8 && c <= 0xde) return 1;
810 break;
811
812 case ISO_8859_4:
813 if (c == 0xa1 || c == 0xa3) return 1;
814 if (c == 0xa5 || c == 0xa6) return 1;
815 if (c >= 0xa9 && c <= 0xac) return 1;
816 if (c == 0xae) return 1;
817 if (c == 0xbd) return 1;
818 if (c >= 0xc0 && c <= 0xd6) return 1;
819 if (c >= 0xd8 && c <= 0xde) return 1;
820 break;
821
822 case ISO_8859_5:
823 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
824 break;
825
826 case ISO_8859_6:
827 break;
828
829 case ISO_8859_7:
830 if (c == 0xb6) return 1;
831 if (c >= 0xb8 && c <= 0xba) return 1;
832 if (c == 0xbc) return 1;
833 if (c >= 0xbe && c <= 0xbf) return 1;
834 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
835 break;
836
837 case ISO_8859_8:
838 case ISO_8859_11:
839 break;
840
841 case ISO_8859_10:
842 if (c >= 0xa1 && c <= 0xa6) return 1;
843 if (c >= 0xa8 && c <= 0xac) return 1;
844 if (c == 0xae || c == 0xaf) return 1;
845 if (c >= 0xc0 && c <= 0xde) return 1;
846 break;
847
848 case ISO_8859_13:
849 if (c == 0xa8) return 1;
850 if (c == 0xaa) return 1;
851 if (c == 0xaf) return 1;
852 if (c >= 0xc0 && c <= 0xd6) return 1;
853 if (c >= 0xd8 && c <= 0xde) return 1;
854 break;
855
856 case ISO_8859_14:
857 if (c == 0xa1) return 1;
858 if (c == 0xa4 || c == 0xa6) return 1;
859 if (c == 0xa8) return 1;
860 if (c == 0xaa || c == 0xac) return 1;
861 if (c == 0xaf || c == 0xb0) return 1;
862 if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
863 if (c == 0xbb || c == 0xbd) return 1;
864 if (c >= 0xc0 && c <= 0xde) return 1;
865 break;
866
867 case ISO_8859_15:
868 if (c >= 0xc0 && c <= 0xd6) return 1;
869 if (c >= 0xd8 && c <= 0xde) return 1;
870 if (c == 0xa6) return 1;
871 if (c == 0xb4) return 1;
872 if (c == 0xbc) return 1;
873 if (c == 0xbe) return 1;
874 break;
875
876 case ISO_8859_16:
877 if (c == 0xa1) return 1;
878 if (c == 0xa3) return 1;
879 if (c == 0xa6) return 1;
880 if (c == 0xaa) return 1;
881 if (c == 0xac) return 1;
882 if (c == 0xaf) return 1;
883 if (c == 0xb2) return 1;
884 if (c == 0xb4) return 1;
885 if (c == 0xbc) return 1;
886 if (c == 0xbe) return 1;
887 if (c >= 0xc0 && c <= 0xde) return 1;
888 break;
889
890 case KOI8_R:
891 if (c == 0xb3) return 1;
892 /* fall */
893 case KOI8:
894 if (c >= 0xe0 && c <= 0xff) return 1;
895 break;
896
897 default:
898 exit(-1);
899 }
900
901 return 0;
902 }
903
IsXDigit(int enc,int c)904 static int IsXDigit(int enc, int c)
905 {
906 if (enc == ASCII)
907 return isxdigit(c);
908
909 if (c >= 0x30 && c <= 0x39) return 1;
910 if (c >= 0x41 && c <= 0x46) return 1;
911 if (c >= 0x61 && c <= 0x66) return 1;
912 return 0;
913 }
914
IsWord(int enc,int c)915 static int IsWord(int enc, int c)
916 {
917 if (enc == ASCII) {
918 return (isalpha(c) || isdigit(c) || c == 0x5f);
919 }
920
921 if (c >= 0x30 && c <= 0x39) return 1;
922 if (c >= 0x41 && c <= 0x5a) return 1;
923 if (c == 0x5f) return 1;
924 if (c >= 0x61 && c <= 0x7a) return 1;
925
926 switch (enc) {
927 case UNICODE_ISO_8859_1:
928 case ISO_8859_1:
929 case ISO_8859_9:
930 if (c == 0xaa) return 1;
931 if (c >= 0xb2 && c <= 0xb3) return 1;
932 if (c == 0xb5) return 1;
933 if (c >= 0xb9 && c <= 0xba) return 1;
934 if (c >= 0xbc && c <= 0xbe) return 1;
935 if (c >= 0xc0 && c <= 0xd6) return 1;
936 if (c >= 0xd8 && c <= 0xf6) return 1;
937 if (c >= 0xf8 && c <= 0xff) return 1;
938 break;
939
940 case ISO_8859_2:
941 if (c == 0xa1 || c == 0xa3) return 1;
942 if (c == 0xa5 || c == 0xa6) return 1;
943 if (c >= 0xa9 && c <= 0xac) return 1;
944 if (c >= 0xae && c <= 0xaf) return 1;
945 if (c == 0xb1 || c == 0xb3) return 1;
946 if (c == 0xb5 || c == 0xb6) return 1;
947 if (c >= 0xb9 && c <= 0xbc) return 1;
948 if (c >= 0xbe && c <= 0xbf) return 1;
949 if (c >= 0xc0 && c <= 0xd6) return 1;
950 if (c >= 0xd8 && c <= 0xf6) return 1;
951 if (c >= 0xf8 && c <= 0xfe) return 1;
952 break;
953
954 case ISO_8859_3:
955 if (c == 0xa1) return 1;
956 if (c == 0xa6) return 1;
957 if (c >= 0xa9 && c <= 0xac) return 1;
958 if (c == 0xaf) return 1;
959 if (c >= 0xb1 && c <= 0xb3) return 1;
960 if (c == 0xb5 || c == 0xb6) return 1;
961 if (c >= 0xb9 && c <= 0xbd) return 1;
962 if (c == 0xbf) return 1;
963 if (c >= 0xc0 && c <= 0xc2) return 1;
964 if (c >= 0xc4 && c <= 0xcf) return 1;
965 if (c >= 0xd1 && c <= 0xd6) return 1;
966 if (c >= 0xd8 && c <= 0xe2) return 1;
967 if (c >= 0xe4 && c <= 0xef) return 1;
968 if (c >= 0xf1 && c <= 0xf6) return 1;
969 if (c >= 0xf8 && c <= 0xfe) return 1;
970 break;
971
972 case ISO_8859_4:
973 if (c >= 0xa1 && c <= 0xa3) return 1;
974 if (c == 0xa5 || c == 0xa6) return 1;
975 if (c >= 0xa9 && c <= 0xac) return 1;
976 if (c == 0xae) return 1;
977 if (c == 0xb1 || c == 0xb3) return 1;
978 if (c == 0xb5 || c == 0xb6) return 1;
979 if (c >= 0xb9 && c <= 0xbf) return 1;
980 if (c >= 0xc0 && c <= 0xd6) return 1;
981 if (c >= 0xd8 && c <= 0xf6) return 1;
982 if (c >= 0xf8 && c <= 0xfe) return 1;
983 break;
984
985 case ISO_8859_5:
986 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
987 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
988 break;
989
990 case ISO_8859_6:
991 if (c >= 0xc1 && c <= 0xda) return 1;
992 if (c >= 0xe0 && c <= 0xea) return 1;
993 if (c >= 0xeb && c <= 0xf2) return 1;
994 break;
995
996 case ISO_8859_7:
997 if (c == 0xb2 || c == 0xb3) return 1;
998 if (c == 0xb6) return 1;
999 if (c >= 0xb8 && c <= 0xba) return 1;
1000 if (c >= 0xbc && c <= 0xbf) return 1;
1001 if (c == 0xc0) return 1;
1002 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
1003 if (c >= 0xdc && c <= 0xfe) return 1;
1004 break;
1005
1006 case ISO_8859_8:
1007 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1008 if (c >= 0xbc && c <= 0xbe) return 1;
1009 if (c >= 0xe0 && c <= 0xfa) return 1;
1010 break;
1011
1012 case ISO_8859_10:
1013 if (c >= 0xa1 && c <= 0xff) {
1014 if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
1015 return 1;
1016 }
1017 break;
1018
1019 case ISO_8859_11:
1020 if (c >= 0xa1 && c <= 0xda) return 1;
1021 if (c >= 0xdf && c <= 0xfb) return 1;
1022 break;
1023
1024 case ISO_8859_13:
1025 if (c == 0xa8) return 1;
1026 if (c == 0xaa) return 1;
1027 if (c == 0xaf) return 1;
1028 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1029 if (c >= 0xbc && c <= 0xbe) return 1;
1030 if (c == 0xb8) return 1;
1031 if (c == 0xba) return 1;
1032 if (c >= 0xbf && c <= 0xd6) return 1;
1033 if (c >= 0xd8 && c <= 0xf6) return 1;
1034 if (c >= 0xf8 && c <= 0xfe) return 1;
1035 break;
1036
1037 case ISO_8859_14:
1038 if (c >= 0xa1 && c <= 0xff) {
1039 if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
1040 c == 0xb6) return 0;
1041 return 1;
1042 }
1043 break;
1044
1045 case ISO_8859_15:
1046 if (c == 0xaa) return 1;
1047 if (c >= 0xb2 && c <= 0xb3) return 1;
1048 if (c == 0xb5) return 1;
1049 if (c >= 0xb9 && c <= 0xba) return 1;
1050 if (c >= 0xbc && c <= 0xbe) return 1;
1051 if (c >= 0xc0 && c <= 0xd6) return 1;
1052 if (c >= 0xd8 && c <= 0xf6) return 1;
1053 if (c >= 0xf8 && c <= 0xff) return 1;
1054 if (c == 0xa6) return 1;
1055 if (c == 0xa8) return 1;
1056 if (c == 0xb4) return 1;
1057 if (c == 0xb8) return 1;
1058 break;
1059
1060 case ISO_8859_16:
1061 if (c == 0xa1) return 1;
1062 if (c == 0xa2) return 1;
1063 if (c == 0xa3) return 1;
1064 if (c == 0xa6) return 1;
1065 if (c == 0xa8) return 1;
1066 if (c == 0xaa) return 1;
1067 if (c == 0xac) return 1;
1068 if (c == 0xae) return 1;
1069 if (c == 0xaf) return 1;
1070 if (c == 0xb2) return 1;
1071 if (c == 0xb3) return 1;
1072 if (c == 0xb4) return 1;
1073 if (c >= 0xb8 && c <= 0xba) return 1;
1074 if (c == 0xbc) return 1;
1075 if (c == 0xbd) return 1;
1076 if (c == 0xbe) return 1;
1077 if (c == 0xbf) return 1;
1078 if (c >= 0xc0 && c <= 0xde) return 1;
1079 if (c >= 0xdf && c <= 0xff) return 1;
1080 break;
1081
1082 case KOI8_R:
1083 if (c == 0x9d) return 1;
1084 if (c == 0xa3 || c == 0xb3) return 1;
1085 /* fall */
1086 case KOI8:
1087 if (c >= 0xc0 && c <= 0xff) return 1;
1088 break;
1089
1090 default:
1091 exit(-1);
1092 }
1093
1094 return 0;
1095 }
1096
/*
 * IsAscii: 1 when c lies in 0x00-0x7f, 0 otherwise.
 * The encoding argument is ignored.
 */
static int IsAscii(int enc ARG_UNUSED, int c)
{
  return (0x00 <= c && c <= 0x7f) ? 1 : 0;
}
1102
/*
 * IsNewline: 1 when c is 0x0a, 0 otherwise.
 * The encoding argument is ignored.
 */
static int IsNewline(int enc ARG_UNUSED, int c)
{
  return (c == 0x0a) ? 1 : 0;
}
1108
/*
 * exec: write the ctype table for one encoding to fp as C source.
 *
 * Emits
 *     static const unsigned short Enc<name>_CtypeTable[256] = {
 * followed by 256 entries, NCOL per line, each formatted as "0x%04x",
 * and a closing "};".  Entry c is the OR of the BIT_CTYPE_* flags whose
 * Is*() predicate holds for (einfo->num, c); alphabetic and digit
 * characters also get BIT_CTYPE_ALNUM.
 *
 *   fp     output stream
 *   einfo  encoding to describe (id in ->num, identifier text in ->name)
 *
 * Returns 0 on success, or -1 as soon as a stdio call reports failure.
 * Note that the Is*() predicates call exit(-1) for an encoding id they
 * do not know.
 */
static int exec(FILE* fp, ENC_INFO* einfo)
{
  /* Entries per output line.  An enum keeps the name local to this
     function instead of leaking a macro into the rest of the file. */
  enum { NCOL = 8 };

  const int enc = einfo->num;
  int c;

  if (fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
              einfo->name) < 0)
    return -1;

  for (c = 0; c < 256; c++) {
    /* unsigned, because it is printed with %x below */
    unsigned int val = 0;

    if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE;
    if (IsAlpha  (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
    if (IsBlank  (enc, c)) val |= BIT_CTYPE_BLANK;
    if (IsCntrl  (enc, c)) val |= BIT_CTYPE_CNTRL;
    if (IsDigit  (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
    if (IsGraph  (enc, c)) val |= BIT_CTYPE_GRAPH;
    if (IsLower  (enc, c)) val |= BIT_CTYPE_LOWER;
    if (IsPrint  (enc, c)) val |= BIT_CTYPE_PRINT;
    if (IsPunct  (enc, c)) val |= BIT_CTYPE_PUNCT;
    if (IsSpace  (enc, c)) val |= BIT_CTYPE_SPACE;
    if (IsUpper  (enc, c)) val |= BIT_CTYPE_UPPER;
    if (IsXDigit (enc, c)) val |= BIT_CTYPE_XDIGIT;
    if (IsWord   (enc, c)) val |= BIT_CTYPE_WORD;
    if (IsAscii  (enc, c)) val |= BIT_CTYPE_ASCII;

    /* Leading indent at the start of each output line. */
    if (c % NCOL == 0) {
      if (fputs(" ", fp) < 0) return -1;
    }

    if (fprintf(fp, "0x%04x", val) < 0) return -1;

    /* Every entry except the last is followed by a comma. */
    if (c != 255) {
      if (fputs(",", fp) < 0) return -1;
    }

    /* Break the line after every NCOL-th entry, otherwise separate
       entries with a space. */
    if (c != 0 && c % NCOL == (NCOL - 1)) {
      if (fputs("\n", fp) < 0) return -1;
    }
    else {
      if (fputs(" ", fp) < 0) return -1;
    }
  }

  if (fprintf(fp, "};\n") < 0) return -1;

  return 0;
}
1162
/*
 * main: print the ctype table of every encoding listed in Info[] to
 * standard output, in table order.
 *
 * Command-line arguments are ignored.  Returns 0 on success; on the
 * first failure a message is written to stderr and -1 is returned.
 */
extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
{
  FILE* fp = stdout;
  int i;

  /* Pin the "C" locale: the ASCII table is built from the <ctype.h>
     functions, whose results depend on the current locale. */
  setlocale(LC_ALL, "C");

  for (i = 0; i < (int )(sizeof(Info) / sizeof(Info[0])); i++) {
    int r = exec(fp, &Info[i]);
    if (r < 0) {
      fprintf(stderr, "FAIL exec(): %d\n", r);
      return -1;
    }
  }

  /* The stream is buffered, so a write error (full disk, closed pipe)
     may only surface when the data is actually flushed.  Check here so
     that a truncated table is never reported as success. */
  if (fflush(fp) != 0 || ferror(fp)) {
    fprintf(stderr, "FAIL fflush()\n");
    return -1;
  }

  return 0;
}
1185