#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6
/*
 * One row of the mapping table: a Unicode code point together with every
 * CP932 code that was registered for it.
 */
struct mappings_entry {
	/* Unicode code point; the table is kept sorted on this field. */
	int cp_uni;
	/* Number of slots of cp_932[] currently in use. */
	int n;
	/* CP932 codes recorded for cp_uni, in the order they were added. */
	int cp_932[16];
};
12
/*
 * Growable array of mappings_entry records.
 */
struct mappings {
	/* Number of entries currently stored. */
	size_t n;
	/* Number of entries the `entries` allocation has room for. */
	size_t nalloc;
	/* Heap storage for the entries; NULL while nothing is allocated. */
	struct mappings_entry *entries;
};
18
mappings_init(struct mappings * map)19 static void mappings_init(struct mappings *map)
20 {
21 map->n = 0;
22 map->nalloc = 0;
23 map->entries = 0;
24 }
25
mappings_destroy(struct mappings * map)26 static void mappings_destroy(struct mappings *map)
27 {
28 if (map->entries)
29 free(map->entries);
30 }
31
mappings_grow(struct mappings * map)32 static int mappings_grow(struct mappings *map)
33 {
34 if (map->n >= map->nalloc) {
35 struct mappings_entry *new_entries;
36 size_t n = map->nalloc << 1, a;
37 if (n == 0)
38 n = 1;
39 else if (n <= map->n)
40 return 2;
41 a = sizeof(*map->entries) * n;
42 if (a / n != sizeof(*map->entries))
43 return 2;
44 new_entries = realloc(map->entries, a);
45 if (!new_entries)
46 return 2;
47 map->entries = new_entries;
48 map->nalloc = n;
49 }
50 return 0;
51 }
52
mappings_add(struct mappings * map,int cp_uni,int cp_932)53 static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
54 {
55 size_t i;
56 size_t s = 0, e = map->n;
57 struct mappings_entry *entry;
58
59 for (;;) {
60 i = (s + e) / 2;
61 entry = &map->entries[i];
62 if (e == i || entry->cp_uni > cp_uni) {
63 if (e == i) {
64 int r = mappings_grow(map);
65 if (r)
66 return r;
67 if (map->n > i) {
68 size_t n = map->n - i, a = sizeof(*map->entries) * n;
69 if (a / n != sizeof(*map->entries))
70 return 2;
71 memmove(&map->entries[i + 1], &map->entries[i], a);
72 }
73 ++map->n;
74 entry = &map->entries[i];
75 entry->cp_uni = cp_uni;
76 entry->n = 0;
77 break;
78 }
79 e = i;
80 } else if (entry->cp_uni < cp_uni) {
81 if (s == i) {
82 int r = mappings_grow(map);
83 if (r)
84 return r;
85 if (map->n > i + 1) {
86 size_t n = (map->n - i - 1), a = sizeof(*map->entries) * n;
87 if (a / n != sizeof(*map->entries))
88 return 2;
89 memmove(&map->entries[i + 2], &map->entries[i + 1], a);
90 }
91 ++map->n;
92 entry = &map->entries[i + 1];
93 entry->cp_uni = cp_uni;
94 entry->n = 0;
95 break;
96 }
97 s = i;
98 } else {
99 break;
100 }
101 }
102 if (entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932))
103 return 1;
104 entry->cp_932[entry->n++] = cp_932;
105 return 0;
106 }
107
/*
 * Description of one output generator selectable by name.
 */
struct generator_entry {
	/* Name the generator is selected by. */
	const char *name;
	/* Text printed before any entry is visited. */
	const char *prologue;
	/* Text printed after the last entry has been visited. */
	const char *epilogue;
	/* Called once for every entry of the mapping table. */
	void (*visitor)(const struct mappings_entry *);
};
114
/*
 * Encode code point `k` as a UTF-8 style byte sequence in `buf`.
 *
 * Sequences of one to six bytes are produced (the 5- and 6-byte forms cover
 * values from 0x200000 upwards). A terminating NUL is stored after the
 * sequence, so `buf` must have room for 7 bytes.
 *
 * Returns the number of bytes written, not counting the NUL.
 */
static int utf32_utf8(char *buf, int k)
{
	/* Marker bits of the first byte, indexed by (sequence length - 2). */
	static const int lead_bits[] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
	int len, pos;

	if (k < 0x80)
		len = 1;
	else if (k < 0x800)
		len = 2;
	else if (k < 0x10000)
		len = 3;
	else if (k < 0x200000)
		len = 4;
	else if (k < 0x4000000)
		len = 5;
	else
		len = 6;

	if (len == 1) {
		buf[0] = k;
	} else {
		/* Fill the continuation bytes from the end, 6 bits at a time. */
		for (pos = len - 1; pos > 0; --pos) {
			buf[pos] = 0x80 | (k & 0x3f);
			k >>= 6;
		}
		/* Whatever is left of k goes into the lead byte. */
		buf[0] = lead_bits[len - 2] | k;
	}
	buf[len] = '\0';

	return len;
}
157
/* Text printed at the very end of every generated expect script. */
static const char epilogue[] =
	"close\n";

/*
 * Text printed at the start of a generated expect script. The four scripts
 * differ only in the two encoding names passed to tests/conv_encoding after
 * "Japanese"; `enc_a` and `enc_b` are those two names in command-line order.
 */
#define EXPECT_PROLOGUE(enc_a, enc_b) \
	"#!/usr/bin/expect -f\n" \
	"spawn tests/conv_encoding Japanese " enc_a " " enc_b "\n" \
	"set timeout 1\n" \
	"\n" \
	"expect_after {\n" \
	" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" \
	"}\n"

static const char prologue_to_cp932[] = EXPECT_PROLOGUE("CP932", "UTF-8");

static const char prologue_to_cp50220[] = EXPECT_PROLOGUE("CP50220", "UTF-8");

static const char prologue_to_cp50222[] = EXPECT_PROLOGUE("CP50222", "UTF-8");

static const char prologue_from_cp932[] = EXPECT_PROLOGUE("UTF-8", "CP932");

#undef EXPECT_PROLOGUE
196
to_cp932_visitor(const struct mappings_entry * entry)197 static void to_cp932_visitor(const struct mappings_entry *entry)
198 {
199 char buf_uni[32], buf_cp932[8];
200 int i;
201
202 if (entry->cp_uni < 32 || entry->cp_uni == 127)
203 return;
204
205 i = utf32_utf8(buf_uni, entry->cp_uni);
206 buf_uni[i * 4] = '\0';
207 while (--i >= 0) {
208 unsigned char c = ((unsigned char *)buf_uni)[i];
209 buf_uni[i * 4] = '\\';
210 buf_uni[i * 4 + 1] = 'x';
211 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
212 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
213 }
214
215 printf("set test \"U+%06X\"\n"
216 "send -- \"%s\r\"\n"
217 "sleep 0.001\n"
218 "expect {\n", entry->cp_uni, buf_uni);
219
220 for (i = 0; i < entry->n; ++i) {
221 int len = 0;
222 const int c = entry->cp_932[i];
223 if (c >= 0x100) {
224 len = 2;
225 sprintf(buf_cp932, "%%%02x%%%02x", (c >> 8) & 0xff, c & 0xff);
226 } else {
227 len = 1;
228 sprintf(buf_cp932, "%%%02x", c);
229 }
230 printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len);
231 }
232
233 printf("}\n");
234 }
235
from_cp932_visitor(const struct mappings_entry * entry)236 static void from_cp932_visitor(const struct mappings_entry *entry)
237 {
238 char buf_uni[32], buf_cp932[8];
239 int i, len;
240
241 if (entry->cp_uni < 32 || entry->cp_uni == 127)
242 return;
243
244 len = utf32_utf8(buf_uni, entry->cp_uni);
245 buf_uni[len * 3] = '\0';
246 i = len;
247 while (--i >= 0) {
248 unsigned char c = ((unsigned char *)buf_uni)[i];
249 buf_uni[i * 3] = '%';
250 buf_uni[i * 3 + 1] = "0123456789abcdef"[c >> 4];
251 buf_uni[i * 3 + 2] = "0123456789abcdef"[c & 15];
252 }
253
254 for (i = 0; i < entry->n; ++i) {
255 const int c = entry->cp_932[i];
256 if (c >= 0x100)
257 sprintf(buf_cp932, "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff);
258 else
259 sprintf(buf_cp932, "\\x%02x", c);
260 printf("set test \"U+%06X\"\n"
261 "send -- \"%s\r\"\n"
262 "sleep 0.001\n"
263 "expect {\n"
264 " \"%s (%d)\\r\\n\" { pass $test }\n"
265 "}\n",
266 entry->cp_uni, buf_cp932, buf_uni, len);
267 }
268 }
269
to_cp50220_visitor(const struct mappings_entry * entry)270 static void to_cp50220_visitor(const struct mappings_entry *entry)
271 {
272 char buf_uni[32], buf_cp50220[32];
273 int i;
274
275 if (entry->cp_uni < 32 || entry->cp_uni == 127)
276 return;
277
278 i = utf32_utf8(buf_uni, entry->cp_uni);
279 buf_uni[i * 4] = '\0';
280 while (--i >= 0) {
281 unsigned char c = ((unsigned char *)buf_uni)[i];
282 buf_uni[i * 4] = '\\';
283 buf_uni[i * 4 + 1] = 'x';
284 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
285 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
286 }
287
288 printf("set test \"U+%06X\"\n"
289 "send -- \"%s\r\"\n"
290 "sleep 0.001\n"
291 "expect {\n", entry->cp_uni, buf_uni);
292
293 for (i = 0; i < entry->n; ++i) {
294 int len = 0;
295 const int c = entry->cp_932[i];
296 if (c >= 0xa1 && c < 0xe0) {
297 static const int jisx0208_tl_map[] = {
298 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
299 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
300 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
301 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
302 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
303 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
304 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
305 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
306 };
307 const int j = jisx0208_tl_map[c - 0xa0];
308 len = 8;
309 sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
310 } else if (c >= 0x100) {
311 const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
312 len = 8;
313 sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
314 } else {
315 len = 1;
316 sprintf(buf_cp50220, "%%%02x", c);
317 }
318 printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
319 }
320
321 printf("}\n");
322 }
323
to_cp50222_visitor(const struct mappings_entry * entry)324 static void to_cp50222_visitor(const struct mappings_entry *entry)
325 {
326 char buf_uni[32], buf_cp50220[32];
327 int i;
328
329 if (entry->cp_uni < 32 || entry->cp_uni == 127)
330 return;
331
332 i = utf32_utf8(buf_uni, entry->cp_uni);
333 buf_uni[i * 4] = '\0';
334 while (--i >= 0) {
335 unsigned char c = ((unsigned char *)buf_uni)[i];
336 buf_uni[i * 4] = '\\';
337 buf_uni[i * 4 + 1] = 'x';
338 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
339 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
340 }
341
342 printf("set test \"U+%06X\"\n"
343 "send -- \"%s\r\"\n"
344 "sleep 0.001\n"
345 "expect {\n", entry->cp_uni, buf_uni);
346
347 for (i = 0; i < entry->n; ++i) {
348 int len = 0;
349 const int c = entry->cp_932[i];
350 if (c >= 0xa1 && c < 0xe0) {
351 len = 3;
352 sprintf(buf_cp50220, "%%0e%%%02x%%0f", c - 0x80);
353 } else if (c >= 0x100) {
354 const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
355 len = 8;
356 sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
357 } else {
358 len = 1;
359 sprintf(buf_cp50220, "%%%02x", c);
360 }
361 printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
362 }
363
364 printf("}\n");
365 }
366
367
368 static struct generator_entry entries[] = {
369 { "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
370 { "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
371 { "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
372 { "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
373 { NULL }
374 };
375
376 static const char cp932_txt[] = "CP932.TXT";
377
main(int argc,char ** argv)378 int main(int argc, char **argv)
379 {
380 int retval = 0;
381 FILE *fp;
382 char buf[1024];
383 struct generator_entry* gen;
384 struct mappings map;
385
386 if (argc <= 1) {
387 fprintf(stderr, "usage: %s generator\n", argv[0]);
388 return 255;
389 }
390
391 for (gen = entries;; ++gen) {
392 if (!gen->name) {
393 fprintf(stderr, "Unknown generator: %s\n", argv[1]);
394 return 1;
395 }
396 if (strcmp(gen->name, argv[1]) == 0)
397 break;
398 }
399
400 fp = fopen(cp932_txt, "r");
401 if (!fp) {
402 fprintf(stderr, "Failed to open %s\n", cp932_txt);
403 return 2;
404 }
405
406 mappings_init(&map);
407
408 while (fgets(buf, sizeof(buf), fp)) {
409 const char *fields[16];
410 char *p = buf;
411 int field = 0;
412 int cp_932, cp_uni;
413 for (;;) {
414 char *q = 0;
415 int eol = 0;
416
417 if (field >= sizeof(fields) / sizeof(*fields)) {
418 fprintf(stderr, "Too many fields (incorrect file?)\n");
419 retval = 3;
420 goto out;
421 }
422
423 for (;;) {
424 if (*p == '\0' || *p == '#' || *p == 0x0a) {
425 eol = 1;
426 break;
427 } else if (*p != ' ' && *p != '\t') {
428 break;
429 }
430 ++p;
431 }
432
433 if (eol)
434 break;
435
436 q = p;
437
438 for (;;) {
439 if (*p == '\0' || *p == '#' || *p == 0x0a) {
440 eol = 1;
441 break;
442 } else if (*p == ' ' || *p == '\t') {
443 break;
444 }
445 ++p;
446 }
447
448 *p = '\0';
449 fields[field++] = q;
450
451 if (eol)
452 break;
453 ++p;
454 }
455 if (field == 0 || field == 1) {
456 continue;
457 } else if (field != 2) {
458 fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
459 retval = 3;
460 goto out;
461 }
462 cp_932 = strtol(fields[0], NULL, 0);
463 if (errno == ERANGE || errno == EINVAL) {
464 fprintf(stderr, "Malformed field value: %s\n", fields[0]);
465 retval = 4;
466 goto out;
467 }
468 cp_uni = strtol(fields[1], NULL, 0);
469 if (errno == ERANGE || errno == EINVAL) {
470 fprintf(stderr, "Malformed field value: %s\n", fields[1]);
471 retval = 4;
472 goto out;
473 }
474
475 if (mappings_add(&map, cp_uni, cp_932)) {
476 fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni);
477 retval = 4;
478 goto out;
479 }
480 }
481
482 {
483 size_t i;
484 printf("%s", gen->prologue);
485 for (i = 0; i < map.n; ++i)
486 gen->visitor(&map.entries[i]);
487 printf("%s", gen->epilogue);
488 }
489
490 out:
491 mappings_destroy(&map);
492 return retval;
493 }
494
495 /*
496 * vim: sts=4 sw=4 ts=4 noet
497 */
498