1 #include <stdio.h>
2 #include <stddef.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <errno.h>
6 
/* Capacity of the per-code-point list of CP932 codes. */
enum { MAPPINGS_ENTRY_MAX_CP932 = 16 };

/*
 * One Unicode code point together with every CP932 code that was
 * recorded for it by mappings_add().
 */
struct mappings_entry {
	int cp_uni;                            /* Unicode code point; key the table is sorted by */
	int n;                                 /* number of cp_932[] slots in use */
	int cp_932[MAPPINGS_ENTRY_MAX_CP932];  /* recorded CP932 codes, in insertion order */
};
12 
/*
 * Growable array of mappings_entry records.  mappings_add() keeps the
 * array ordered by ascending cp_uni.
 */
struct mappings {
	size_t n;                        /* entries currently stored */
	size_t nalloc;                   /* entries the allocation can hold */
	struct mappings_entry *entries;  /* heap array; null until the first growth */
};
18 
mappings_init(struct mappings * map)19 static void mappings_init(struct mappings *map)
20 {
21 	map->n = 0;
22 	map->nalloc = 0;
23 	map->entries = 0;
24 }
25 
mappings_destroy(struct mappings * map)26 static void mappings_destroy(struct mappings *map)
27 {
28 	if (map->entries)
29 		free(map->entries);
30 }
31 
mappings_grow(struct mappings * map)32 static int mappings_grow(struct mappings *map)
33 {
34 	if (map->n >= map->nalloc) {
35 		struct mappings_entry *new_entries;
36 		size_t n = map->nalloc << 1, a;
37 		if (n == 0)
38 			n = 1;
39 		else if (n <= map->n)
40 			return 2;
41 		a = sizeof(*map->entries) * n;
42 		if (a / n != sizeof(*map->entries))
43 			return 2;
44 		new_entries = realloc(map->entries, a);
45 		if (!new_entries)
46 			return 2;
47 		map->entries = new_entries;
48 		map->nalloc = n;
49 	}
50 	return 0;
51 }
52 
mappings_add(struct mappings * map,int cp_uni,int cp_932)53 static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
54 {
55 	size_t i;
56 	size_t s = 0, e = map->n;
57 	struct mappings_entry *entry;
58 
59 	for (;;) {
60 		i = (s + e) / 2;
61 		entry = &map->entries[i];
62 		if (e == i || entry->cp_uni > cp_uni) {
63 			if (e == i) {
64 				int r = mappings_grow(map);
65 				if (r)
66 					return r;
67 				if (map->n > i) {
68 					size_t n = map->n - i, a = sizeof(*map->entries) * n;
69 					if (a / n != sizeof(*map->entries))
70 						return 2;
71 					memmove(&map->entries[i + 1], &map->entries[i], a);
72 				}
73 				++map->n;
74 				entry = &map->entries[i];
75 				entry->cp_uni = cp_uni;
76 				entry->n = 0;
77 				break;
78 			}
79 			e = i;
80 		} else if (entry->cp_uni < cp_uni) {
81 			if (s == i) {
82 				int r = mappings_grow(map);
83 				if (r)
84 					return r;
85 				if (map->n > i + 1) {
86 					size_t n = (map->n - i - 1), a = sizeof(*map->entries) * n;
87 					if (a / n != sizeof(*map->entries))
88 						return 2;
89 					memmove(&map->entries[i + 2], &map->entries[i + 1], a);
90 				}
91 				++map->n;
92 				entry = &map->entries[i + 1];
93 				entry->cp_uni = cp_uni;
94 				entry->n = 0;
95 				break;
96 			}
97 			s = i;
98 		} else {
99 			break;
100 		}
101 	}
102 	if (entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932))
103 		return 1;
104 	entry->cp_932[entry->n++] = cp_932;
105 	return 0;
106 }
107 
/*
 * Description of one script generator selectable on the command line.
 */
struct generator_entry {
	const char *name;      /* compared against argv[1] in main() */
	const char *prologue;  /* printed once, before any entry is visited */
	const char *epilogue;  /* printed once, after the last entry */
	void (*visitor)(const struct mappings_entry *);  /* called once per table entry */
};
114 
/*
 * Encode code point k as UTF-8 into buf and NUL-terminate the result.
 *
 * The original 1..6 byte scheme is used, so values of 0x200000 and above
 * yield 5- and 6-byte sequences.  buf must have room for 7 bytes.
 *
 * Returns the number of bytes written, not counting the terminator.
 */
static int utf32_utf8(char *buf, int k)
{
	/* lead-byte marker for a sequence of (index + 1) bytes */
	static const unsigned char lead[] = { 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
	int len, pos, rest;

	if (k < 0x80)
		len = 1;
	else if (k < 0x800)
		len = 2;
	else if (k < 0x10000)
		len = 3;
	else if (k < 0x200000)
		len = 4;
	else if (k < 0x4000000)
		len = 5;
	else
		len = 6;

	if (len == 1) {
		buf[0] = k;
	} else {
		/* continuation bytes carry six payload bits each, filled back to front */
		rest = k;
		for (pos = len - 1; pos > 0; --pos) {
			buf[pos] = 0x80 | (rest & 0x3f);
			rest >>= 6;
		}
		buf[0] = lead[len - 1] | rest;
	}
	buf[len] = '\0';

	return len;
}
157 
/*
 * Header shared by every generated expect script.  Only the arguments
 * that follow "Japanese" on the spawn line differ between generators;
 * conv_args is spliced in at that position.
 */
#define CONV_PROLOGUE(conv_args) \
	"#!/usr/bin/expect -f\n" \
	"spawn tests/conv_encoding Japanese " conv_args "\n" \
	"set timeout 1\n" \
	"\n" \
	"expect_after {\n" \
	"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" \
	"}\n"

/* Trailer shared by every generated script. */
static const char epilogue[] = "close\n";

static const char prologue_to_cp932[] = CONV_PROLOGUE("CP932 UTF-8");

static const char prologue_to_cp50220[] = CONV_PROLOGUE("CP50220 UTF-8");

static const char prologue_to_cp50222[] = CONV_PROLOGUE("CP50222 UTF-8");

static const char prologue_from_cp932[] = CONV_PROLOGUE("UTF-8 CP932");
196 
to_cp932_visitor(const struct mappings_entry * entry)197 static void to_cp932_visitor(const struct mappings_entry *entry)
198 {
199 	char buf_uni[32], buf_cp932[8];
200 	int i;
201 
202 	if (entry->cp_uni < 32 || entry->cp_uni == 127)
203 		return;
204 
205 	i = utf32_utf8(buf_uni, entry->cp_uni);
206 	buf_uni[i * 4] = '\0';
207 	while (--i >= 0) {
208 		unsigned char c = ((unsigned char *)buf_uni)[i];
209 		buf_uni[i * 4] = '\\';
210 		buf_uni[i * 4 + 1] = 'x';
211 		buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
212 		buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
213 	}
214 
215 	printf("set test \"U+%06X\"\n"
216 	       "send -- \"%s\r\"\n"
217 		   "sleep 0.001\n"
218 	       "expect {\n", entry->cp_uni, buf_uni);
219 
220 	for (i = 0; i < entry->n; ++i) {
221 		int len = 0;
222 		const int c = entry->cp_932[i];
223 		if (c >= 0x100) {
224 			len = 2;
225 			sprintf(buf_cp932, "%%%02x%%%02x", (c >> 8) & 0xff, c & 0xff);
226 		} else {
227 			len = 1;
228 			sprintf(buf_cp932, "%%%02x", c);
229 		}
230 		printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len);
231 	}
232 
233 	printf("}\n");
234 }
235 
from_cp932_visitor(const struct mappings_entry * entry)236 static void from_cp932_visitor(const struct mappings_entry *entry)
237 {
238 	char buf_uni[32], buf_cp932[8];
239 	int i, len;
240 
241 	if (entry->cp_uni < 32 || entry->cp_uni == 127)
242 		return;
243 
244 	len = utf32_utf8(buf_uni, entry->cp_uni);
245 	buf_uni[len * 3] = '\0';
246 	i = len;
247 	while (--i >= 0) {
248 		unsigned char c = ((unsigned char *)buf_uni)[i];
249 		buf_uni[i * 3] = '%';
250 		buf_uni[i * 3 + 1] = "0123456789abcdef"[c >> 4];
251 		buf_uni[i * 3 + 2] = "0123456789abcdef"[c & 15];
252 	}
253 
254 	for (i = 0; i < entry->n; ++i) {
255 		const int c = entry->cp_932[i];
256 		if (c >= 0x100)
257 			sprintf(buf_cp932, "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff);
258 		else
259 			sprintf(buf_cp932, "\\x%02x", c);
260 		printf("set test \"U+%06X\"\n"
261 			   "send -- \"%s\r\"\n"
262 			   "sleep 0.001\n"
263 			   "expect {\n"
264 		       "    \"%s (%d)\\r\\n\" { pass $test }\n"
265 		       "}\n",
266 			   entry->cp_uni, buf_cp932, buf_uni, len);
267 	}
268 }
269 
to_cp50220_visitor(const struct mappings_entry * entry)270 static void to_cp50220_visitor(const struct mappings_entry *entry)
271 {
272 	char buf_uni[32], buf_cp50220[32];
273 	int i;
274 
275 	if (entry->cp_uni < 32 || entry->cp_uni == 127)
276 		return;
277 
278 	i = utf32_utf8(buf_uni, entry->cp_uni);
279 	buf_uni[i * 4] = '\0';
280 	while (--i >= 0) {
281 		unsigned char c = ((unsigned char *)buf_uni)[i];
282 		buf_uni[i * 4] = '\\';
283 		buf_uni[i * 4 + 1] = 'x';
284 		buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
285 		buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
286 	}
287 
288 	printf("set test \"U+%06X\"\n"
289 	       "send -- \"%s\r\"\n"
290 		   "sleep 0.001\n"
291 	       "expect {\n", entry->cp_uni, buf_uni);
292 
293 	for (i = 0; i < entry->n; ++i) {
294 		int len = 0;
295 		const int c = entry->cp_932[i];
296 		if (c >= 0xa1 && c < 0xe0) {
297 			static const int jisx0208_tl_map[] = {
298 				0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
299 				0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
300 				0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
301 				0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
302 				0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
303 				0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
304 				0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
305 				0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
306 			};
307 			const int j = jisx0208_tl_map[c - 0xa0];
308 			len = 8;
309 			sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
310 		} else if (c >= 0x100) {
311 			const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
312 			len = 8;
313 			sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
314 		} else {
315 			len = 1;
316 			sprintf(buf_cp50220, "%%%02x", c);
317 		}
318 		printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
319 	}
320 
321 	printf("}\n");
322 }
323 
to_cp50222_visitor(const struct mappings_entry * entry)324 static void to_cp50222_visitor(const struct mappings_entry *entry)
325 {
326 	char buf_uni[32], buf_cp50220[32];
327 	int i;
328 
329 	if (entry->cp_uni < 32 || entry->cp_uni == 127)
330 		return;
331 
332 	i = utf32_utf8(buf_uni, entry->cp_uni);
333 	buf_uni[i * 4] = '\0';
334 	while (--i >= 0) {
335 		unsigned char c = ((unsigned char *)buf_uni)[i];
336 		buf_uni[i * 4] = '\\';
337 		buf_uni[i * 4 + 1] = 'x';
338 		buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
339 		buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
340 	}
341 
342 	printf("set test \"U+%06X\"\n"
343 	       "send -- \"%s\r\"\n"
344 		   "sleep 0.001\n"
345 	       "expect {\n", entry->cp_uni, buf_uni);
346 
347 	for (i = 0; i < entry->n; ++i) {
348 		int len = 0;
349 		const int c = entry->cp_932[i];
350 		if (c >= 0xa1 && c < 0xe0) {
351 			len = 3;
352 			sprintf(buf_cp50220, "%%0e%%%02x%%0f", c - 0x80);
353 		} else if (c >= 0x100) {
354 			const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
355 			len = 8;
356 			sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
357 		} else {
358 			len = 1;
359 			sprintf(buf_cp50220, "%%%02x", c);
360 		}
361 		printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
362 	}
363 
364 	printf("}\n");
365 }
366 
367 
368 static struct generator_entry entries[] = {
369 	{ "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
370 	{ "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
371 	{ "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
372 	{ "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
373 	{ NULL }
374 };
375 
376 static const char cp932_txt[] = "CP932.TXT";
377 
main(int argc,char ** argv)378 int main(int argc, char **argv)
379 {
380 	int retval = 0;
381 	FILE *fp;
382 	char buf[1024];
383 	struct generator_entry* gen;
384 	struct mappings map;
385 
386 	if (argc <= 1) {
387 		fprintf(stderr, "usage: %s generator\n", argv[0]);
388 		return 255;
389 	}
390 
391 	for (gen = entries;; ++gen) {
392 		if (!gen->name) {
393 			fprintf(stderr, "Unknown generator: %s\n", argv[1]);
394 			return 1;
395 		}
396 		if (strcmp(gen->name, argv[1]) == 0)
397 			break;
398 	}
399 
400     fp = fopen(cp932_txt, "r");
401 	if (!fp) {
402 		fprintf(stderr, "Failed to open %s\n", cp932_txt);
403 		return 2;
404 	}
405 
406 	mappings_init(&map);
407 
408 	while (fgets(buf, sizeof(buf), fp)) {
409 		const char *fields[16];
410 		char *p = buf;
411 		int field = 0;
412 		int cp_932, cp_uni;
413 		for (;;) {
414 			char *q = 0;
415 			int eol = 0;
416 
417 			if (field >= sizeof(fields) / sizeof(*fields)) {
418 				fprintf(stderr, "Too many fields (incorrect file?)\n");
419 				retval = 3;
420 				goto out;
421 			}
422 
423 			for (;;) {
424 				if (*p == '\0' || *p == '#' || *p == 0x0a) {
425 					eol = 1;
426 					break;
427 				} else if (*p != ' ' && *p != '\t') {
428 					break;
429 				}
430 				++p;
431 			}
432 
433 			if (eol)
434 				break;
435 
436 			q = p;
437 
438 			for (;;) {
439 				if (*p == '\0' || *p == '#' || *p == 0x0a) {
440 					eol = 1;
441 					break;
442 				} else if (*p == ' ' || *p == '\t') {
443 					break;
444 				}
445 				++p;
446 			}
447 
448 			*p = '\0';
449 			fields[field++] = q;
450 
451 			if (eol)
452 				break;
453 			++p;
454 		}
455 		if (field == 0 || field == 1) {
456 			continue;
457 		} else if (field != 2) {
458 			fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
459 			retval = 3;
460 			goto out;
461 		}
462 		cp_932 = strtol(fields[0], NULL, 0);
463 		if (errno == ERANGE || errno == EINVAL) {
464 			fprintf(stderr, "Malformed field value: %s\n", fields[0]);
465 			retval = 4;
466 			goto out;
467 		}
468 		cp_uni = strtol(fields[1], NULL, 0);
469 		if (errno == ERANGE || errno == EINVAL) {
470 			fprintf(stderr, "Malformed field value: %s\n", fields[1]);
471 			retval = 4;
472 			goto out;
473 		}
474 
475 		if (mappings_add(&map, cp_uni, cp_932)) {
476 			fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni);
477 			retval = 4;
478 			goto out;
479 		}
480 	}
481 
482 	{
483 		size_t i;
484 		printf("%s", gen->prologue);
485 		for (i = 0; i < map.n; ++i)
486 			gen->visitor(&map.entries[i]);
487 		printf("%s", gen->epilogue);
488 	}
489 
490 out:
491 	mappings_destroy(&map);
492 	return retval;
493 }
494 
495 /*
496  * vim: sts=4 sw=4 ts=4 noet
497  */
498