1#!/usr/bin/perl
2# script to generate Shift_JIS encoded Emoji to/from Unicode conversion table.
3# Rui Hirokawa <hirokawa@php.net>
4#
5# usage: mktbl.pl EmojiSources.txt
6#
# input line format: Unicode;DoCoMo;KDDI;SoftBank
8
# Shift_JIS linear code => Unicode code point(s), one table per carrier.
# (The script only ever uses these names as hashes.)
%docomo = ();
%kddi = ();
%softbank = ();

# Unicode code point(s) => Shift_JIS linear code, one table per carrier.
%to_docomo = ();
%to_kddi = ();
%to_sb = ();

# The generated C tables are written to this file in the current directory.
# Three-argument open keeps the mode separate from the file name, and the
# error message names the file that could not be created.
$fname = "emoji2uni.h";
open(OUT, '>', $fname) or die "cannot open $fname for writing: $!";
19
# Convert a double-byte Shift_JIS code to a linear table index.
#
# Argument: a string whose first four hex digits are the Shift_JIS code
#           (lead byte, then trail byte); anything after them is ignored
#           by pack("H4").
# Returns:  (row - 0x21) * 94 + (cell - 0x21), where row/cell are the two
#           JIS bytes obtained from the Shift_JIS -> JIS transformation.
#
# All working variables are lexical so the caller's package globals
# (notably $s, which the top-level loops also use) are left untouched.
sub sjis2code {
    my ($hex) = @_;
    my ($lead, $trail) = unpack("C*", pack("H4", $hex));

    # Shift_JIS -> JIS.  The row must be derived from the *original* trail
    # byte, so compute it before the trail byte is adjusted.
    my $row = (($lead - ($lead < 160 ? 112 : 176)) << 1)
            - ($trail < 159 ? 1 : 0);
    my $cell = $trail - ($trail < 159 ? ($trail > 127 ? 32 : 31) : 126);

    return ($row - 0x21) * 94 + $cell - 0x21;
}
31
# Format a list of hex strings as the body of a C array initialiser.
#
# Arguments: a list of values.  A true value is split on whitespace and its
#            first word is emitted as "0x<word>"; a false value (undef, "",
#            0) is emitted as "0x0000".
# Returns:   one string, starting with a tab, with every entry followed by
#            ", \t" and a "\n\t" line break after every fourth entry.
# Side effect: when a value holds more than one word (a multi-code-point
#            sequence), "<index> <word1> <word2>" is printed to STDOUT so
#            the entry can be reviewed; only the first word reaches the table.
#
# Working variables are lexical so that the package globals $s, $i and @v
# used elsewhere in the script are not overwritten by a call.
sub show_code {
    my @cells = @_;
    my $out = "\t";
    for my $idx (0 .. $#cells) {
        if ($cells[$idx]) {
            my @words = split(' ', $cells[$idx]);
            $out .= "0x$words[0], \t";
            print "$idx $words[0] $words[1]\n" if $#words > 0;
        } else {
            $out .= "0x0000, \t";
        }
        # Four entries per output line.
        $out .= "\n\t" if $idx % 4 == 3;
    }
    return $out;
}
51
# Read the mapping file named on the command line (or STDIN) and fill the
# per-carrier tables in both directions.  Fields are ';'-separated: field 0
# is the Unicode value, fields 1..3 are the DoCoMo, KDDI and SoftBank codes.
while (<>) {
    # Lines that do not begin with a digit (e.g. '#' comments) are skipped.
    next unless $_ =~ /^\d+/;

    @v = split(/;/, $_);

    # [ field index, code => Unicode table, Unicode => code table ]
    foreach my $carrier ([1, \%docomo,   \%to_docomo],
                         [2, \%kddi,     \%to_kddi],
                         [3, \%softbank, \%to_sb]) {
        my ($field, $code2uni, $uni2code) = @$carrier;

        # A carrier without a mapping has no hex digits in its field.
        next unless $v[$field] =~ /[\dA-F]+/;

        $code = sjis2code($v[$field]);
        $code2uni->{$code} = $v[0];
        $uni2code->{$v[0]} = $code;
    }
}
72
# ---- DoCoMo: linear code => Unicode ----
print "DoCoMo\n";

# Range of linear codes (as produced by sjis2code) covered by the forward
# table; both bounds are written to the generated file further down.
$docomo_min = 10434;
$docomo_max = 10434+281;
@docomo_v = ();

# Keys of %docomo are decimal linear codes, so they are ordered numerically
# (the former hex() comparator produced the same order only by accident).
# Each Unicode value is stored at its offset from $docomo_min.
foreach my $code (sort { $a <=> $b } keys %docomo) {
    $docomo_v[$code - $docomo_min] = $docomo{$code};
}
87
# ---- DoCoMo: Unicode => linear code ----
$to_docomo_min = 10434;

# Unicode ranges covered by the three reverse tables; these bounds are also
# written to the generated file.
$to_docomo_min1 = 0x0023;
$to_docomo_max1 = 0x00AE;
$to_docomo_min2 = 0x203C;
$to_docomo_max2 = 0x3299;
$to_docomo_min3 = 0x1F17F;
$to_docomo_max3 = 0x1F6BB;

# Parallel key/value lists, one pair per range.
@r_docomo1_key = ();
@r_docomo1_val = ();

@r_docomo2_key = ();
@r_docomo2_val = ();

@r_docomo3_key = ();
@r_docomo3_val = ();

# Walk the Unicode keys in ascending code-point order and file each one
# under the range it belongs to.  Values are the linear codes in hex.
foreach my $uni (sort { hex($a) <=> hex($b) } keys %to_docomo) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_docomo{$uni});

    if ($cp <= $to_docomo_max1) {
        push(@r_docomo1_key, $uni);
        push(@r_docomo1_val, $code_hex);
    } elsif ($cp <= $to_docomo_max2) {
        push(@r_docomo2_key, $uni);
        push(@r_docomo2_val, $code_hex);
    } elsif ($cp >= $to_docomo_min3) {
        # The third range starts at its *lower* bound.  Comparing against
        # $to_docomo_max3 here dropped every code point below the maximum.
        push(@r_docomo3_key, $uni);
        push(@r_docomo3_val, $code_hex);
    }
}

# Terminate every list with a zero entry.
foreach my $list (\@r_docomo1_key, \@r_docomo1_val,
                  \@r_docomo2_key, \@r_docomo2_val,
                  \@r_docomo3_key, \@r_docomo3_val) {
    push(@$list, 0x00);
}
133
# ---- DoCoMo: write the tables ----

# Forward table: its bounds, then the code => Unicode array.
print OUT "int mb_tbl_code2uni_docomo_min = $docomo_min;\n",
          "int mb_tbl_code2uni_docomo_max = $docomo_max;\n\n";

print OUT "int mb_tbl_code2uni_docomo[] = {\n", show_code(@docomo_v), "};\n\n";

# Bounds of the three reverse (Unicode => code) ranges.
print OUT join("\n",
    "int mb_tbl_uni_docomo2code_min1 = $to_docomo_min1;",
    "int mb_tbl_uni_docomo2code_max1 = $to_docomo_max1;",
    "int mb_tbl_uni_docomo2code_min2 = $to_docomo_min2;",
    "int mb_tbl_uni_docomo2code_max2 = $to_docomo_max2;",
    "int mb_tbl_uni_docomo2code_min3 = $to_docomo_min3;",
    "int mb_tbl_uni_docomo2code_max3 = $to_docomo_max3;",
), "\n\n";

# Reverse tables: a key array followed by its value array, per range.
foreach my $table (['key1', \@r_docomo1_key], ['val1', \@r_docomo1_val],
                   ['key2', \@r_docomo2_key], ['val2', \@r_docomo2_val],
                   ['key3', \@r_docomo3_key], ['val3', \@r_docomo3_val]) {
    my ($suffix, $rows) = @$table;

    # Progress marker on STDOUT just before the third range is formatted.
    print "DOCOMO reverse 3\n" if $suffix eq 'key3';

    print OUT "int mb_tbl_uni_docomo2code_" . $suffix . "[] = {\n";
    print OUT show_code(@$rows), "\n";
    print OUT "};\n\n";
}
176
# ---- KDDI: linear code => Unicode ----

# The forward mapping is split into two ranges of linear codes; all four
# bounds are written to the generated file further down.
$kddi_min1 = 9400;
$kddi_max1 = 9400+264;
$kddi_min2 = 9400+564;
$kddi_max2 = 9400+939;

@kddi_v1 = ();
@kddi_v2 = ();

# Keys of %kddi are decimal linear codes, so they are ordered numerically.
# Each value is stored at its offset from the start of the range it falls
# in; codes above $kddi_max2 are left out.
foreach my $code (sort { $a <=> $b } keys %kddi) {
    if ($code <= $kddi_max1) {
        $kddi_v1[$code - $kddi_min1] = $kddi{$code};
    } elsif ($code <= $kddi_max2) {
        $kddi_v2[$code - $kddi_min2] = $kddi{$code};
    }
}
201
# ---- KDDI: Unicode => linear code ----
$to_kddi_min = 9660;

# Unicode ranges covered by the three reverse tables; these bounds are also
# written to the generated file.
$to_kddi_min1 = 0x0030;
$to_kddi_max1 = 0x00AE;
$to_kddi_min2 = 0x2002;
$to_kddi_max2 = 0x3299;
$to_kddi_min3 = 0x1F004;
$to_kddi_max3 = 0x1F6C0;

# Parallel key/value lists, one pair per range.
@r_kddi1_key = (); @r_kddi1_val = ();
@r_kddi2_key = (); @r_kddi2_val = ();
@r_kddi3_key = (); @r_kddi3_val = ();

# Walk the Unicode keys in ascending code-point order.  Anything above the
# second range's upper bound goes to the third list.  Values are the linear
# codes in hex.
foreach my $uni (sort { hex($a) <=> hex($b) } keys %to_kddi) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_kddi{$uni});

    if ($cp <= $to_kddi_max1) {
        push(@r_kddi1_key, $uni);
        push(@r_kddi1_val, $code_hex);
    } elsif ($cp <= $to_kddi_max2) {
        push(@r_kddi2_key, $uni);
        push(@r_kddi2_val, $code_hex);
    } else {
        push(@r_kddi3_key, $uni);
        push(@r_kddi3_val, $code_hex);
    }
}

# Terminate every list with a zero entry.
foreach my $list (\@r_kddi1_key, \@r_kddi1_val,
                  \@r_kddi2_key, \@r_kddi2_val,
                  \@r_kddi3_key, \@r_kddi3_val) {
    push(@$list, 0x00);
}
240
# ---- KDDI: write the tables ----

# Bounds of the two forward (code => Unicode) ranges.
print OUT join("\n",
    "int mb_tbl_code2uni_kddi1_min = $kddi_min1;",
    "int mb_tbl_code2uni_kddi1_max = $kddi_max1;",
    "int mb_tbl_code2uni_kddi2_min = $kddi_min2;",
    "int mb_tbl_code2uni_kddi2_max = $kddi_max2;",
), "\n\n";

# Forward tables, one array per range.
foreach my $table (['kddi1', \@kddi_v1], ['kddi2', \@kddi_v2]) {
    my ($name, $rows) = @$table;
    print OUT "int mb_tbl_code2uni_" . $name . "[] = {\n";
    print OUT show_code(@$rows);
    print OUT "};\n\n";
}

# Bounds of the three reverse (Unicode => code) ranges.
print OUT join("\n",
    "int mb_tbl_uni_kddi2code_min1 = $to_kddi_min1;",
    "int mb_tbl_uni_kddi2code_max1 = $to_kddi_max1;",
    "int mb_tbl_uni_kddi2code_min2 = $to_kddi_min2;",
    "int mb_tbl_uni_kddi2code_max2 = $to_kddi_max2;",
    "int mb_tbl_uni_kddi2code_min3 = $to_kddi_min3;",
    "int mb_tbl_uni_kddi2code_max3 = $to_kddi_max3;",
), "\n\n";

# Reverse tables: a key array followed by its value array, per range.
foreach my $table (['key1', \@r_kddi1_key], ['val1', \@r_kddi1_val],
                   ['key2', \@r_kddi2_key], ['val2', \@r_kddi2_val],
                   ['key3', \@r_kddi3_key], ['val3', \@r_kddi3_val]) {
    my ($suffix, $rows) = @$table;
    print OUT "int mb_tbl_uni_kddi2code_" . $suffix . "[] = {\n";
    print OUT show_code(@$rows), "\n";
    print OUT "};\n\n";
}
291
292
# ---- SoftBank: linear code => Unicode ----

# The forward mapping is split into three ranges of linear codes; all six
# bounds are written to the generated file further down.
$sb_min1 = 10153;
$sb_max1 = 10153+177;
$sb_min2 = 10153+376;
$sb_max2 = 10153+547;
$sb_min3 = 10153+752;
$sb_max3 = 10153+901;

@sb_v1 = ();
@sb_v2 = ();
@sb_v3 = ();

print "SoftBank\n";

# Keys of %softbank are decimal linear codes, so they are ordered
# numerically.  Each value is stored at its offset from the start of the
# range it falls in; codes above $sb_max3 are left out.
foreach my $code (sort { $a <=> $b } keys %softbank) {
    if ($code <= $sb_max1) {
        $sb_v1[$code - $sb_min1] = $softbank{$code};
    } elsif ($code <= $sb_max2) {
        $sb_v2[$code - $sb_min2] = $softbank{$code};
    } elsif ($code <= $sb_max3) {
        $sb_v3[$code - $sb_min3] = $softbank{$code};
    }
}
324
# ---- SoftBank: Unicode => linear code ----
$to_sb_min = 10263;

# Unicode ranges covered by the three reverse tables; these bounds are also
# written to the generated file.
$to_sb_min1 = 0x0023;
$to_sb_max1 = 0x00AE;
$to_sb_min2 = 0x2122;
$to_sb_max2 = 0x3299;
$to_sb_min3 = 0x1F004;
$to_sb_max3 = 0x1F6C0;

# Parallel key/value lists, one pair per range.
@r_sb1_key = (); @r_sb1_val = ();
@r_sb2_key = (); @r_sb2_val = ();
@r_sb3_key = (); @r_sb3_val = ();

# Walk the Unicode keys in ascending code-point order.  A key goes to the
# second list only when it lies inside [$to_sb_min2, $to_sb_max2]; every
# key that is in neither of the first two ranges goes to the third list.
# Values are the linear codes in hex.
foreach my $uni (sort { hex($a) <=> hex($b) } keys %to_sb) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_sb{$uni});

    if ($cp <= $to_sb_max1) {
        push(@r_sb1_key, $uni);
        push(@r_sb1_val, $code_hex);
    } elsif ($cp >= $to_sb_min2 && $cp <= $to_sb_max2) {
        push(@r_sb2_key, $uni);
        push(@r_sb2_val, $code_hex);
    } else {
        push(@r_sb3_key, $uni);
        push(@r_sb3_val, $code_hex);
    }
}

# Terminate every list with a zero entry.
foreach my $list (\@r_sb1_key, \@r_sb1_val,
                  \@r_sb2_key, \@r_sb2_val,
                  \@r_sb3_key, \@r_sb3_val) {
    push(@$list, 0x00);
}
363
364
# ---- SoftBank: write the tables ----

# Bounds of the three forward (code => Unicode) ranges.
print OUT join("\n",
    "int mb_tbl_code2uni_sb1_min = $sb_min1;",
    "int mb_tbl_code2uni_sb1_max = $sb_max1;",
    "int mb_tbl_code2uni_sb2_min = $sb_min2;",
    "int mb_tbl_code2uni_sb2_max = $sb_max2;",
    "int mb_tbl_code2uni_sb3_min = $sb_min3;",
    "int mb_tbl_code2uni_sb3_max = $sb_max3;",
), "\n\n";

# Forward tables, one array per range.
foreach my $table (['sb1', \@sb_v1], ['sb2', \@sb_v2], ['sb3', \@sb_v3]) {
    my ($name, $rows) = @$table;
    print OUT "int mb_tbl_code2uni_" . $name . "[] = {\n";
    print OUT show_code(@$rows);
    print OUT "};\n\n";
}

# Bounds of the three reverse (Unicode => code) ranges.
print OUT join("\n",
    "int mb_tbl_uni_sb2code_min1 = $to_sb_min1;",
    "int mb_tbl_uni_sb2code_max1 = $to_sb_max1;",
    "int mb_tbl_uni_sb2code_min2 = $to_sb_min2;",
    "int mb_tbl_uni_sb2code_max2 = $to_sb_max2;",
    "int mb_tbl_uni_sb2code_min3 = $to_sb_min3;",
    "int mb_tbl_uni_sb2code_max3 = $to_sb_max3;",
), "\n\n";

# Reverse tables: a key array followed by its value array, per range.
foreach my $table (['key1', \@r_sb1_key], ['val1', \@r_sb1_val],
                   ['key2', \@r_sb2_key], ['val2', \@r_sb2_val],
                   ['key3', \@r_sb3_key], ['val3', \@r_sb3_val]) {
    my ($suffix, $rows) = @$table;
    print OUT "int mb_tbl_uni_sb2code_" . $suffix . "[] = {\n";
    print OUT show_code(@$rows), "\n";
    print OUT "};\n\n";
}
423
424
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close must not pass silently.
close(OUT) or die "cannot close $fname: $!";
426