#!/usr/bin/perl
# Script to generate the Shift_JIS encoded Emoji to/from Unicode conversion
# tables.
# Rui Hirokawa <hirokawa@php.net>
#
# usage: mktbl.pl EmojiSources.txt
#
# Input fields, separated by ';':
#   Unicode;DoCoMo;KDDI;SoftBank
#
# The generated C tables are written to emoji2uni.h in the current directory.
# Progress banners and multi-code-point diagnostics are printed to STDOUT.

use strict;
use warnings;

# Forward maps: linear carrier code (see sjis2code) -> Unicode field.
my (%docomo, %kddi, %softbank);

# Reverse maps: Unicode field -> linear carrier code.
my (%to_docomo, %to_kddi, %to_sb);

my $fname = "emoji2uni.h";
open(my $out, '>', $fname) or die $!;

# Convert a Shift_JIS code given as a hex string (first four hex digits are
# used) into a linear index: the two bytes are mapped Shift_JIS -> JIS and
# then flattened as (row - 0x21) * 94 + (column - 0x21).
sub sjis2code {
    my @c = unpack("C*", pack("H4", $_[0]));

    # Shift_JIS -> JIS
    $c[0] = (($c[0] - ($c[0] < 160 ? 112 : 176)) << 1) - ($c[1] < 159 ? 1 : 0);
    $c[1] -= ($c[1] < 159 ? ($c[1] > 127 ? 32 : 31) : 126);

    return ($c[0] - 0x21) * 94 + $c[1] - 0x21;
}

# Format a list of hex strings as the body of a C array initializer, four
# entries per row.  False entries (undef, 0, '') are written as 0x0000.
# When an entry holds several space-separated code points only the first is
# emitted; the entry's index and first two code points are reported on STDOUT.
# Returns the formatted string.
sub show_code {
    my @c = @_;
    my $s = "\t";
    for my $i (0 .. $#c) {
        if ($c[$i]) {
            my @v = split(' ', $c[$i]);
            $s .= "0x$v[0], \t";
            if ($#v > 0) {
                print "$i $v[0] $v[1]\n";
            }
        }
        else {
            $s .= "0x0000, \t";
        }
        if ($i % 4 == 3) {
            $s .= "\n\t";
        }
    }
    return $s;
}

# Numeric value of the first code point of a Unicode field such as
# "0023 20E3".  hex() stops interpreting at the first non-hex character, which
# is exactly what is wanted here, so only its "illegal digit" warning is
# silenced.
sub first_codepoint {
    my ($field) = @_;
    no warnings 'digit';
    return hex($field);
}

# Split a Unicode -> carrier-code map into three parallel key/value lists,
# ordered by first code point:
#   bucket 0: code point <= $max1
#   bucket 1: code point <= $max2 (and >= $min2 when $min2 is given)
#   bucket 2: everything else
# Values are the carrier codes formatted with "%x".  Every list gets a
# trailing 0 terminator.  Returns (\@keys, \@vals), each holding three array
# references.
sub partition_by_range {
    my ($code_of, $max1, $max2, $min2) = @_;
    my @keys = ([], [], []);
    my @vals = ([], [], []);

    my @ordered = sort { first_codepoint($a) <=> first_codepoint($b) }
        keys %{$code_of};
    for my $unicode (@ordered) {
        my $cp = first_codepoint($unicode);
        my $bucket;
        if ($cp <= $max1) {
            $bucket = 0;
        }
        elsif ((!defined $min2 || $cp >= $min2) && $cp <= $max2) {
            $bucket = 1;
        }
        else {
            $bucket = 2;
        }
        push @{ $keys[$bucket] }, $unicode;
        push @{ $vals[$bucket] }, sprintf("%x", $code_of->{$unicode});
    }

    for my $bucket (0 .. 2) {
        push @{ $keys[$bucket] }, 0x00;
        push @{ $vals[$bucket] }, 0x00;
    }
    return (\@keys, \@vals);
}

# Write "int NAME = VALUE;" lines for the given name/value pairs, followed by
# one blank line.
sub emit_ints {
    my ($fh, @name_value) = @_;
    while (my ($name, $value) = splice(@name_value, 0, 2)) {
        print {$fh} "int $name = $value;\n";
    }
    print {$fh} "\n";
    return;
}

# Write one C array definition.  $tail is appended after the formatted
# values (the reverse tables use "\n", the forward tables use '').
sub emit_table {
    my ($fh, $name, $tail, @values) = @_;
    print {$fh} 'int ', $name, "[] = {\n";
    print {$fh} show_code(@values), $tail;
    print {$fh} "};\n\n";
    return;
}

# Write the key and value arrays of reverse bucket $n for one carrier.
sub emit_reverse_pair {
    my ($fh, $carrier, $n, $keys, $vals) = @_;
    my $prefix = 'mb_tbl_uni_' . $carrier . '2code_';
    emit_table($fh, $prefix . 'key' . $n, "\n", @{$keys});
    emit_table($fh, $prefix . 'val' . $n, "\n", @{$vals});
    return;
}

# ---------------------------------------------------------------------------
# Read the source table.
# ---------------------------------------------------------------------------

# Column 1..3 of each input line -> [forward map, reverse map].
my @carrier_maps = (
    [ \%docomo,   \%to_docomo ],
    [ \%kddi,     \%to_kddi ],
    [ \%softbank, \%to_sb ],
);

while (my $line = <>) {
    next unless $line =~ /^\d+/;    # only lines that start with a digit

    my @field   = split(/;/, $line);
    my $unicode = $field[0];
    for my $col (1 .. 3) {
        my $sjis = $field[$col];
        next unless defined $sjis && $sjis =~ /[\dA-F]+/;

        my ($uni_of, $code_of) = @{ $carrier_maps[ $col - 1 ] };
        my $code = sjis2code($sjis);
        $uni_of->{$code}     = $unicode;
        $code_of->{$unicode} = $code;
    }
}

# ---------------------------------------------------------------------------
# DoCoMo
# ---------------------------------------------------------------------------

print "DoCoMo\n";

my $docomo_min = 10434;
my $docomo_max = 10434 + 281;
my @docomo_v;

# Keys are decimal linear codes; slot 0 corresponds to $docomo_min.
for my $code (sort { $a <=> $b } keys %docomo) {
    $docomo_v[ $code - $docomo_min ] = $docomo{$code};
}

my $to_docomo_min1 = 0x0023;
my $to_docomo_max1 = 0x00AE;
my $to_docomo_min2 = 0x203C;
my $to_docomo_max2 = 0x3299;
my $to_docomo_min3 = 0x1F17F;
my $to_docomo_max3 = 0x1F6BB;

# Everything above $to_docomo_max2 belongs to the third table, exactly as for
# KDDI and SoftBank.  (A former ">= $to_docomo_max3" test here dropped almost
# all of the third range.)
my ($r_docomo_key, $r_docomo_val)
    = partition_by_range(\%to_docomo, $to_docomo_max1, $to_docomo_max2);

emit_ints($out,
    mb_tbl_code2uni_docomo_min => $docomo_min,
    mb_tbl_code2uni_docomo_max => $docomo_max,
);
emit_table($out, 'mb_tbl_code2uni_docomo', '', @docomo_v);

emit_ints($out,
    mb_tbl_uni_docomo2code_min1 => $to_docomo_min1,
    mb_tbl_uni_docomo2code_max1 => $to_docomo_max1,
    mb_tbl_uni_docomo2code_min2 => $to_docomo_min2,
    mb_tbl_uni_docomo2code_max2 => $to_docomo_max2,
    mb_tbl_uni_docomo2code_min3 => $to_docomo_min3,
    mb_tbl_uni_docomo2code_max3 => $to_docomo_max3,
);

emit_reverse_pair($out, 'docomo', 1, $r_docomo_key->[0], $r_docomo_val->[0]);
emit_reverse_pair($out, 'docomo', 2, $r_docomo_key->[1], $r_docomo_val->[1]);

print "DOCOMO reverse 3\n";

emit_reverse_pair($out, 'docomo', 3, $r_docomo_key->[2], $r_docomo_val->[2]);

# ---------------------------------------------------------------------------
# KDDI
# ---------------------------------------------------------------------------

my $kddi_min1 = 9400;
my $kddi_max1 = 9400 + 264;
my $kddi_min2 = 9400 + 564;
my $kddi_max2 = 9400 + 939;

my @kddi_v1;
my @kddi_v2;

# Codes are assumed to fall inside one of the two ranges; anything above
# $kddi_max2 is ignored.
for my $code (sort { $a <=> $b } keys %kddi) {
    if ($code <= $kddi_max1) {
        $kddi_v1[ $code - $kddi_min1 ] = $kddi{$code};
    }
    elsif ($code <= $kddi_max2) {
        $kddi_v2[ $code - $kddi_min2 ] = $kddi{$code};
    }
}

my $to_kddi_min1 = 0x0030;
my $to_kddi_max1 = 0x00AE;
my $to_kddi_min2 = 0x2002;
my $to_kddi_max2 = 0x3299;
my $to_kddi_min3 = 0x1F004;
my $to_kddi_max3 = 0x1F6C0;

my ($r_kddi_key, $r_kddi_val)
    = partition_by_range(\%to_kddi, $to_kddi_max1, $to_kddi_max2);

emit_ints($out,
    mb_tbl_code2uni_kddi1_min => $kddi_min1,
    mb_tbl_code2uni_kddi1_max => $kddi_max1,
    mb_tbl_code2uni_kddi2_min => $kddi_min2,
    mb_tbl_code2uni_kddi2_max => $kddi_max2,
);
emit_table($out, 'mb_tbl_code2uni_kddi1', '', @kddi_v1);
emit_table($out, 'mb_tbl_code2uni_kddi2', '', @kddi_v2);

emit_ints($out,
    mb_tbl_uni_kddi2code_min1 => $to_kddi_min1,
    mb_tbl_uni_kddi2code_max1 => $to_kddi_max1,
    mb_tbl_uni_kddi2code_min2 => $to_kddi_min2,
    mb_tbl_uni_kddi2code_max2 => $to_kddi_max2,
    mb_tbl_uni_kddi2code_min3 => $to_kddi_min3,
    mb_tbl_uni_kddi2code_max3 => $to_kddi_max3,
);

emit_reverse_pair($out, 'kddi', 1, $r_kddi_key->[0], $r_kddi_val->[0]);
emit_reverse_pair($out, 'kddi', 2, $r_kddi_key->[1], $r_kddi_val->[1]);
emit_reverse_pair($out, 'kddi', 3, $r_kddi_key->[2], $r_kddi_val->[2]);

# ---------------------------------------------------------------------------
# SoftBank
# ---------------------------------------------------------------------------

my $sb_min1 = 10153;
my $sb_max1 = 10153 + 177;
my $sb_min2 = 10153 + 376;
my $sb_max2 = 10153 + 547;
my $sb_min3 = 10153 + 752;
my $sb_max3 = 10153 + 901;

my @sb_v1;
my @sb_v2;
my @sb_v3;

print "SoftBank\n";

# Codes are assumed to fall inside one of the three ranges; anything above
# $sb_max3 is ignored.
for my $code (sort { $a <=> $b } keys %softbank) {
    if ($code <= $sb_max1) {
        $sb_v1[ $code - $sb_min1 ] = $softbank{$code};
    }
    elsif ($code <= $sb_max2) {
        $sb_v2[ $code - $sb_min2 ] = $softbank{$code};
    }
    elsif ($code <= $sb_max3) {
        $sb_v3[ $code - $sb_min3 ] = $softbank{$code};
    }
}

my $to_sb_min1 = 0x0023;
my $to_sb_max1 = 0x00AE;
my $to_sb_min2 = 0x2122;
my $to_sb_max2 = 0x3299;
my $to_sb_min3 = 0x1F004;
my $to_sb_max3 = 0x1F6C0;

# SoftBank additionally requires code point >= $to_sb_min2 for the second
# table; code points between $to_sb_max1 and $to_sb_min2 land in the third.
my ($r_sb_key, $r_sb_val)
    = partition_by_range(\%to_sb, $to_sb_max1, $to_sb_max2, $to_sb_min2);

emit_ints($out,
    mb_tbl_code2uni_sb1_min => $sb_min1,
    mb_tbl_code2uni_sb1_max => $sb_max1,
    mb_tbl_code2uni_sb2_min => $sb_min2,
    mb_tbl_code2uni_sb2_max => $sb_max2,
    mb_tbl_code2uni_sb3_min => $sb_min3,
    mb_tbl_code2uni_sb3_max => $sb_max3,
);
emit_table($out, 'mb_tbl_code2uni_sb1', '', @sb_v1);
emit_table($out, 'mb_tbl_code2uni_sb2', '', @sb_v2);
emit_table($out, 'mb_tbl_code2uni_sb3', '', @sb_v3);

emit_ints($out,
    mb_tbl_uni_sb2code_min1 => $to_sb_min1,
    mb_tbl_uni_sb2code_max1 => $to_sb_max1,
    mb_tbl_uni_sb2code_min2 => $to_sb_min2,
    mb_tbl_uni_sb2code_max2 => $to_sb_max2,
    mb_tbl_uni_sb2code_min3 => $to_sb_min3,
    mb_tbl_uni_sb2code_max3 => $to_sb_max3,
);

emit_reverse_pair($out, 'sb', 1, $r_sb_key->[0], $r_sb_val->[0]);
emit_reverse_pair($out, 'sb', 2, $r_sb_key->[1], $r_sb_val->[1]);
emit_reverse_pair($out, 'sb', 3, $r_sb_key->[2], $r_sb_val->[2]);

# Buffered write errors only surface at close time.
close($out) or die $!;