#!/usr/bin/env php
<?php error_reporting(E_ALL);

/**
 * This is based on the ucgendat.c file from the OpenLDAP project, licensed as
 * follows. This file is not necessary to build PHP. It's only necessary to
 * rebuild unicode_data.h and eaw_table.h from Unicode ucd files.
 *
 * Example usage:
 *     php ucgendat.php path/to/Unicode/data/files
 */

/* Copyright 1998-2007 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available at
 * <http://www.OpenLDAP.org/license.html>.
 */

/* Copyright 2001 Computing Research Labs, New Mexico State University
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

if ($argc < 2) {
    echo "Usage: php ucgendat.php ./datadir\n";
    echo "./datadir must contain:\n";
    echo "UnicodeData.txt, CaseFolding.txt, SpecialCasing.txt, DerivedCoreProperties.txt, and EastAsianWidth.txt\n";
    return;
}

$dir = $argv[1];
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';
$derivedCorePropertiesFile = $dir . '/DerivedCoreProperties.txt';
$eastAsianWidthFile = $dir . '/EastAsianWidth.txt';

$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile, $derivedCorePropertiesFile, $eastAsianWidthFile];
foreach ($files as $file) {
    if (!file_exists($file)) {
        echo "File $file does not exist.\n";
        return;
    }
}

$outputFile = __DIR__ . "/../unicode_data.h";

$data = new UnicodeData;
parseUnicodeData($data, file_get_contents($unicodeDataFile));
parseCaseFolding($data, file_get_contents($caseFoldingFile));
parseSpecialCasing($data, file_get_contents($specialCasingFile));
parseDerivedCoreProperties($data, file_get_contents($derivedCorePropertiesFile));
if (file_put_contents($outputFile, generateData($data)) === false) {
    echo "Could not write $outputFile.\n";
    return;
}

$eawFile = __DIR__ . "/../libmbfl/mbfl/eaw_table.h";

$eawData = parseEastAsianWidth(file_get_contents($eastAsianWidthFile));
if (file_put_contents($eawFile, generateEastAsianWidthData($eawData)) === false) {
    echo "Could not write $eawFile.\n";
    return;
}

/**
 * Inclusive range of code points [start, end].
 */
class Range {
    public $start;
    public $end;

    public function __construct(int $start, int $end) {
        $this->start = $start;
        $this->end = $end;
    }
}

/**
 * Accumulates everything parsed from the Unicode data files:
 * per-property code point ranges and the four case mapping tables.
 */
class UnicodeData {
    /** @var array<string,int> Property name => index into $propRanges */
    public $propIndexes;
    /** @var int Number of known properties */
    public $numProps;
    /** @var Range[][] Ranges collected for each property index */
    public $propRanges;
    /** @var array<string,array<int,int|int[]>> 'upper'/'lower'/'title'/'fold' => code => mapping */
    public $caseMaps;
    /** @var int[] Flat storage for multi-code-point mappings, filled by prepareCaseData() */
    public $extraCaseData;

    public function __construct() {
        /*
         * List of properties expected to be found in the Unicode Character Database.
         */
        $this->propIndexes = array_flip([
            "Mn", "Mc", "Me", "Nd", "Nl", "No",
            "Zs", "Zl", "Zp", "Cs", "Co", "Cn",
            "Lu", "Ll", "Lt", "Lm", "Lo", "Sm",
            "Sc", "Sk", "So", "L", "R", "EN",
            "ES", "ET", "AN", "CS", "B", "S",
            "WS", "ON", "AL",
            "C", "P", "Cased", "Case_Ignorable"
        ]);
        $this->numProps = count($this->propIndexes);

        $this->propRanges = array_fill(0, $this->numProps, []);
        $this->caseMaps = [
            'upper' => [],
            'lower' => [],
            'title' => [],
            'fold' => [],
        ];
        $this->extraCaseData = [];
    }

    /**
     * Map a general category / bidi class / derived property name to its
     * index in $propRanges, after merging the categories we do not
     * distinguish.
     *
     * @throws Exception if the (merged) property is not in $propIndexes
     */
    public function propToIndex(string $prop) : int {
        /* Deal with directionality codes introduced in Unicode 3.0. */
        if (in_array($prop, ["BN", "NSM", "PDF", "LRE", "LRO", "RLE", "RLO", "LRI", "RLI", "FSI", "PDI"], true)) {
            /*
             * Mark all of these as Other Neutral to preserve compatibility with
             * older versions.
             */
            $prop = "ON";
        }

        /* Merge all punctuation into a single category for efficiency of access.
         * We're currently not interested in distinguishing different kinds of punctuation. */
        if (in_array($prop, ["Pc", "Pd", "Ps", "Pe", "Po", "Pi", "Pf"], true)) {
            $prop = "P";
        }
        /* Same for control. */
        if (in_array($prop, ["Cc", "Cf"], true)) {
            $prop = "C";
        }

        if (!isset($this->propIndexes[$prop])) {
            throw new Exception("Unknown property $prop");
        }

        return $this->propIndexes[$prop];
    }

    /**
     * Record that a single code point has the given property, growing the
     * most recently added range when the code point directly follows it.
     */
    public function addProp(int $code, string $prop) : void {
        $propIdx = $this->propToIndex($prop);

        // Check if this extends the last range. Range objects are shared by
        // handle, so mutating $lastRange updates the stored range as well.
        $ranges = $this->propRanges[$propIdx];
        if (!empty($ranges)) {
            $lastRange = $ranges[count($ranges) - 1];
            if ($code === $lastRange->end + 1) {
                $lastRange->end++;
                return;
            }
        }

        $this->propRanges[$propIdx][] = new Range($code, $code);
    }

    /**
     * Record that every code point in [$startCode, $endCode] has the given property.
     */
    public function addPropRange(int $startCode, int $endCode, string $prop) : void {
        $propIdx = $this->propToIndex($prop);
        $this->propRanges[$propIdx][] = new Range($startCode, $endCode);
    }

    /**
     * Store a simple (single code point) case mapping of the given type.
     */
    public function addCaseMapping(string $case, int $origCode, int $mappedCode) : void {
        $this->caseMaps[$case][$origCode] = $mappedCode;
    }

    /**
     * Sort ranges by start and merge directly adjacent ones.
     *
     * Note that the Range objects passed in are reused and may be modified.
     *
     * @param Range[] $ranges
     * @return Range[]
     * @throws Exception if two ranges overlap
     */
    public function compactRangeArray(array $ranges) : array {
        // Sort by start codepoint
        usort($ranges, function (Range $r1, Range $r2) {
            return $r1->start <=> $r2->start;
        });

        // A range ending at -1 acts as the "nothing pending yet" marker
        $lastRange = new Range(-1, -1);
        $newRanges = [];
        foreach ($ranges as $range) {
            if ($lastRange->end === -1) {
                $lastRange = $range;
            } else if ($range->start === $lastRange->end + 1) {
                $lastRange->end = $range->end;
            } else if ($range->start > $lastRange->end + 1) {
                $newRanges[] = $lastRange;
                $lastRange = $range;
            } else {
                throw new Exception(sprintf(
                    "Overlapping ranges [%x, %x] and [%x, %x]",
                    $lastRange->start, $lastRange->end,
                    $range->start, $range->end
                ));
            }
        }
        if ($lastRange->end !== -1) {
            $newRanges[] = $lastRange;
        }
        return $newRanges;
    }

    /**
     * Compact the range list of every property in place.
     */
    public function compactPropRanges() : void {
        foreach ($this->propRanges as &$ranges) {
            $ranges = $this->compactRangeArray($ranges);
        }
        // Break the reference so the last element is not left aliased
        unset($ranges);
    }
}

/**
 * Lazily split a UCD-style data file into records.
 *
 * Comments (everything from '#' on) and blank lines are dropped; each
 * remaining line is yielded as a list of trimmed ';'-separated fields.
 */
function parseDataFile(string $input) : Generator {
    $lines = explode("\n", $input);
    foreach ($lines as $line) {
        // Strip comments
        if (false !== $hashPos = strpos($line, '#')) {
            $line = substr($line, 0, $hashPos);
        }

        // Skip empty lines
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        $fields = array_map('trim', explode(';', $line));
        yield $fields;
    }
}

/**
 * Parse UnicodeData.txt: general category and bidi class of every code
 * point, plus the simple upper/lower/title case mappings.
 *
 * @throws Exception on malformed lines
 */
function parseUnicodeData(UnicodeData $data, string $input) : void {
    $lines = parseDataFile($input);
    foreach ($lines as $fields) {
        if (count($fields) != 15) {
            throw new Exception("Line does not contain 15 fields");
        }

        $code = intval($fields[0], 16);

        $name = $fields[1];
        if ($name === '') {
            throw new Exception("Empty name");
        }

        $generalCategory = $fields[2];
        $bidiClass = $fields[4];

        if ($name[0] === '<' && $name !== '<control>') {
            // This is a character range: the following line carries the
            // last code point. Consume it here; the foreach advances past
            // it afterwards.
            $lines->next();
            if (!$lines->valid()) {
                throw new Exception("Range start $name without a matching end line");
            }
            $nextFields = $lines->current();
            $nextCode = intval($nextFields[0], 16);

            $data->addPropRange($code, $nextCode, $generalCategory);
            $data->addPropRange($code, $nextCode, $bidiClass);
            continue;
        }

        $data->addProp($code, $generalCategory);
        $data->addProp($code, $bidiClass);

        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        // An empty titlecase field falls back to the uppercase mapping
        $titleCase = intval($fields[14], 16) ?: $upperCase;
        if ($upperCase) {
            $data->addCaseMapping('upper', $code, $upperCase);
        }
        if ($lowerCase) {
            $data->addCaseMapping('lower', $code, $lowerCase);
        }
        if ($titleCase) {
            $data->addCaseMapping('title', $code, $titleCase);
        }
    }
}

/**
 * Convert a space-separated list of hexadecimal code points to integers.
 *
 * @return int[]
 */
function parseCodes(string $strCodes) : array {
    $codes = [];
    foreach (explode(' ', $strCodes) as $strCode) {
        $codes[] = intval($strCode, 16);
    }
    return $codes;
}

/**
 * Parse CaseFolding.txt into $data->caseMaps['fold'].
 *
 * Simple foldings (status C/S) are stored as an int. Full foldings (F) are
 * stored as an array whose element 0 is the simple folding (or the code
 * point itself if there is none) followed by the full folding sequence.
 * Turkic foldings (T) are ignored. The result does not depend on whether
 * the S or the F line of a code point comes first.
 *
 * @throws Exception on malformed lines
 */
function parseCaseFolding(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        if (count($fields) != 4) {
            throw new Exception("Line does not contain 4 fields");
        }

        $code = intval($fields[0], 16);
        $status = $fields[1];
        if ($status === 'T') {
            // Use language-agnostic case folding
            continue;
        }

        $existing = $data->caseMaps['fold'][$code] ?? null;
        if ($status === 'C' || $status === 'S') {
            $foldCode = intval($fields[2], 16);
            if ($existing === null) {
                $data->addCaseMapping('fold', $code, $foldCode);
            } else if (is_array($existing)) {
                // Add simple mapping to full mapping data
                $data->caseMaps['fold'][$code][0] = $foldCode;
            } else {
                throw new Exception(sprintf("Conflicting simple case foldings for U+%04X", $code));
            }
        } else if ($status === 'F') {
            // Keep a simple folding that was already recorded for this code point
            $simpleFoldCode = is_array($existing) ? $existing[0] : ($existing ?? $code);
            $data->caseMaps['fold'][$code] = array_merge([$simpleFoldCode], parseCodes($fields[2]));
        } else {
            throw new Exception("Unknown case folding status $status");
        }
    }
}

/**
 * Merge one unconditional SpecialCasing.txt mapping into the case map of
 * the given type ('lower', 'upper' or 'title').
 *
 * Multi-code-point mappings are stored as [simple mapping, full mapping...].
 *
 * @param int[] $caseCodes Full mapping from SpecialCasing.txt
 * @throws Exception if a one-code-point mapping disagrees with the simple
 *                   mapping, or if the mapping has more than 3 code points
 */
function addSpecialCasing(UnicodeData $data, string $type, int $code, array $caseCodes) : void {
    $simpleCaseCode = $data->caseMaps[$type][$code] ?? $code;
    if (count($caseCodes) == 1) {
        if ($caseCodes[0] != $simpleCaseCode) {
            throw new Exception("Simple case code in special casing does not match");
        }

        // Special case: If a title-case character maps to itself, we may still have to store it,
        // if there is a non-trivial upper-case mapping for it
        if ($type == 'title' && $code == $caseCodes[0]
                && ($data->caseMaps['upper'][$code] ?? $code) != $code) {
            $data->caseMaps['title'][$code] = $code;
        }
        return;
    }

    if (count($caseCodes) > 3) {
        throw new Exception("Special case mapping with more than 3 code points");
    }

    $data->caseMaps[$type][$code] = array_merge([$simpleCaseCode], $caseCodes);
}

/**
 * Parse SpecialCasing.txt, taking only the unconditional mappings.
 *
 * @throws Exception on malformed lines
 */
function parseSpecialCasing(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        if (count($fields) != 5 && count($fields) != 6) {
            throw new Exception("Line does not contain 5 or 6 fields");
        }

        if ($fields[4] !== '') {
            // Only use unconditional mappings
            continue;
        }

        $code = intval($fields[0], 16);
        $lower = parseCodes($fields[1]);
        $title = parseCodes($fields[2]);
        $upper = parseCodes($fields[3]);

        addSpecialCasing($data, 'lower', $code, $lower);
        addSpecialCasing($data, 'upper', $code, $upper);

        // Should happen last, because the title handling looks at the upper mapping
        addSpecialCasing($data, 'title', $code, $title);
    }
}

/**
 * Parse DerivedCoreProperties.txt, recording only the Cased and
 * Case_Ignorable properties.
 *
 * @throws Exception on malformed lines
 */
function parseDerivedCoreProperties(UnicodeData $data, string $input) : void {
    $usedProperties = ['Cased', 'Case_Ignorable'];

    foreach (parseDataFile($input) as $fields) {
        $fieldCount = count($fields);
        if ($fieldCount != 2 && $fieldCount != 3) {
            throw new Exception("Line does not contain 2 or 3 fields");
        }

        if (isset($fields[2]) && in_array($fields[2], $usedProperties, true)) {
            $property = $fields[2];
        } else if (in_array($fields[1], $usedProperties, true)) {
            $property = $fields[1];
        } else {
            continue;
        }

        $range = explode('..', $fields[0]);
        if (count($range) == 2) {
            $data->addPropRange(intval($range[0], 16), intval($range[1], 16), $property);
        } else if (count($range) == 1) {
            $data->addProp(intval($range[0], 16), $property);
        } else {
            throw new Exception("Invalid range");
        }
    }
}

/**
 * Parse EastAsianWidth.txt and collect the code points classified as
 * Wide (W) or Fullwidth (F), merging entries that directly follow the
 * previously collected range.
 *
 * @return Range[]
 * @throws Exception on lines without a width field
 */
function parseEastAsianWidth(string $input) : array {
    $wideRanges = [];

    foreach (parseDataFile($input) as $fields) {
        if (count($fields) < 2) {
            throw new Exception("Line does not contain at least 2 fields");
        }
        if ($fields[1] !== 'W' && $fields[1] !== 'F') {
            continue;
        }

        // Either "XXXX..YYYY" or a single "XXXX"
        $bounds = explode('..', $fields[0]);
        $startCode = intval($bounds[0], 16);
        $endCode = intval($bounds[1] ?? $bounds[0], 16);

        if (!empty($wideRanges)) {
            $lastRange = $wideRanges[count($wideRanges) - 1];
            if ($startCode === $lastRange->end + 1) {
                $lastRange->end = $endCode;
                continue;
            }
        }

        $wideRanges[] = new Range($startCode, $endCode);
    }

    return $wideRanges;
}

/**
 * Format a list of values as the body of a C array initializer.
 *
 * @param array  $values List (0-based, sequential keys) of values
 * @param int    $width  Number of values per output line
 * @param string $format sprintf() format applied to each value
 */
function formatArray(array $values, int $width, string $format) : string {
    $result = '';
    $c = count($values);
    for ($i = 0; $i < $c; $i++) {
        if ($i != 0) {
            $result .= ',';
        }

        // Start a new, tab-indented line every $width values
        $result .= $i % $width == 0 ? "\n\t" : " ";
        $result .= sprintf($format, $values[$i]);
    }
    return $result;
}

/** Format values as 4-digit hexadecimal numbers. */
function formatShortHexArray(array $values, int $width) : string {
    return formatArray($values, $width, "0x%04x");
}
/** Format values as space-padded decimal numbers of width 5. */
function formatShortDecArray(array $values, int $width) : string {
    return formatArray($values, $width, "% 5d");
}
/** Format values as 8-digit hexadecimal numbers. */
function formatIntArray(array $values, int $width) : string {
    return formatArray($values, $width, "0x%08x");
}

/**
 * Generate the C source for the property tables: _ucprop_size,
 * _ucprop_offsets and _ucprop_ranges.
 */
function generatePropData(UnicodeData $data) : string {
    $data->compactPropRanges();

    // Each range occupies two slots (start, end) in _ucprop_ranges
    $propOffsets = [];
    $idx = 0;
    foreach ($data->propRanges as $ranges) {
        $num = count($ranges);
        $propOffsets[] = $idx;
        $idx += 2*$num;
    }

    // Add sentinel for binary search
    $propOffsets[] = $idx;

    // TODO ucgendat.c pads the prop offsets to the next multiple of 4
    // for rather dubious reasons of alignment. This should probably be
    // dropped
    while (count($propOffsets) % 4 != 0) {
        $propOffsets[] = 0;
    }

    $result = "";
    $result .= "static const unsigned short _ucprop_size = $data->numProps;\n\n";
    $result .= "static const unsigned short _ucprop_offsets[] = {";
    $result .= formatShortHexArray($propOffsets, 8);
    $result .= "\n};\n\n";

    $values = [];
    foreach ($data->propRanges as $ranges) {
        foreach ($ranges as $range) {
            $values[] = $range->start;
            $values[] = $range->end;
        }
    }

    $result .= "static const unsigned int _ucprop_ranges[] = {";
    $result .= formatIntArray($values, 4);
    $result .= "\n};\n\n";
    return $result;
}

/**
 * Flatten an array of arrays by one level into a single list.
 */
function flatten(array $array) : array {
    $result = [];
    foreach ($array as $arr) {
        foreach ($arr as $v) {
            $result[] = $v;
        }
    }
    return $result;
}

/**
 * Bring the case maps into the form that is written out: drop redundant
 * titlecase entries and replace multi-code-point mappings by a packed
 * (length << 24) | index reference into $data->extraCaseData.
 */
function prepareCaseData(UnicodeData $data) : void {
    // Don't store titlecase if it's the same as uppercase.
    // Both sides may be ints or arrays at this point, hence the loose comparison.
    foreach ($data->caseMaps['title'] as $code => $titleCode) {
        if ($titleCode == ($data->caseMaps['upper'][$code] ?? $code)) {
            unset($data->caseMaps['title'][$code]);
        }
    }

    // Store full (multi-char) case mappings in a separate table and only
    // store an index into it
    foreach ($data->caseMaps as $type => $caseMap) {
        foreach ($caseMap as $code => $caseCode) {
            if (is_array($caseCode)) {
                // -1 because the first entry is the simple case mapping
                $len = count($caseCode) - 1;
                $idx = count($data->extraCaseData);
                $data->caseMaps[$type][$code] = ($len << 24) | $idx;

                foreach ($caseCode as $c) {
                    $data->extraCaseData[] = $c;
                }
            }
        }
    }
}

/**
 * Generate the C source of the minimal perfect hash tables for one case map.
 * Prints the resulting table sizes to stdout as progress information.
 *
 * @param string $name Suffix used for the C identifiers (_uccase_<name>_...)
 * @param array  $map  Code point => mapping value
 */
function generateCaseMPH(string $name, array $map) : string {
    $prefix = "_uccase_" . $name;
    [$gTable, $table] = generateMPH($map, false);
    echo "$name: n=", count($table), ", g=", count($gTable), "\n";

    $result = "";
    $result .= "static const unsigned {$prefix}_g_size = " . count($gTable) . ";\n";
    $result .= "static const short {$prefix}_g[] = {";
    $result .= formatShortDecArray($gTable, 8);
    $result .= "\n};\n\n";
    $result .= "static const unsigned {$prefix}_table_size = " . count($table) . ";\n";
    $result .= "static const unsigned {$prefix}_table[] = {";
    $result .= formatIntArray(flatten($table), 4);
    $result .= "\n};\n\n";
    return $result;
}

/**
 * Generate the C source of all case mapping tables, including the table
 * holding the multi-code-point mappings.
 */
function generateCaseData(UnicodeData $data) : string {
    prepareCaseData($data);

    $result = "";
    $result .= generateCaseMPH('upper', $data->caseMaps['upper']);
    $result .= generateCaseMPH('lower', $data->caseMaps['lower']);
    $result .= generateCaseMPH('title', $data->caseMaps['title']);
    $result .= generateCaseMPH('fold', $data->caseMaps['fold']);
    $result .= "static const unsigned _uccase_extra_table[] = {";
    $result .= formatIntArray($data->extraCaseData, 4);
    $result .= "\n};\n\n";
    return $result;
}

/**
 * Generate the complete contents of unicode_data.h.
 */
function generateData(UnicodeData $data) : string {
    $result = <<<'HEADER'
/* This file was generated from a modified version of UCData's ucgendat.
 *
 * DO NOT EDIT THIS FILE!
 *
 * Instead, download the appropriate UnicodeData-x.x.x.txt and
 * CompositionExclusions-x.x.x.txt files from http://www.unicode.org/Public/
 * and run ext/mbstring/ucgendat/ucgendat.php.
 *
 * More information can be found in the UCData package. Unfortunately,
 * the project's page doesn't seem to be live anymore, so you can use
 * OpenLDAP's modified copy (look in libraries/liblunicode/ucdata) */
HEADER;
    $result .= "\n\n" . generatePropData($data);
    $result .= generateCaseData($data);

    return $result;
}

/*
 * Minimal Perfect Hash Generation
 *
 * Based on "Hash, displace, and compress" algorithm due to
 * Belazzougui, Botelho and Dietzfelbinger.
 *
 * Hash function based on https://stackoverflow.com/a/12996028/385378.
 * MPH implementation based on http://stevehanov.ca/blog/index.php?id=119.
 */

/**
 * Hash the integer $x, using $d as the seed / displacement value.
 * The result is truncated to 32 bits.
 */
function hashInt(int $d, int $x) : int {
    $x ^= $d;
    $x = (($x >> 16) ^ $x) * 0x45d9f3b;
    return $x & 0xffffffff;
}

/**
 * Try to build a minimal perfect hash for $map with $gSize first-level
 * buckets.
 *
 * The first-level table holds, per bucket, either a displacement d > 0
 * (the slot is hashInt(d, key) % tableSize), a value <= 0 (the slot is its
 * negation) or 0x7fff for an unused bucket.
 *
 * @param array $map   Integer key => value
 * @param int   $gSize Number of first-level buckets; must be at least 1
 *                     for a non-empty map
 * @return array [gTable, table] on success, where table is a list of
 *               [key, value] pairs; an empty array if no displacement
 *               below 0x8000 could be found for some bucket
 */
function tryGenerateMPH(array $map, int $gSize) : array {
    $tableSize = count($map);
    $table = [];
    $gTable = array_fill(0, $gSize, 0x7fff);
    $buckets = [];

    foreach ($map as $k => $v) {
        $h = hashInt(0, $k) % $gSize;
        $buckets[$h][] = [$k, $v];
    }

    // Sort by descending number of collisions
    usort($buckets, function ($b1, $b2) {
        return -(count($b1) <=> count($b2));
    });

    foreach ($buckets as $bucket) {
        $collisions = count($bucket);
        if ($collisions <= 1) {
            continue;
        }

        // Try values of $d until all elements placed in different slots
        $d = 1;
        $i = 0;
        $used = [];
        while ($i < $collisions) {
            if ($d > 0x7fff) {
                // Displacement would not fit the (short) first-level table
                return [];
            }

            list($k) = $bucket[$i];
            $slot = hashInt($d, $k) % $tableSize;
            if (isset($table[$slot]) || isset($used[$slot])) {
                // Collision: restart the bucket with the next displacement
                $d++;
                $i = 0;
                $used = [];
            } else {
                $i++;
                $used[$slot] = true;
            }
        }

        $g = hashInt(0, $bucket[0][0]) % $gSize;
        $gTable[$g] = $d;
        foreach ($bucket as $elem) {
            $table[hashInt($d, $elem[0]) % $tableSize] = $elem;
        }
    }

    $freeSlots = [];
    for ($i = 0; $i < $tableSize; $i++) {
        if (!isset($table[$i])) {
            $freeSlots[] = $i;
        }
    }

    // For buckets with only one element, we directly store the index
    $freeIdx = 0;
    foreach ($buckets as $bucket) {
        if (count($bucket) != 1) {
            continue;
        }

        $elem = $bucket[0];
        $slot = $freeSlots[$freeIdx++];
        $table[$slot] = $elem;

        $g = hashInt(0, $elem[0]) % $gSize;
        $gTable[$g] = -$slot;
    }

    ksort($gTable);
    ksort($table);

    return [$gTable, $table];
}

/**
 * Build a minimal perfect hash for $map, searching for a first-level table
 * size that works.
 *
 * @param array $map  Integer key => value
 * @param bool  $fast If true, probe sizes count/lambda for lambda = 5.0
 *                    down to 0.5 in steps of 0.5 before falling back to a
 *                    linear search; if false, search linearly upwards
 *                    starting at count/7
 * @return array [gTable, table], see tryGenerateMPH()
 */
function generateMPH(array $map, bool $fast) : array {
    $n = count($map);

    if ($fast) {
        // Check size starting lambda=5.0 in 0.5 increments
        for ($lambda = 5.0; $lambda >= 0.5; $lambda -= 0.5) {
            $mph = tryGenerateMPH($map, max(1, (int) ($n / $lambda)));
            if (!empty($mph)) {
                return $mph;
            }
        }
        // Coarse search exhausted: continue linearly past the largest size tried
        $m = 2 * $n + 1;
    } else {
        // Check all sizes starting lambda=7.0. The size must be at least 1,
        // since it is used as a modulus.
        $m = max(1, (int) ($n / 7.0));
    }

    for (;; $m++) {
        $mph = tryGenerateMPH($map, $m);
        if (!empty($mph)) {
            return $mph;
        }
    }
}

/**
 * Generate the complete contents of eaw_table.h.
 *
 * @param Range[] $wideRanges Ranges of double-width code points, in ascending order
 * @throws Exception if no ranges are given
 */
function generateEastAsianWidthData(array $wideRanges) : string {
    if (empty($wideRanges)) {
        throw new Exception("No wide or fullwidth code points found");
    }

    $result = <<<'HEADER'
/* This file was generated by ext/mbstring/ucgendat/ucgendat.php.
 *
 * DO NOT EDIT THIS FILE!
 *
 * East Asian Width table
 *
 * Some characters in East Asian languages are intended to be displayed in a space
 * which is roughly square. (This contrasts with others such as the Latin alphabet,
 * which are taller than they are wide.) To display these East Asian characters
 * properly, twice the horizontal space is used. This must be taken into account
 * when doing things like wrapping text to a specific width.
 *
 * Each pair of numbers in the below table is a range of Unicode codepoints
 * which should be displayed as double-width.
 */

HEADER;

    $result .= "\n#define FIRST_DOUBLEWIDTH_CODEPOINT 0x" . dechex($wideRanges[0]->start) . "\n\n";

    $result .= <<<'TABLESTART'
static const struct {
	int begin;
	int end;
} mbfl_eaw_table[] = {

TABLESTART;

    foreach ($wideRanges as $range) {
        $startCode = dechex($range->start);
        $endCode = dechex($range->end);
        $result .= "\t{ 0x{$startCode}, 0x{$endCode} },\n";
    }

    $result .= "};\n";
    return $result;
}