xref: /php-src/ext/mbstring/ucgendat/ucgendat.php (revision 23f99f08)
1#!/usr/bin/env php
2<?php error_reporting(E_ALL);
3
/**
 * This is based on the ucgendat.c file from the OpenLDAP project, licensed as
 * follows. This file is not necessary to build PHP. It is only needed to
 * regenerate unicode_data.h and libmbfl/mbfl/eaw_table.h from the Unicode
 * Character Database (UCD) files.
 *
 * Example usage:
 * php ucgendat.php path/to/Unicode/data/files
 */
12
13/* Copyright 1998-2007 The OpenLDAP Foundation.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted only as authorized by the OpenLDAP
18 * Public License.
19 *
20 * A copy of this license is available at
21 * <http://www.OpenLDAP.org/license.html>.
22 */
23
24/* Copyright 2001 Computing Research Labs, New Mexico State University
25 *
26 * Permission is hereby granted, free of charge, to any person obtaining a
27 * copy of this software and associated documentation files (the "Software"),
28 * to deal in the Software without restriction, including without limitation
29 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
30 * and/or sell copies of the Software, and to permit persons to whom the
31 * Software is furnished to do so, subject to the following conditions:
32 *
33 * The above copyright notice and this permission notice shall be included in
34 * all copies or substantial portions of the Software.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
37 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
38 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
39 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
40 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
41 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
42 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
// Command-line entry point. The single argument is the directory holding the
// Unicode Character Database files listed below.
if ($argc < 2) {
    // The script is called ucgendat.php (no trailing "a").
    echo "Usage: php ucgendat.php ./datadir\n";
    echo "./datadir must contain:\n";
    echo "UnicodeData.txt, CaseFolding.txt, SpecialCasing.txt, DerivedCoreProperties.txt, and EastAsianWidth.txt\n";
    return;
}

$dir = $argv[1];
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';
$derivedCorePropertiesFile = $dir . '/DerivedCoreProperties.txt';
$eastAsianWidthFile = $dir . '/EastAsianWidth.txt';

// Bail out before producing any output if one of the inputs is missing.
$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile, $derivedCorePropertiesFile, $eastAsianWidthFile];
foreach ($files as $file) {
    if (!file_exists($file)) {
        echo "File $file does not exist.\n";
        return;
    }
}

// Property ranges and case mappings are written one directory above this script.
$outputFile = __DIR__ . "/../unicode_data.h";

// The parse order matters: CaseFolding and SpecialCasing refine the simple
// mappings collected from UnicodeData.txt.
$data = new UnicodeData;
parseUnicodeData($data, file_get_contents($unicodeDataFile));
parseCaseFolding($data, file_get_contents($caseFoldingFile));
parseSpecialCasing($data, file_get_contents($specialCasingFile));
parseDerivedCoreProperties($data, file_get_contents($derivedCorePropertiesFile));
file_put_contents($outputFile, generateData($data));

// The East Asian Width table is independent of $data and goes into libmbfl.
$eawFile = __DIR__ . "/../libmbfl/mbfl/eaw_table.h";

$eawData = parseEastAsianWidth(file_get_contents($eastAsianWidthFile));
file_put_contents($eawFile, generateEastAsianWidthData($eawData));
80
/**
 * A contiguous run of code points, with both ends inclusive
 * (a single code point is stored as start == end).
 */
class Range {
    /** First code point of the run. */
    public $start;
    /** Last code point of the run (inclusive). */
    public $end;

    /**
     * @param int $start first code point
     * @param int $end   last code point (inclusive)
     */
    public function __construct(int $start, int $end) {
        $this->end = $end;
        $this->start = $start;
    }
}
90
/**
 * Accumulator for everything parsed out of the Unicode data files:
 * per-property code point ranges and the four case maps.
 */
class UnicodeData {
    /** Map from property name to its index into $propRanges. */
    public $propIndexes;
    /** Number of distinct properties (size of $propIndexes). */
    public $numProps;
    /** One list of Range objects per property index. */
    public $propRanges;
    /**
     * Case maps keyed by 'upper', 'lower', 'title' and 'fold'. Each maps a
     * code point either to a single code point (simple mapping) or to an
     * array whose first element is the simple mapping and whose remaining
     * elements are the full (multi code point) mapping.
     */
    public $caseMaps;
    /** Flat storage for full case mappings; filled by prepareCaseData(). */
    public $extraCaseData;

    public function __construct() {
        /*
         * List of properties expected to be found in the Unicode Character Database.
         * The position in this list is the property's index in the generated tables.
         */
        $this->propIndexes = array_flip([
            "Mn", "Mc", "Me", "Nd", "Nl", "No",
            "Zs", "Zl", "Zp", "Cs", "Co", "Cn",
            "Lu", "Ll", "Lt", "Lm", "Lo", "Sm",
            "Sc", "Sk", "So", "L", "R", "EN",
            "ES", "ET", "AN", "CS", "B", "S",
            "WS", "ON", "AL",
            "C", "P", "Cased", "Case_Ignorable"
        ]);
        $this->numProps = count($this->propIndexes);

        $this->propRanges = array_fill(0, $this->numProps, []);
        $this->caseMaps = [
            'upper' => [],
            'lower' => [],
            'title' => [],
            'fold' => [],
        ];
        $this->extraCaseData = [];
    }

    /**
     * Translates a property name from the data files into its table index,
     * folding several source properties into a shared one.
     *
     * @throws Exception if the property is not one of the known names
     */
    public function propToIndex(string $prop) : int {
        /* Deal with directionality codes introduced in Unicode 3.0. */
        if (in_array($prop, ["BN", "NSM", "PDF", "LRE", "LRO", "RLE", "RLO", "LRI", "RLI", "FSI", "PDI"], true)) {
            /*
             * Mark all of these as Other Neutral to preserve compatibility with
             * older versions.
             */
            $prop = "ON";
        }

        /* Merge all punctuation into a single category for efficiency of access.
         * We're currently not interested in distinguishing different kinds of punctuation. */
        if (in_array($prop, ["Pc", "Pd", "Ps", "Pe", "Po", "Pi", "Pf"], true)) {
            $prop = "P";
        }
        /* Same for control. */
        if (in_array($prop, ["Cc", "Cf"], true)) {
            $prop = "C";
        }

        if (!isset($this->propIndexes[$prop])) {
            throw new Exception("Unknown property $prop");
        }

        return $this->propIndexes[$prop];
    }

    /**
     * Records that a single code point has the given property. If the code
     * point directly follows the most recently added range of that property,
     * that range is extended instead of creating a new one.
     */
    public function addProp(int $code, string $prop) {
        $propIdx = $this->propToIndex($prop);

        // Range objects are shared by handle, so updating the object fetched
        // from this local copy of the list also updates the stored range.
        $ranges = $this->propRanges[$propIdx];
        $lastRange = end($ranges);
        if ($lastRange !== false && $code === $lastRange->end + 1) {
            $lastRange->end++;
            return;
        }

        $this->propRanges[$propIdx][] = new Range($code, $code);
    }

    /**
     * Records that every code point in [$startCode, $endCode] has the given
     * property. No merging is attempted here; see compactPropRanges().
     */
    public function addPropRange(int $startCode, int $endCode, string $prop) {
        $propIdx = $this->propToIndex($prop);
        $this->propRanges[$propIdx][] = new Range($startCode, $endCode);
    }

    /**
     * Stores a simple (one code point) case mapping.
     *
     * @param string $case one of 'upper', 'lower', 'title', 'fold'
     */
    public function addCaseMapping(string $case, int $origCode, int $mappedCode) {
        $this->caseMaps[$case][$origCode] = $mappedCode;
    }

    /**
     * Sorts ranges by start and merges directly adjacent ones.
     *
     * The Range objects passed in are reused for the result, so merged
     * ranges are modified in place.
     *
     * @param Range[] $ranges
     * @return Range[] sorted, with no two ranges touching
     * @throws Exception if two ranges overlap
     */
    public function compactRangeArray(array $ranges) : array {
        // Sort by start codepoint
        usort($ranges, function (Range $r1, Range $r2) {
            return $r1->start <=> $r2->start;
        });

        $newRanges = [];
        $lastRange = null;
        foreach ($ranges as $range) {
            if ($lastRange === null) {
                $lastRange = $range;
            } else if ($range->start == $lastRange->end + 1) {
                // Directly adjacent: grow the pending range
                $lastRange->end = $range->end;
            } else if ($range->start > $lastRange->end + 1) {
                // Gap: the pending range is complete
                $newRanges[] = $lastRange;
                $lastRange = $range;
            } else {
                throw new Exception(sprintf(
                    "Overlapping ranges [%x, %x] and [%x, %x]",
                    $lastRange->start, $lastRange->end,
                    $range->start, $range->end
                ));
            }
        }
        if ($lastRange !== null) {
            $newRanges[] = $lastRange;
        }
        return $newRanges;
    }

    /**
     * Applies compactRangeArray() to the range list of every property.
     */
    public function compactPropRanges() {
        foreach ($this->propRanges as &$ranges) {
            $ranges = $this->compactRangeArray($ranges);
        }
        // Break the reference so later writes to $ranges cannot alias the last list.
        unset($ranges);
    }
}
211
/**
 * Generator over the data records of a UCD-style text file.
 *
 * Everything from the first '#' on a line is treated as a comment; lines that
 * are blank after stripping are skipped. Each remaining line is split on ';'
 * and yielded as an array of whitespace-trimmed fields.
 */
function parseDataFile(string $input) {
    foreach (explode("\n", $input) as $rawLine) {
        // Cut off the trailing comment, if there is one
        $commentStart = strpos($rawLine, '#');
        $content = $rawLine;
        if ($commentStart !== false) {
            $content = substr($rawLine, 0, $commentStart);
        }

        // Only lines with actual content produce a record
        $content = trim($content);
        if ($content !== '') {
            yield array_map('trim', explode(';', $content));
        }
    }
}
230
/**
 * Reads UnicodeData.txt and records, for every code point, its general
 * category (field 2), bidi class (field 4) and simple upper/lower/title case
 * mappings (fields 12-14).
 *
 * A name of the form "<...>" other than "<control>" opens a range: the record
 * that follows supplies the last code point of the range, and only the two
 * properties are stored for it (no case mappings).
 *
 * @throws Exception on a malformed record or an unterminated range
 */
function parseUnicodeData(UnicodeData $data, string $input) : void {
    $lines = parseDataFile($input);
    foreach ($lines as $fields) {
        if (count($fields) != 15) {
            throw new Exception("Line does not contain 15 fields");
        }

        $code = intval($fields[0], 16);

        $name = $fields[1];
        if ($name === '') {
            throw new Exception("Empty name");
        }

        $generalCategory = $fields[2];
        $bidiClass = $fields[4];

        if ($name[0] === '<' && $name !== '<control>') {
            // This is a character range. Pull the closing record off the
            // generator by hand; the foreach then resumes after it.
            $lines->next();
            if (!$lines->valid()) {
                throw new Exception(sprintf(
                    "Range starting at %X (%s) has no closing line", $code, $name
                ));
            }

            $nextFields = $lines->current();
            $nextCode = intval($nextFields[0], 16);
            if ($nextCode < $code) {
                throw new Exception(sprintf(
                    "Range starting at %X ends before it starts (%X)", $code, $nextCode
                ));
            }

            $data->addPropRange($code, $nextCode, $generalCategory);
            $data->addPropRange($code, $nextCode, $bidiClass);
            continue;
        }

        $data->addProp($code, $generalCategory);
        $data->addProp($code, $bidiClass);

        // An empty field parses as 0, which means "no mapping". The title
        // case falls back to the upper case when not given explicitly.
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        $titleCase = intval($fields[14], 16) ?: $upperCase;
        if ($upperCase) {
            $data->addCaseMapping('upper', $code, $upperCase);
        }
        if ($lowerCase) {
            $data->addCaseMapping('lower', $code, $lowerCase);
        }
        if ($titleCase) {
            $data->addCaseMapping('title', $code, $titleCase);
        }
    }
}
279
/**
 * Converts a space-separated list of hexadecimal code points
 * (e.g. "0073 0073") into a list of integers.
 */
function parseCodes(string $strCodes) : array {
    return array_map(
        function ($hex) {
            return intval($hex, 16);
        },
        explode(' ', $strCodes)
    );
}
287
/**
 * Reads CaseFolding.txt into the 'fold' case map.
 *
 * Status 'C' and 'S' lines give the simple folding, 'F' lines the full
 * (multi code point) folding, and 'T' lines (language-specific) are ignored.
 * A code point with a full folding is stored as an array: element 0 is the
 * simple folding (the code point itself when none is given), followed by the
 * full folding.
 *
 * @throws Exception on a malformed line, an unknown status, or a duplicate
 *                   simple folding
 */
function parseCaseFolding(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        if (count($fields) != 4) {
            throw new Exception("Line does not contain 4 fields");
        }

        $code = intval($fields[0], 16);
        $status = $fields[1];
        if ($status == 'T') {
            // Use language-agnostic case folding
            continue;
        }

        if ($status == 'C' || $status == 'S') {
            $foldCode = intval($fields[2], 16);
            if (!isset($data->caseMaps['fold'][$code])) {
                $data->addCaseMapping('fold', $code, $foldCode);
            } else if (is_array($data->caseMaps['fold'][$code])) {
                // Add simple mapping to full mapping data
                $data->caseMaps['fold'][$code][0] = $foldCode;
            } else {
                throw new Exception(sprintf("Duplicate simple case folding for %X", $code));
            }
        } else if ($status == 'F') {
            $foldCodes = parseCodes($fields[2]);
            // Keep a simple folding that was recorded before this line
            // rather than resetting it to the identity mapping.
            $existingFoldCode = $data->caseMaps['fold'][$code] ?? $code;
            if (is_array($existingFoldCode)) {
                $existingFoldCode = $existingFoldCode[0];
            }
            $data->caseMaps['fold'][$code] = array_merge([$existingFoldCode], $foldCodes);
        } else {
            throw new Exception("Unknown case folding status $status");
        }
    }
}
319
/**
 * Merges one SpecialCasing.txt mapping into the case map named by $type.
 *
 * A mapping of two or three code points is stored as an array holding the
 * simple mapping followed by the full mapping. A one code point mapping must
 * agree with the simple mapping already known and is otherwise not stored,
 * apart from the title-case exception handled below.
 *
 * @throws Exception if a one code point mapping disagrees with the simple
 *                   mapping, or if the mapping has more than 3 code points
 */
function addSpecialCasing(UnicodeData $data, string $type, int $code, array $caseCodes) : void {
    $simpleCaseCode = $data->caseMaps[$type][$code] ?? $code;
    $numCodes = count($caseCodes);

    if ($numCodes != 1) {
        if ($numCodes > 3) {
            throw new Exception("Special case mapping with more than 3 code points");
        }

        $data->caseMaps[$type][$code] = array_merge([$simpleCaseCode], $caseCodes);
        return;
    }

    $onlyCode = $caseCodes[0];
    if ($onlyCode != $simpleCaseCode) {
        throw new Exception("Simple case code in special casing does not match");
    }

    // A title-case mapping onto the character itself still has to be stored
    // when the character has a non-trivial upper-case mapping.
    $upperCode = $data->caseMaps['upper'][$code] ?? $code;
    if ($type == 'title' && $code == $onlyCode && $upperCode != $code) {
        $data->caseMaps['title'][$code] = $code;
    }
}
342
/**
 * Reads SpecialCasing.txt and adds its unconditional lower, upper and title
 * mappings to $data. Lines that carry a condition (field 4) are ignored.
 *
 * @throws Exception if a line does not have 5 or 6 fields
 */
function parseSpecialCasing(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        $numFields = count($fields);
        if ($numFields != 5 && $numFields != 6) {
            throw new Exception("Line does not contain 5 or 6 fields");
        }

        // Only use unconditional mappings
        if ($fields[4]) {
            continue;
        }

        $code = intval($fields[0], 16);
        $lowerCodes = parseCodes($fields[1]);
        $titleCodes = parseCodes($fields[2]);
        $upperCodes = parseCodes($fields[3]);

        addSpecialCasing($data, 'lower', $code, $lowerCodes);
        addSpecialCasing($data, 'upper', $code, $upperCodes);

        // Title goes last: it looks at the upper-case mapping stored just above
        addSpecialCasing($data, 'title', $code, $titleCodes);
    }
}
367
/**
 * Reads DerivedCoreProperties.txt and records the code points that have the
 * Cased or Case_Ignorable property; every other property is skipped.
 *
 * The property name is taken from field 2 if that field names one of the two
 * properties, otherwise from field 1. Field 0 is either a single code point
 * or a "start..end" range.
 *
 * @throws Exception on a wrong field count or a malformed range
 */
function parseDerivedCoreProperties(UnicodeData $data, string $input) : void {
    $usedProperties = ['Cased', 'Case_Ignorable'];

    foreach (parseDataFile($input) as $fields) {
        $fieldCount = count($fields);
        if ($fieldCount != 2 && $fieldCount !== 3) {
            throw new Exception("Line does not contain 2 or 3 fields");
        }

        if (isset($fields[2]) && in_array($fields[2], $usedProperties, true)) {
            $property = $fields[2];
        } elseif (in_array($fields[1], $usedProperties, true)) {
            $property = $fields[1];
        } else {
            // Not a property we generate data for
            continue;
        }

        $bounds = explode('..', $fields[0]);
        switch (count($bounds)) {
            case 1:
                $data->addProp(intval($bounds[0], 16), $property);
                break;
            case 2:
                $data->addPropRange(intval($bounds[0], 16), intval($bounds[1], 16), $property);
                break;
            default:
                throw new Exception("Invalid range");
        }
    }
}
397
/**
 * Reads EastAsianWidth.txt and collects the code points whose width class
 * (field 1) is 'W' or 'F' into a list of Range objects.
 *
 * An entry that starts right after the end of the previously collected range
 * extends that range instead of starting a new one. Entries are taken in file
 * order; no sorting is done here.
 *
 * @return Range[]
 */
function parseEastAsianWidth(string $input) : array {
    $wideRanges = [];

    foreach (parseDataFile($input) as $fields) {
        if ($fields[1] != 'W' && $fields[1] != 'F') {
            continue;
        }

        // "XXXX..YYYY" is a range; anything else is a single code point,
        // handled as a range of length one.
        $dotsPos = strpos($fields[0], '..');
        if ($dotsPos) {
            $firstCode = intval(substr($fields[0], 0, $dotsPos), 16);
            $lastCode = intval(substr($fields[0], $dotsPos + 2), 16);
        } else {
            $firstCode = $lastCode = intval($fields[0], 16);
        }

        $previous = end($wideRanges);
        if ($previous !== false && $firstCode == $previous->end + 1) {
            // Contiguous with the previous range: just move its end
            $previous->end = $lastCode;
        } else {
            $wideRanges[] = new Range($firstCode, $lastCode);
        }
    }

    return $wideRanges;
}
434
/**
 * Renders a list of values as the body of a C array initializer.
 *
 * Values are separated by commas; every $width-th value starts a new line
 * indented with a tab, the others are preceded by a single space.
 *
 * @param array  $values list indexed 0..n-1
 * @param int    $width  number of values per output line
 * @param string $format sprintf() format applied to each value
 */
function formatArray(array $values, int $width, string $format) : string {
    $out = '';
    for ($pos = 0, $total = count($values); $pos < $total; ++$pos) {
        $lead = ($pos % $width == 0) ? "\n\t" : " ";
        if ($pos > 0) {
            $lead = ',' . $lead;
        }
        $out .= $lead . sprintf($format, $values[$pos]);
    }
    return $out;
}
449
/** Formats each value as a zero-padded 4-digit hex literal, $width values per line. */
function formatShortHexArray(array $values, int $width) : string {
    $hexFormat = "0x%04x";
    return formatArray($values, $width, $hexFormat);
}
/** Formats each value as a decimal number space-padded to 5 columns, $width values per line. */
function formatShortDecArray(array $values, int $width) : string {
    $decFormat = "% 5d";
    return formatArray($values, $width, $decFormat);
}
/** Formats each value as a zero-padded 8-digit hex literal, $width values per line. */
function formatIntArray(array $values, int $width) : string {
    $hexFormat = "0x%08x";
    return formatArray($values, $width, $hexFormat);
}
459
/**
 * Emits the C definitions of _ucprop_size, _ucprop_offsets and _ucprop_ranges.
 *
 * _ucprop_ranges is the concatenation of the (start, end) pairs of every
 * property, in property-index order. _ucprop_offsets holds, for each
 * property, the index of its first entry in _ucprop_ranges, followed by one
 * extra entry with the total length.
 *
 * Compacts the property ranges of $data as a side effect.
 */
function generatePropData(UnicodeData $data) {
    $data->compactPropRanges();

    // Build the flat range table; the offset of a property is simply the
    // number of values emitted before it.
    $propOffsets = [];
    $values = [];
    foreach ($data->propRanges as $ranges) {
        $propOffsets[] = count($values);
        foreach ($ranges as $range) {
            $values[] = $range->start;
            $values[] = $range->end;
        }
    }

    // Add sentinel for binary search
    $propOffsets[] = count($values);

    // TODO ucgendat.c pads the prop offsets to the next multiple of 4
    // for rather dubious reasons of alignment. This should probably be
    // dropped
    while (count($propOffsets) % 4 != 0) {
        $propOffsets[] = 0;
    }

    return "static const unsigned short _ucprop_size = " . $data->numProps . ";\n\n"
        . "static const unsigned short  _ucprop_offsets[] = {"
        . formatShortHexArray($propOffsets, 8)
        . "\n};\n\n"
        . "static const unsigned int _ucprop_ranges[] = {"
        . formatIntArray($values, 4)
        . "\n};\n\n";
}
502
/**
 * Concatenates the values of an array of arrays into one list.
 * Only one level is flattened, and keys are discarded.
 */
function flatten(array $array) {
    $flat = [];
    foreach ($array as $group) {
        foreach ($group as $item) {
            $flat[] = $item;
        }
    }
    return $flat;
}
512
/**
 * Brings the case maps of $data into the form that is written out.
 *
 * - Title-case entries equal to the upper-case mapping of the same code point
 *   (or to the code point itself when it has none) are dropped.
 * - Every array-valued (full) mapping is appended to $data->extraCaseData and
 *   replaced by a packed integer: the number of code points in the full
 *   mapping shifted left by 24 bits, OR-ed with the index of its first
 *   element in extraCaseData.
 */
function prepareCaseData(UnicodeData $data) {
    // Don't store titlecase if it's the same as uppercase
    foreach ($data->caseMaps['title'] as $code => $titleCode) {
        $upperCode = $data->caseMaps['upper'][$code] ?? $code;
        if ($titleCode == $upperCode) {
            unset($data->caseMaps['title'][$code]);
        }
    }

    // Move multi-char mappings into the shared table, leaving an index behind
    foreach ($data->caseMaps as $type => $caseMap) {
        foreach ($caseMap as $code => $mapping) {
            if (!is_array($mapping)) {
                continue;
            }

            $offset = count($data->extraCaseData);
            // The first element is the simple mapping and does not count
            $fullLen = count($mapping) - 1;
            $data->caseMaps[$type][$code] = ($fullLen << 24) | $offset;

            foreach ($mapping as $mappedCode) {
                $data->extraCaseData[] = $mappedCode;
            }
        }
    }
}
538
/**
 * Builds the minimal perfect hash for one case map and returns the C
 * definitions of its tables, all prefixed with "_uccase_<name>".
 *
 * Also prints the resulting table sizes to standard output.
 */
function generateCaseMPH(string $name, array $map) {
    $prefix = "_uccase_" . $name;
    [$gTable, $table] = generateMPH($map, false);

    $gSize = count($gTable);
    $tableSize = count($table);
    echo $name . ": n=" . $tableSize . ", g=" . $gSize . "\n";

    // Each table entry is a [key, value] pair; flatten() interleaves them.
    return "static const unsigned {$prefix}_g_size = " . $gSize . ";\n"
        . "static const short {$prefix}_g[] = {"
        . formatShortDecArray($gTable, 8)
        . "\n};\n\n"
        . "static const unsigned {$prefix}_table_size = " . $tableSize . ";\n"
        . "static const unsigned {$prefix}_table[] = {"
        . formatIntArray(flatten($table), 4)
        . "\n};\n\n";
}
555
/**
 * Emits the hash tables for the upper, lower, title and fold case maps (in
 * that order), followed by the table of full case mappings.
 *
 * Calls prepareCaseData(), so the case maps of $data are rewritten.
 */
function generateCaseData(UnicodeData $data) {
    prepareCaseData($data);

    $output = "";
    foreach (['upper', 'lower', 'title', 'fold'] as $type) {
        $output .= generateCaseMPH($type, $data->caseMaps[$type]);
    }

    $extraTable = formatIntArray($data->extraCaseData, 4);
    $output .= "static const unsigned _uccase_extra_table[] = {" . $extraTable . "\n};\n\n";
    return $output;
}
569
/**
 * Produces the complete text of the generated header: a fixed banner comment,
 * then the property tables, then the case mapping tables.
 */
function generateData(UnicodeData $data) {
    $banner = <<<'HEADER'
/* This file was generated from a modified version of UCData's ucgendat.
 *
 *                     DO NOT EDIT THIS FILE!
 *
 * Instead, download the appropriate UnicodeData-x.x.x.txt and
 * CompositionExclusions-x.x.x.txt files from http://www.unicode.org/Public/
 * and run ext/mbstring/ucgendat/ucgendat.php.
 *
 * More information can be found in the UCData package. Unfortunately,
 * the project's page doesn't seem to be live anymore, so you can use
 * OpenLDAP's modified copy (look in libraries/liblunicode/ucdata) */
HEADER;

    // Property data first: both steps modify $data, so the order is kept fixed.
    $propSection = generatePropData($data);
    $caseSection = generateCaseData($data);

    return $banner . "\n\n" . $propSection . $caseSection;
}
589
590/*
591 * Minimal Perfect Hash Generation
592 *
593 * Based on "Hash, displace, and compress" algorithm due to
594 * Belazzougui, Botelho and Dietzfelbinger.
595 *
596 * Hash function based on https://stackoverflow.com/a/12996028/385378.
597 * MPH implementation based on http://stevehanov.ca/blog/index.php?id=119.
598 */
599
/**
 * Integer hash used by the perfect hash tables.
 *
 * @param int $d seed / displacement value
 * @param int $x value to hash
 * @return int the low 32 bits of the mixed value
 */
function hashInt(int $d, int $x) {
    $mixed = $x ^ $d;
    $mixed ^= $mixed >> 16;
    return ($mixed * 0x45d9f3b) & 0xffffffff;
}
605
/**
 * Attempts to build a minimal perfect hash for the keys of $map, using an
 * intermediate table with $gSize entries.
 *
 * Keys are grouped into buckets by hashInt(0, key) % $gSize. For each bucket
 * with more than one key, the smallest displacement d >= 1 is searched such
 * that hashInt(d, key) % count($map) sends all its keys to distinct, still
 * unused slots; d is stored in the intermediate table. A bucket with a single
 * key gets one of the remaining free slots, stored as the negated slot index.
 * Intermediate entries of buckets without keys keep the value 0x7fff.
 *
 * @param array $map   integer key => value
 * @param int   $gSize size of the intermediate table
 * @return array [] if some bucket needs a displacement above 0x7fff,
 *               otherwise [$gTable, $table] where $table maps each slot to
 *               its [key, value] pair
 */
function tryGenerateMPH(array $map, int $gSize) {
    $tableSize = count($map);
    $table = [];
    $gTable = array_fill(0, $gSize, 0x7fff);

    // Group the entries by their first-level hash
    $buckets = [];
    foreach ($map as $key => $value) {
        $buckets[hashInt(0, $key) % $gSize][] = [$key, $value];
    }

    // Largest buckets first: they are the hardest to place
    usort($buckets, function ($left, $right) {
        return -(count($left) <=> count($right));
    });

    foreach ($buckets as $bucket) {
        $bucketSize = count($bucket);
        if ($bucketSize <= 1) {
            continue;
        }

        // Raise $d until every key of the bucket lands in its own free slot.
        // $claimed holds the slots taken by this bucket for the current $d.
        $d = 1;
        $placed = 0;
        $claimed = [];
        while ($placed < $bucketSize) {
            if ($d > 0x7fff) {
                return [];
            }

            $slot = hashInt($d, $bucket[$placed][0]) % $tableSize;
            if (!isset($table[$slot]) && !isset($claimed[$slot])) {
                $claimed[$slot] = true;
                $placed++;
            } else {
                // Collision: start over with the next displacement
                $d++;
                $placed = 0;
                $claimed = [];
            }
        }

        $gTable[hashInt(0, $bucket[0][0]) % $gSize] = $d;
        foreach ($bucket as $entry) {
            $table[hashInt($d, $entry[0]) % $tableSize] = $entry;
        }
    }

    // Slots not taken by any multi-key bucket
    $freeSlots = [];
    for ($slot = 0; $slot < $tableSize; $slot++) {
        if (!isset($table[$slot])) {
            $freeSlots[] = $slot;
        }
    }

    // For buckets with only one element, we directly store the index
    $nextFree = 0;
    foreach ($buckets as $bucket) {
        if (count($bucket) != 1) {
            continue;
        }

        $entry = $bucket[0];
        $slot = $freeSlots[$nextFree];
        $nextFree++;
        $table[$slot] = $entry;

        $gTable[hashInt(0, $entry[0]) % $gSize] = -$slot;
    }

    ksort($gTable);
    ksort($table);

    return [$gTable, $table];
}
683
/**
 * Finds an intermediate table size for which tryGenerateMPH() succeeds and
 * returns the resulting hash tables.
 *
 * With $fast, sizes count($map) / lambda are tried for lambda = 5.0, 4.5,
 * ..., 0.5. Otherwise every size starting at count($map) / 7 is tried in
 * turn, which gives the smallest workable intermediate table from that
 * starting point.
 *
 * The intermediate table size is never allowed to drop below 1, because
 * tryGenerateMPH() takes hash values modulo that size.
 *
 * @param array $map  integer key => value
 * @param bool  $fast trade table size for generation speed
 * @return array [$gTable, $table] as returned by tryGenerateMPH()
 * @throws Exception in fast mode, if none of the candidate sizes works
 */
function generateMPH(array $map, bool $fast) {
    $numKeys = count($map);
    if ($numKeys === 0) {
        // Nothing to hash: both tables are empty
        return [[], []];
    }

    if ($fast) {
        // Check size starting lambda=5.0 in 0.5 increments. Stop at 0.5:
        // going further would divide by zero and then turn negative.
        for ($lambda = 5.0; $lambda >= 0.5; $lambda -= 0.5) {
            $m = max(1, (int) ($numKeys / $lambda));
            $mph = tryGenerateMPH($map, $m);
            if (!empty($mph)) {
                return $mph;
            }
        }

        throw new Exception("Failed to generate a minimal perfect hash for $numKeys keys");
    }

    // Check all sizes starting lambda=7.0
    for ($m = max(1, (int) ($numKeys / 7.0));; $m++) {
        $mph = tryGenerateMPH($map, $m);
        if (!empty($mph)) {
            return $mph;
        }
    }
}
709
/**
 * Produces the text of the generated East Asian Width header: a banner
 * comment, the FIRST_DOUBLEWIDTH_CODEPOINT macro (start of the first range)
 * and the mbfl_eaw_table array with one { begin, end } entry per range.
 *
 * @param array $wideRanges list of objects with integer `start` and `end`
 *                          properties, in the order they are to be emitted
 * @return string contents of the header file
 * @throws Exception if $wideRanges has no first entry, since the macro could
 *                   not be given a meaningful value
 */
function generateEastAsianWidthData(array $wideRanges) {
    if (!isset($wideRanges[0])) {
        throw new Exception("No double-width ranges found; cannot generate the East Asian Width table");
    }

    $result = <<<'HEADER'
/* This file was generated by ext/mbstring/ucgendat/ucgendat.php.
 *
 *                     DO NOT EDIT THIS FILE!
 *
 * East Asian Width table
 *
 * Some characters in East Asian languages are intended to be displayed in a space
 * which is roughly square. (This contrasts with others such as the Latin alphabet,
 * which are taller than they are wide.) To display these East Asian characters
 * properly, twice the horizontal space is used. This must be taken into account
 * when doing things like wrapping text to a specific width.
 *
 * Each pair of numbers in the below table is a range of Unicode codepoints
 * which should be displayed as double-width.
 */

HEADER;

    $result .= "\n#define FIRST_DOUBLEWIDTH_CODEPOINT 0x" . dechex($wideRanges[0]->start) . "\n\n";

    $result .= <<<'TABLESTART'
static const struct {
	int begin;
	int end;
} mbfl_eaw_table[] = {

TABLESTART;

    // One initializer per range, both ends written in lower-case hex
    foreach ($wideRanges as $range) {
        $startCode = dechex($range->start);
        $endCode = dechex($range->end);
        $result .= "\t{ 0x{$startCode}, 0x{$endCode} },\n";
    }

    $result .= "};\n";
    return $result;
}
749