xref: /PHP-8.2/ext/mbstring/ucgendat/ucgendat.php (revision 0b32a15e)
1#!/usr/bin/env php
2<?php error_reporting(E_ALL);
3
4/**
5 * This is based on the ucgendat.c file from the OpenLDAP project, licensed as
6 * follows. This file is not necessary to build PHP. It's only necessary to
 * rebuild unicode_data.h and eaw_table.h from Unicode UCD files.
8 *
9 * Example usage:
10 * php ucgendat.php path/to/Unicode/data/files
11 */
12
13/* Copyright 1998-2007 The OpenLDAP Foundation.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted only as authorized by the OpenLDAP
18 * Public License.
19 *
20 * A copy of this license is available at
21 * <http://www.OpenLDAP.org/license.html>.
22 */
23
24/* Copyright 2001 Computing Research Labs, New Mexico State University
25 *
26 * Permission is hereby granted, free of charge, to any person obtaining a
27 * copy of this software and associated documentation files (the "Software"),
28 * to deal in the Software without restriction, including without limitation
29 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
30 * and/or sell copies of the Software, and to permit persons to whom the
31 * Software is furnished to do so, subject to the following conditions:
32 *
33 * The above copyright notice and this permission notice shall be included in
34 * all copies or substantial portions of the Software.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
37 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
38 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
39 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
40 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
41 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
42 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
/*
 * Script entry point: validate the command line, read the five UCD input
 * files from the given directory and regenerate the two C headers.
 * Every failure path terminates with a non-zero exit status so that calling
 * shells / build scripts can detect that nothing (or nothing valid) was
 * generated.
 */
if ($argc < 2) {
    echo "Usage: php ucgendat.php ./datadir\n";
    echo "./datadir must contain:\n";
    echo "UnicodeData.txt, CaseFolding.txt, SpecialCasing.txt, DerivedCoreProperties.txt, and EastAsianWidth.txt\n";
    exit(1);
}

$dir = $argv[1];
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';
$derivedCorePropertiesFile = $dir . '/DerivedCoreProperties.txt';
$eastAsianWidthFile = $dir . '/EastAsianWidth.txt';

$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile, $derivedCorePropertiesFile, $eastAsianWidthFile];
foreach ($files as $file) {
    if (!file_exists($file)) {
        echo "File $file does not exist.\n";
        exit(1);
    }
}

// Reads a whole input file; aborts instead of silently parsing an empty
// string when the file exists but cannot be read.
$readInput = function (string $path) : string {
    $contents = file_get_contents($path);
    if ($contents === false) {
        echo "File $path could not be read.\n";
        exit(1);
    }
    return $contents;
};

// Writes a generated header; aborts when the target cannot be written.
$writeOutput = function (string $path, string $contents) : void {
    if (file_put_contents($path, $contents) === false) {
        echo "File $path could not be written.\n";
        exit(1);
    }
};

$outputFile = __DIR__ . "/../unicode_data.h";

$data = new UnicodeData;
parseUnicodeData($data, $readInput($unicodeDataFile));
parseCaseFolding($data, $readInput($caseFoldingFile));
parseSpecialCasing($data, $readInput($specialCasingFile));
parseDerivedCoreProperties($data, $readInput($derivedCorePropertiesFile));
$writeOutput($outputFile, generateData($data));

$eawFile = __DIR__ . "/../libmbfl/mbfl/eaw_table.h";

$eawData = parseEastAsianWidth($readInput($eastAsianWidthFile));
$writeOutput($eawFile, generateEastAsianWidthData($eawData));
80
/**
 * A run of consecutive code points, stored as its first and last member
 * (both inclusive: a single code point is represented with start == end).
 */
class Range {
    /** @var int First code point of the run. */
    public $start;

    /** @var int Last code point of the run. */
    public $end;

    /**
     * @param int $start First code point of the run.
     * @param int $end   Last code point of the run.
     */
    public function __construct(int $start, int $end) {
        [$this->start, $this->end] = [$start, $end];
    }
}
90
/**
 * Accumulates everything parsed from the UCD files: per-property code point
 * ranges and the upper/lower/title/fold case maps.
 */
class UnicodeData {
    /** @var array<string,int> Property name => index into $propRanges. */
    public $propIndexes;
    /** @var int Number of distinct stored properties. */
    public $numProps;
    /** @var Range[][] Ranges per property, indexed by property index. */
    public $propRanges;
    /** @var array<string,array> Case maps keyed by 'upper', 'lower', 'title' and 'fold'. */
    public $caseMaps;
    /** @var int[] Flat table of multi-code-point case mappings (filled by prepareCaseData()). */
    public $extraCaseData;

    public function __construct() {
        /*
         * List of properties expected to be found in the Unicode Character Database.
         */
        $this->propIndexes = array_flip([
            "Mn", "Mc", "Me", "Nd", "Nl", "No",
            "Zs", "Zl", "Zp", "Cs", "Co", "Cn",
            "Lu", "Ll", "Lt", "Lm", "Lo", "Sm",
            "Sc", "Sk", "So", "L", "R", "EN",
            "ES", "ET", "AN", "CS", "B", "S",
            "WS", "ON", "AL",
            "C", "P", "Cased", "Case_Ignorable"
        ]);
        $this->numProps = count($this->propIndexes);

        $this->propRanges = array_fill(0, $this->numProps, []);
        $this->caseMaps = [
            'upper' => [],
            'lower' => [],
            'title' => [],
            'fold' => [],
        ];
        $this->extraCaseData = [];
    }

    /**
     * Map a property name from the data files to its storage index.
     *
     * Several source properties are folded into one stored property first
     * (see the inline comments).
     *
     * @throws Exception if the (folded) property is not in $propIndexes.
     */
    public function propToIndex(string $prop) : int {
        /* Deal with directionality codes introduced in Unicode 3.0. */
        if (in_array($prop, ["BN", "NSM", "PDF", "LRE", "LRO", "RLE", "RLO", "LRI", "RLI", "FSI", "PDI"], true)) {
            /*
             * Mark all of these as Other Neutral to preserve compatibility with
             * older versions.
             */
            $prop = "ON";
        }

        /* Merge all punctuation into a single category for efficiency of access.
         * We're currently not interested in distinguishing different kinds of punctuation. */
        if (in_array($prop, ["Pc", "Pd", "Ps", "Pe", "Po", "Pi", "Pf"], true)) {
            $prop = "P";
        }
        /* Same for control. */
        if (in_array($prop, ["Cc", "Cf"], true)) {
            $prop = "C";
        }

        if (!isset($this->propIndexes[$prop])) {
            throw new Exception("Unknown property $prop");
        }

        return $this->propIndexes[$prop];
    }

    /**
     * Record that a single code point has the given property.
     *
     * When the code point directly follows the most recently added range of
     * that property, the range is extended instead of adding a new one.
     */
    public function addProp(int $code, string $prop) {
        $propIdx = $this->propToIndex($prop);

        // Range objects are shared handles, so extending the last element of
        // this local copy also updates the range stored in $this->propRanges.
        $ranges = $this->propRanges[$propIdx];
        $lastRange = end($ranges);
        if ($lastRange !== false && $code === $lastRange->end + 1) {
            $lastRange->end++;
            return;
        }

        $this->propRanges[$propIdx][] = new Range($code, $code);
    }

    /**
     * Record that every code point in [$startCode, $endCode] has the property.
     */
    public function addPropRange(int $startCode, int $endCode, string $prop) {
        $propIdx = $this->propToIndex($prop);
        $this->propRanges[$propIdx][] = new Range($startCode, $endCode);
    }

    /**
     * Store a simple (one code point) mapping in the case map named $case.
     */
    public function addCaseMapping(string $case, int $origCode, int $mappedCode) {
        $this->caseMaps[$case][$origCode] = $mappedCode;
    }

    /**
     * Sort ranges by start and merge directly adjacent ones.
     *
     * The Range objects passed in are modified in place when merged.
     *
     * @param Range[] $ranges
     * @return Range[] Sorted, merged list.
     * @throws Exception if two ranges overlap.
     */
    public function compactRangeArray(array $ranges) : array {
        // Sort by start codepoint
        usort($ranges, function (Range $r1, Range $r2) {
            return $r1->start <=> $r2->start;
        });

        // An end of -1 marks "no range collected yet".
        $lastRange = new Range(-1, -1);
        $newRanges = [];
        foreach ($ranges as $range) {
            if ($lastRange->end == -1) {
                $lastRange = $range;
            } else if ($range->start == $lastRange->end + 1) {
                $lastRange->end = $range->end;
            } else if ($range->start > $lastRange->end + 1) {
                $newRanges[] = $lastRange;
                $lastRange = $range;
            } else {
                throw new Exception(sprintf(
                    "Overlapping ranges [%x, %x] and [%x, %x]",
                    $lastRange->start, $lastRange->end,
                    $range->start, $range->end
                ));
            }
        }
        if ($lastRange->end != -1) {
            $newRanges[] = $lastRange;
        }
        return $newRanges;
    }

    /**
     * Compact the range list of every property (see compactRangeArray()).
     */
    public function compactPropRanges() {
        foreach ($this->propRanges as &$ranges) {
            $ranges = $this->compactRangeArray($ranges);
        }
        // Break the reference so later writes to $ranges cannot touch the
        // last property's list.
        unset($ranges);
    }
}
211
/**
 * Lazily split a UCD-style text file into records.
 *
 * For every line, anything from the first '#' onwards is dropped; lines that
 * are empty after trimming are skipped. Each remaining line is split on ';'
 * and yielded as an array of whitespace-trimmed fields.
 */
function parseDataFile(string $input) {
    foreach (explode("\n", $input) as $rawLine) {
        $commentStart = strpos($rawLine, '#');
        $content = ($commentStart === false)
            ? $rawLine
            : substr($rawLine, 0, $commentStart);

        $content = trim($content);
        if ($content !== '') {
            yield array_map('trim', explode(';', $content));
        }
    }
}
230
/**
 * Parse UnicodeData.txt: record general category and bidi class for every
 * code point (or code point range) and the simple case mappings.
 *
 * A record whose name starts with '<' (other than "<control>") is treated as
 * the first line of a range; the record that follows it supplies the end of
 * the range.
 *
 * @throws Exception on malformed records, including a range start that is
 *                   not followed by a usable end record.
 */
function parseUnicodeData(UnicodeData $data, string $input) : void {
    $lines = parseDataFile($input);
    foreach ($lines as $fields) {
        if (count($fields) != 15) {
            throw new Exception("Line does not contain 15 fields");
        }

        $code = intval($fields[0], 16);

        $name = $fields[1];
        if ($name === '') {
            throw new Exception("Empty name");
        }

        $generalCategory = $fields[2];
        $bidiClass = $fields[4];

        if ($name[0] === '<' && $name !== '<control>') {
            // This is a character range: consume the following record, which
            // holds the last code point. The enclosing foreach then advances
            // past it on its own.
            $lines->next();
            if (!$lines->valid()) {
                // Without this check a truncated file would silently produce
                // a range ending at code point 0.
                throw new Exception(sprintf("Missing end of character range starting at %X", $code));
            }

            $nextFields = $lines->current();
            if (count($nextFields) != 15) {
                throw new Exception("Line does not contain 15 fields");
            }

            $nextCode = intval($nextFields[0], 16);
            if ($nextCode < $code) {
                throw new Exception(sprintf("Invalid character range [%X, %X]", $code, $nextCode));
            }

            $data->addPropRange($code, $nextCode, $generalCategory);
            $data->addPropRange($code, $nextCode, $bidiClass);
            continue;
        }

        $data->addProp($code, $generalCategory);
        $data->addProp($code, $bidiClass);

        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        // An empty title-case field falls back to the upper-case mapping.
        $titleCase = intval($fields[14], 16) ?: $upperCase;
        if ($upperCase) {
            $data->addCaseMapping('upper', $code, $upperCase);
        }
        if ($lowerCase) {
            $data->addCaseMapping('lower', $code, $lowerCase);
        }
        if ($titleCase) {
            $data->addCaseMapping('title', $code, $titleCase);
        }
    }
}
279
/**
 * Convert a space-separated list of hexadecimal code points into a list of
 * integers, in the same order.
 */
function parseCodes(string $strCodes) : array {
    return array_map(
        static function ($hex) {
            return intval($hex, 16);
        },
        explode(' ', $strCodes)
    );
}
287
/**
 * Parse CaseFolding.txt into the 'fold' case map.
 *
 * Status 'T' records are skipped. 'C' and 'S' records give the simple fold.
 * 'F' records give a multi-code-point fold, stored as an array whose element
 * 0 is the simple fold (the code point itself when no simple fold is known)
 * followed by the full folding. The result does not depend on whether the
 * simple or the full record of a code point comes first.
 *
 * @throws Exception on malformed records, duplicate simple foldings or an
 *                   unknown status.
 */
function parseCaseFolding(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        if (count($fields) != 4) {
            throw new Exception("Line does not contain 4 fields");
        }

        $code = intval($fields[0], 16);
        $status = $fields[1];
        if ($status == 'T') {
            // Use language-agnostic case folding
            continue;
        }

        if ($status == 'C' || $status == 'S') {
            $foldCode = intval($fields[2], 16);
            if (!isset($data->caseMaps['fold'][$code])) {
                $data->addCaseMapping('fold', $code, $foldCode);
            } else if (is_array($data->caseMaps['fold'][$code])) {
                // Add simple mapping to full mapping data
                $data->caseMaps['fold'][$code][0] = $foldCode;
            } else {
                throw new Exception(sprintf("Duplicate simple case folding for %X", $code));
            }
        } else if ($status == 'F') {
            $foldCodes = parseCodes($fields[2]);

            // Keep a simple folding that was already recorded for this code
            // point instead of overwriting it with the identity.
            $simpleFoldCode = $data->caseMaps['fold'][$code] ?? $code;
            if (is_array($simpleFoldCode)) {
                $simpleFoldCode = $simpleFoldCode[0];
            }
            $data->caseMaps['fold'][$code] = array_merge([$simpleFoldCode], $foldCodes);
        } else {
            // Fail loudly even when assertions are disabled.
            throw new Exception("Unknown case folding status $status");
        }
    }
}
319
/**
 * Merge one mapping from SpecialCasing.txt into the case map $type.
 *
 * A one-element mapping must agree with the simple mapping already known for
 * $code (the code point itself when none is stored) and adds nothing, except
 * for the title-case corner case handled below. Longer mappings (up to three
 * code points) replace the entry with an array: the simple mapping first,
 * then the full mapping.
 *
 * @throws Exception if a one-element mapping contradicts the simple mapping
 *                   or a mapping has more than three code points.
 */
function addSpecialCasing(UnicodeData $data, string $type, int $code, array $caseCodes) : void {
    $numCodes = count($caseCodes);
    $simple = $data->caseMaps[$type][$code] ?? $code;

    if ($numCodes != 1) {
        if ($numCodes > 3) {
            throw new Exception("Special case mapping with more than 3 code points");
        }

        $data->caseMaps[$type][$code] = array_merge([$simple], $caseCodes);
        return;
    }

    $mapped = $caseCodes[0];
    if ($mapped != $simple) {
        throw new Exception("Simple case code in special casing does not match");
    }

    // A title-case mapping onto itself still has to be stored explicitly
    // when the upper-case mapping of the same code point is non-trivial.
    $upper = $data->caseMaps['upper'][$code] ?? $code;
    if ($type == 'title' && $code == $mapped && $upper != $code) {
        $data->caseMaps['title'][$code] = $code;
    }
}
342
/**
 * Parse SpecialCasing.txt and feed every unconditional record into
 * addSpecialCasing() for the lower, upper and title maps.
 *
 * @throws Exception if a record does not have 5 or 6 fields.
 */
function parseSpecialCasing(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        $numFields = count($fields);
        if ($numFields != 5 && $numFields != 6) {
            throw new Exception("Line does not contain 5 or 6 fields");
        }

        // Field 4 holds the condition list; only unconditional mappings are used.
        if ($fields[4]) {
            continue;
        }

        $code = intval($fields[0], 16);
        [$lower, $title, $upper] = array_map('parseCodes', array_slice($fields, 1, 3));

        addSpecialCasing($data, 'lower', $code, $lower);
        addSpecialCasing($data, 'upper', $code, $upper);

        // Title must come last: its handling looks at the upper-case map.
        addSpecialCasing($data, 'title', $code, $title);
    }
}
367
/**
 * Parse DerivedCoreProperties.txt and record the code points / ranges that
 * have the Cased or Case_Ignorable property. All other properties are
 * ignored.
 *
 * Records of ignored properties may carry additional fields (newer UCD
 * releases list a property value in a third field, e.g. "InCB; Linker");
 * only the records that are actually used must have exactly two fields.
 *
 * @throws Exception on records with fewer than two fields, on Cased /
 *                   Case_Ignorable records without exactly two fields, or on
 *                   an invalid range specification.
 */
function parseDerivedCoreProperties(UnicodeData $data, string $input) : void {
    foreach (parseDataFile($input) as $fields) {
        if (count($fields) < 2) {
            throw new Exception("Line does not contain 2 fields");
        }

        $property = $fields[1];
        if ($property != 'Cased' && $property != 'Case_Ignorable') {
            continue;
        }

        if (count($fields) != 2) {
            throw new Exception("Line does not contain 2 fields");
        }

        $range = explode('..', $fields[0]);
        if (count($range) == 2) {
            $data->addPropRange(intval($range[0], 16), intval($range[1], 16), $property);
        } else if (count($range) == 1) {
            $data->addProp(intval($range[0], 16), $property);
        } else {
            throw new Exception("Invalid range");
        }
    }
}
389
/**
 * Parse EastAsianWidth.txt and collect the code points whose width class is
 * 'W' or 'F' as a list of Range objects, in file order.
 *
 * An entry that starts right after the end of the previously collected range
 * is merged into that range.
 *
 * @return Range[]
 */
function parseEastAsianWidth(string $input) : array {
    $wideRanges = [];

    foreach (parseDataFile($input) as $fields) {
        if ($fields[1] != 'W' && $fields[1] != 'F') {
            continue;
        }

        // Field 0 is either "XXXX..YYYY" or a single code point "XXXX".
        $spec = $fields[0];
        $dotsPos = strpos($spec, '..');
        if ($dotsPos) {
            $first = intval(substr($spec, 0, $dotsPos), 16);
            $last = intval(substr($spec, $dotsPos + 2), 16);
        } else {
            $first = $last = intval($spec, 16);
        }

        // end() yields false for an empty list; Range objects are handles,
        // so updating $tail updates the element stored in $wideRanges.
        $tail = end($wideRanges);
        if ($tail !== false && $first == $tail->end + 1) {
            $tail->end = $last;
        } else {
            $wideRanges[] = new Range($first, $last);
        }
    }

    return $wideRanges;
}
426
/**
 * Render a list of values as the body of a C array initializer.
 *
 * Each value is formatted with $format; values are separated by commas, and
 * every $width-th value starts a new tab-indented line (the others are
 * preceded by a single space).
 */
function formatArray(array $values, int $width, string $format) : string {
    $pieces = [];
    $total = count($values);
    for ($pos = 0; $pos < $total; $pos++) {
        $lead = ($pos % $width == 0) ? "\n\t" : " ";
        $pieces[] = $lead . sprintf($format, $values[$pos]);
    }
    return implode(',', $pieces);
}
441
/**
 * Format values as 4-digit, zero-padded hexadecimal literals, $width per line.
 */
function formatShortHexArray(array $values, int $width) : string {
    $format = "0x%04x";
    return formatArray($values, $width, $format);
}
/**
 * Format values as decimal numbers padded to 5 characters, $width per line.
 */
function formatShortDecArray(array $values, int $width) : string {
    $format = "% 5d";
    return formatArray($values, $width, $format);
}
/**
 * Format values as 8-digit, zero-padded hexadecimal literals, $width per line.
 */
function formatIntArray(array $values, int $width) : string {
    $format = "0x%08x";
    return formatArray($values, $width, $format);
}
451
/**
 * Emit the C source for the property tables: the number of properties, the
 * per-property offsets into the range table, and the flat range table made
 * of (start, end) pairs.
 *
 * Compacts the property ranges of $data as a side effect.
 */
function generatePropData(UnicodeData $data) {
    $data->compactPropRanges();

    // Build the flat (start, end) list; the offset of a property is the
    // number of values emitted before its first range.
    $propOffsets = [];
    $values = [];
    foreach ($data->propRanges as $ranges) {
        $propOffsets[] = count($values);
        foreach ($ranges as $range) {
            $values[] = $range->start;
            $values[] = $range->end;
        }
    }

    // Add sentinel for binary search
    $propOffsets[] = count($values);

    // TODO ucgendat.c pads the prop offsets to the next multiple of 4
    // for rather dubious reasons of alignment. This should probably be
    // dropped
    while (count($propOffsets) % 4 != 0) {
        $propOffsets[] = 0;
    }

    return "static const unsigned short _ucprop_size = $data->numProps;\n\n"
        . "static const unsigned short  _ucprop_offsets[] = {"
        . formatShortHexArray($propOffsets, 8)
        . "\n};\n\n"
        . "static const unsigned int _ucprop_ranges[] = {"
        . formatIntArray($values, 4)
        . "\n};\n\n";
}
494
/**
 * Concatenate the values of all inner arrays into one list (one level deep;
 * keys of the outer and inner arrays are discarded).
 */
function flatten(array $array) {
    $flat = [];
    foreach ($array as $inner) {
        array_push($flat, ...array_values($inner));
    }
    return $flat;
}
504
/**
 * Bring the case maps of $data into the form that is written out.
 *
 * Title-case entries equal to the upper-case mapping of the same code point
 * are removed. Array-valued (multi-code-point) entries are appended to
 * $data->extraCaseData and replaced by an integer that packs the number of
 * full-mapping code points (bits 24 and up) with the index of the entry in
 * that table.
 */
function prepareCaseData(UnicodeData $data) {
    // Don't store titlecase if it's the same as uppercase
    $upperMap = $data->caseMaps['upper'];
    foreach ($data->caseMaps['title'] as $code => $titleCode) {
        $upperCode = $upperMap[$code] ?? $code;
        if ($titleCode == $upperCode) {
            unset($data->caseMaps['title'][$code]);
        }
    }

    // Store full (multi-char) case mappings in a separate table and only
    // store an index into it
    foreach (array_keys($data->caseMaps) as $type) {
        foreach ($data->caseMaps[$type] as $code => $mapping) {
            if (!is_array($mapping)) {
                continue;
            }

            $offset = count($data->extraCaseData);
            // -1 because the first entry is the simple case mapping
            $length = count($mapping) - 1;
            $data->caseMaps[$type][$code] = ($length << 24) | $offset;

            foreach ($mapping as $mappedCode) {
                $data->extraCaseData[] = $mappedCode;
            }
        }
    }
}
530
/**
 * Build the minimal perfect hash for one case map and emit its C tables
 * (`_uccase_<name>_g` and `_uccase_<name>_table`, plus their sizes).
 *
 * Also prints the table sizes to standard output as a progress line.
 */
function generateCaseMPH(string $name, array $map) {
    [$gTable, $table] = generateMPH($map, false);
    $gSize = count($gTable);
    $tableSize = count($table);
    echo "$name: n=", $tableSize, ", g=", $gSize, "\n";

    $prefix = "_uccase_" . $name;

    return "static const unsigned {$prefix}_g_size = " . $gSize . ";\n"
        . "static const short {$prefix}_g[] = {"
        . formatShortDecArray($gTable, 8)
        . "\n};\n\n"
        . "static const unsigned {$prefix}_table_size = " . $tableSize . ";\n"
        . "static const unsigned {$prefix}_table[] = {"
        . formatIntArray(flatten($table), 4)
        . "\n};\n\n";
}
547
/**
 * Emit the C source for all case mapping tables: one perfect-hash table set
 * per case map (upper, lower, title, fold), followed by the shared table of
 * multi-code-point mappings.
 */
function generateCaseData(UnicodeData $data) {
    prepareCaseData($data);

    $output = "";
    foreach (['upper', 'lower', 'title', 'fold'] as $type) {
        $output .= generateCaseMPH($type, $data->caseMaps[$type]);
    }

    $output .= "static const unsigned _uccase_extra_table[] = {"
        . formatIntArray($data->extraCaseData, 4)
        . "\n};\n\n";
    return $output;
}
561
/**
 * Produce the complete contents of the generated Unicode data header: the
 * "do not edit" banner, then the property tables, then the case tables.
 */
function generateData(UnicodeData $data) {
    $banner = <<<'HEADER'
/* This file was generated from a modified version of UCData's ucgendat.
 *
 *                     DO NOT EDIT THIS FILE!
 *
 * Instead, download the appropriate UnicodeData-x.x.x.txt and
 * CompositionExclusions-x.x.x.txt files from http://www.unicode.org/Public/
 * and run ext/mbstring/ucgendat/ucgendat.php.
 *
 * More information can be found in the UCData package. Unfortunately,
 * the project's page doesn't seem to be live anymore, so you can use
 * OpenLDAP's modified copy (look in libraries/liblunicode/ucdata) */
HEADER;

    // Property data must be generated before case data, as in the output order.
    $propSection = generatePropData($data);
    $caseSection = generateCaseData($data);

    return $banner . "\n\n" . $propSection . $caseSection;
}
581
582/*
583 * Minimal Perfect Hash Generation
584 *
585 * Based on "Hash, displace, and compress" algorithm due to
586 * Belazzougui, Botelho and Dietzfelbinger.
587 *
588 * Hash function based on https://stackoverflow.com/a/12996028/385378.
589 * MPH implementation based on http://stevehanov.ca/blog/index.php?id=119.
590 */
591
/**
 * Integer hash of key $x under the seed / displacement value $d, reduced to
 * the low 32 bits.
 */
function hashInt(int $d, int $x) {
    $mixed = $x ^ $d;
    $mixed ^= $mixed >> 16;
    return ($mixed * 0x45d9f3b) & 0xffffffff;
}
597
/**
 * Try to build a minimal perfect hash for $map using $gSize first-level
 * buckets.
 *
 * Keys are distributed into buckets with hashInt(0, key) % $gSize. For each
 * bucket holding more than one key, the smallest displacement d >= 1 is
 * searched for which hashInt(d, key) % count($map) puts all of its keys into
 * distinct, still unused table slots; d is stored in the g table. A bucket
 * with exactly one key gets a remaining free slot, stored negated in the g
 * table. Unused g entries keep the fill value 0x7fff.
 *
 * @return array [] when some bucket would need a displacement above 0x7fff,
 *               otherwise [$gTable, $table] where $table maps slot to
 *               [key, value].
 */
function tryGenerateMPH(array $map, int $gSize) {
    $tableSize = count($map);
    $gTable = array_fill(0, $gSize, 0x7fff);

    $buckets = [];
    foreach ($map as $key => $value) {
        $buckets[hashInt(0, $key) % $gSize][] = [$key, $value];
    }

    // Sort by descending number of collisions
    usort($buckets, function ($left, $right) {
        return count($right) <=> count($left);
    });

    $table = [];
    foreach ($buckets as $bucket) {
        if (count($bucket) <= 1) {
            continue;
        }

        // Try values of $d until all elements are placed in different slots
        for ($d = 1;; $d++) {
            if ($d > 0x7fff) {
                return [];
            }

            $claimed = [];
            foreach ($bucket as $entry) {
                $slot = hashInt($d, $entry[0]) % $tableSize;
                if (isset($table[$slot]) || isset($claimed[$slot])) {
                    // Collision: retry the whole bucket with the next $d
                    continue 2;
                }
                $claimed[$slot] = true;
            }
            break;
        }

        $gTable[hashInt(0, $bucket[0][0]) % $gSize] = $d;
        foreach ($bucket as $entry) {
            $table[hashInt($d, $entry[0]) % $tableSize] = $entry;
        }
    }

    // Slots not taken by the multi-key buckets, in ascending order
    $freeSlots = [];
    for ($slot = 0; $slot < $tableSize; $slot++) {
        if (!isset($table[$slot])) {
            $freeSlots[] = $slot;
        }
    }

    // For buckets with only one element, we directly store the index
    $nextFree = 0;
    foreach ($buckets as $bucket) {
        if (count($bucket) != 1) {
            continue;
        }

        [$entry] = $bucket;
        $slot = $freeSlots[$nextFree++];
        $table[$slot] = $entry;
        $gTable[hashInt(0, $entry[0]) % $gSize] = -$slot;
    }

    ksort($gTable);
    ksort($table);

    return [$gTable, $table];
}
675
/**
 * Find a bucket count for which tryGenerateMPH() succeeds and return its
 * result.
 *
 * With $fast, bucket counts count($map) / lambda are tried for lambda = 5.0,
 * 4.5, ..., 0.5. Otherwise every bucket count starting at count($map) / 7 is
 * tried in increasing order.
 *
 * For a non-empty map the bucket count is never allowed to drop to 0, since
 * tryGenerateMPH() reduces hashes modulo the bucket count.
 *
 * @return array [$gTable, $table] as returned by tryGenerateMPH().
 * @throws Exception if the fast search runs out of lambda values.
 */
function generateMPH(array $map, bool $fast) {
    $numKeys = count($map);
    // An empty map keeps the historic result of two empty tables.
    $minSize = $numKeys > 0 ? 1 : 0;

    if ($fast) {
        // Check size starting lambda=5.0 in 0.5 increments; stop before
        // lambda reaches 0 instead of dividing by it.
        for ($lambda = 5.0; $lambda > 0; $lambda -= 0.5) {
            $m = max($minSize, (int) ($numKeys / $lambda));
            $mph = tryGenerateMPH($map, $m);
            if (!empty($mph)) {
                return $mph;
            }
        }

        throw new Exception("Failed to generate minimal perfect hash");
    }

    // Check all sizes starting lambda=7.0
    for ($m = max($minSize, (int) ($numKeys / 7.0));; $m++) {
        $mph = tryGenerateMPH($map, $m);
        if (!empty($mph)) {
            return $mph;
        }
    }
}
701
/**
 * Produce the complete contents of the generated East Asian Width header: a
 * banner comment, a #define with the start of the first range, and the
 * `mbfl_eaw_table` array with one { begin, end } row per range.
 *
 * @param array $wideRanges Objects with integer `start` and `end` properties.
 */
function generateEastAsianWidthData(array $wideRanges) {
    $banner = <<<'HEADER'
/* This file was generated by ext/mbstring/ucgendat/ucgendat.php.
 *
 *                     DO NOT EDIT THIS FILE!
 *
 * East Asian Width table
 *
 * Some characters in East Asian languages are intended to be displayed in a space
 * which is roughly square. (This contrasts with others such as the Latin alphabet,
 * which are taller than they are wide.) To display these East Asian characters
 * properly, twice the horizontal space is used. This must be taken into account
 * when doing things like wrapping text to a specific width.
 *
 * Each pair of numbers in the below table is a range of Unicode codepoints
 * which should be displayed as double-width.
 */

HEADER;

    $tableStart = <<<'TABLESTART'
static const struct {
	int begin;
	int end;
} mbfl_eaw_table[] = {

TABLESTART;

    $firstCode = dechex($wideRanges[0]->start);

    $rows = '';
    foreach ($wideRanges as $range) {
        $rows .= sprintf("\t{ 0x%s, 0x%s },\n", dechex($range->start), dechex($range->end));
    }

    return $banner
        . "\n#define FIRST_DOUBLEWIDTH_CODEPOINT 0x" . $firstCode . "\n\n"
        . $tableStart
        . $rows
        . "};\n";
}
741