xref: /PHP-7.3/ext/mbstring/ucgendat/uctest.php (revision 582a65b0)
1<?php error_reporting(E_ALL);
2
// The Unicode data files are expected to sit in the same directory as this script.
$dir = __DIR__;
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';

// Stop before running any check if one of the input files is missing.
$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile];
foreach ($files as $file) {
    if (file_exists($file)) {
        continue;
    }

    echo "File $file does not exist.\n";
    return;
}

// Each check prints one line per mismatch it finds and nothing otherwise.
testUnicodeData(file_get_contents($unicodeDataFile));
testCaseFolding(file_get_contents($caseFoldingFile));
testSpecialCasing(file_get_contents($specialCasingFile));
19
/**
 * Lazily splits the text of a data file into records.
 *
 * Everything from the first '#' on a line is discarded as a comment, lines that
 * are blank after that are skipped, and each remaining line is split on ';'
 * with every field trimmed of surrounding whitespace.
 *
 * @param string $input Full contents of the data file.
 * @return Generator Yields one array of trimmed string fields per record.
 */
function parseDataFile(string $input) {
    foreach (explode("\n", $input) as $rawLine) {
        // Keep only the part in front of the first '#'.
        $record = trim(explode('#', $rawLine, 2)[0]);
        if ($record === '') {
            continue;
        }

        yield array_map('trim', explode(';', $record));
    }
}
38
/**
 * Parses a whitespace-separated list of hexadecimal code points.
 *
 * Tokens are separated by runs of whitespace and empty tokens are ignored, so
 * an empty field yields an empty array (an empty mapping) rather than a single
 * spurious code point 0, and repeated or surrounding spaces add no extra entries.
 *
 * @param string $strCodes Hex code points, e.g. "0053 0073".
 * @return int[] The code points as integers, in input order.
 */
function parseCodes(string $strCodes) : array {
    $tokens = preg_split('/\s+/', $strCodes, -1, PREG_SPLIT_NO_EMPTY);

    $codes = [];
    foreach ($tokens as $token) {
        $codes[] = intval($token, 16);
    }
    return $codes;
}
46
/**
 * Compares mb_convert_case() for a single character against an expected mapping.
 *
 * Prints one line containing the mode, the actual result and the expected
 * result when they differ; prints nothing when they match.
 *
 * @param mixed $type      Mode passed as the second argument to mb_convert_case().
 * @param int   $origCode  Code point of the character to convert.
 * @param int[] $newCodes  Code points making up the expected result.
 */
function testCaseMap($type, int $origCode, array $newCodes) {
    // Expected string: the concatenation of all expected code points.
    $newStr = implode('', array_map('mb_chr', $newCodes));
    $mbNewStr = mb_convert_case(mb_chr($origCode), $type);

    if ($mbNewStr === $newStr) {
        return;
    }

    echo "$type: $mbNewStr != $newStr\n";
}
59
/**
 * Checks a one-to-one case mapping via testCaseMap().
 *
 * A falsy $newCode (0) means there is no mapping, in which case the character
 * is expected to map to itself.
 *
 * @param mixed $type     Mode forwarded to testCaseMap().
 * @param int   $origCode Code point of the character to convert.
 * @param int   $newCode  Expected code point, or 0 for "unchanged".
 */
function testSimpleCaseMap($type, int $origCode, int $newCode) {
    testCaseMap($type, $origCode, [$newCode ?: $origCode]);
}
67
/**
 * Checks the simple upper- and lower-case mappings of every record in the
 * given UnicodeData.txt contents against mb_convert_case().
 *
 * Mismatches are reported by testCaseMap(), reached through testSimpleCaseMap().
 *
 * @param string $input Full contents of UnicodeData.txt.
 */
function testUnicodeData(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) == 15);

        $code = intval($fields[0], 16);

        // Fields 12 and 13 hold the simple upper/lower-case mappings. An empty
        // field parses to 0, which testSimpleCaseMap() treats as "maps to itself".
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        testSimpleCaseMap(MB_CASE_UPPER_SIMPLE, $code, $upperCase);
        testSimpleCaseMap(MB_CASE_LOWER_SIMPLE, $code, $lowerCase);

        // The title-case mapping (field 14, falling back to the upper-case mapping)
        // is deliberately not checked with MB_CASE_TITLE_SIMPLE. Unfortunately
        // MB_CASE_TITLE does not actually return the title case, even when passed
        // only a single character. It does ad-hoc magic based on the character
        // class, so that certain characters, such as roman numerals or circled
        // characters will not be title-cased.
    }
}
95
/**
 * Checks the case-folding records of the given CaseFolding.txt contents
 * against mb_convert_case().
 *
 * Records with status 'C' or 'S' are checked as single-code-point mappings
 * with MB_CASE_FOLD_SIMPLE, records with status 'F' as code point sequences
 * with MB_CASE_FOLD. Records with any other status are ignored.
 *
 * @param string $input Full contents of CaseFolding.txt.
 */
function testCaseFolding(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) == 4);

        $code = intval($fields[0], 16);
        $mapping = $fields[2];

        switch ($fields[1]) {
            case 'C':
            case 'S':
                testSimpleCaseMap(MB_CASE_FOLD_SIMPLE, $code, intval($mapping, 16));
                break;

            case 'F':
                testCaseMap(MB_CASE_FOLD, $code, parseCodes($mapping));
                break;
        }
    }
}
111
/**
 * Checks the unconditional records of the given SpecialCasing.txt contents
 * against mb_convert_case().
 *
 * Fields 1, 2 and 3 of a record are read as the lower-, title- and upper-case
 * code point sequences. Records whose condition field (field 4) is non-empty
 * are skipped.
 *
 * @param string $input Full contents of SpecialCasing.txt.
 */
function testSpecialCasing(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) >= 5);

        if ($fields[4]) {
            // We don't support conditional mappings
            continue;
        }

        $code = intval($fields[0], 16);

        // Mode => field holding the expected sequence, in the order checked.
        $expectations = [
            MB_CASE_LOWER => $fields[1],
            MB_CASE_UPPER => $fields[3],
            MB_CASE_TITLE => $fields[2],
        ];
        foreach ($expectations as $mode => $strCodes) {
            testCaseMap($mode, $code, parseCodes($strCodes));
        }
    }
}
132