<?php

/**
 * Checks mbstring's case-conversion functions against the Unicode Character
 * Database files UnicodeData.txt, CaseFolding.txt and SpecialCasing.txt, which
 * are looked up in the same directory as this script.
 *
 * Every mismatch is printed on its own line; a run that prints nothing found
 * no mismatches.
 */

error_reporting(E_ALL);

$dir = __DIR__;
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';

// Report only the first missing data file, then skip all checks.
$missingFile = null;
foreach ([$unicodeDataFile, $caseFoldingFile, $specialCasingFile] as $file) {
    if (!file_exists($file)) {
        $missingFile = $file;
        break;
    }
}

if ($missingFile !== null) {
    echo "File $missingFile does not exist.\n";
} else {
    testUnicodeData(file_get_contents($unicodeDataFile));
    testCaseFolding(file_get_contents($caseFoldingFile));
    testSpecialCasing(file_get_contents($specialCasingFile));
}

/**
 * Lazily splits a semicolon-separated data file into records.
 *
 * Everything from the first '#' on a line is treated as a comment and dropped.
 * Lines that are empty after that are skipped. Each remaining line is split on
 * ';' and every field is trimmed.
 *
 * @param string $input Full contents of the data file.
 *
 * @return Generator Yields one list of trimmed string fields per record.
 */
function parseDataFile(string $input): Generator
{
    foreach (explode("\n", $input) as $line) {
        // Keep only the part before the first '#'; trim() also removes a
        // trailing "\r" left over from CRLF line endings.
        $line = trim(explode('#', $line, 2)[0]);
        if ($line === '') {
            continue;
        }

        yield array_map('trim', explode(';', $line));
    }
}

/**
 * Converts a space-separated list of hexadecimal code points to integers.
 *
 * @param string $strCodes For example "0053 0073".
 *
 * @return int[] The code points in their original order.
 */
function parseCodes(string $strCodes): array
{
    return array_map(
        static function (string $hex): int {
            return intval($hex, 16);
        },
        explode(' ', $strCodes)
    );
}

/**
 * Compares mb_convert_case() for a single character with an expected mapping
 * and prints a line when they differ.
 *
 * @param int   $type     Mode passed to mb_convert_case() (an MB_CASE_* constant).
 * @param int   $origCode Code point of the character to convert.
 * @param int[] $newCodes Code points of the expected result, in order.
 */
function testCaseMap(int $type, int $origCode, array $newCodes): void
{
    $origChar = mb_chr($origCode);
    if ($origChar === false) {
        // mb_chr() returns false for code points it cannot encode, such as
        // the surrogate entries in UnicodeData.txt. There is no character to
        // convert, so there is nothing to compare.
        return;
    }

    $expected = '';
    foreach ($newCodes as $newCode) {
        $expected .= mb_chr($newCode);
    }

    $actual = mb_convert_case($origChar, $type);
    if ($actual !== $expected) {
        echo "$type: $actual != $expected\n";
    }
}

/**
 * Checks a one-to-one case mapping.
 *
 * @param int $type     Mode passed to mb_convert_case() (an MB_CASE_* constant).
 * @param int $origCode Code point of the character to convert.
 * @param int $newCode  Expected code point; 0 means the data file lists no
 *                      mapping, so the character must map to itself.
 */
function testSimpleCaseMap(int $type, int $origCode, int $newCode): void
{
    testCaseMap($type, $origCode, [$newCode !== 0 ? $newCode : $origCode]);
}

/**
 * Checks the simple upper- and lower-case mappings of every UnicodeData.txt
 * record.
 *
 * @param string $input Contents of UnicodeData.txt.
 */
function testUnicodeData(string $input): void
{
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 15);

        // An empty mapping field parses to 0, which testSimpleCaseMap() reads
        // as "maps to itself".
        $code = intval($fields[0], 16);
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);

        testSimpleCaseMap(MB_CASE_UPPER_SIMPLE, $code, $upperCase);
        testSimpleCaseMap(MB_CASE_LOWER_SIMPLE, $code, $lowerCase);

        // The title-case mapping in field 14 is deliberately not checked.
        // MB_CASE_TITLE does not return the plain title case even for a single
        // character: it applies ad-hoc rules based on the character class, so
        // certain characters, such as roman numerals or circled characters,
        // are not title-cased.
    }
}

/**
 * Checks the case foldings listed in CaseFolding.txt.
 *
 * Records with status C or S are checked as simple foldings and records with
 * status F as full foldings. Any other status is ignored.
 *
 * @param string $input Contents of CaseFolding.txt.
 */
function testCaseFolding(string $input): void
{
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 4);

        $code = intval($fields[0], 16);
        $status = $fields[1];

        if ($status === 'C' || $status === 'S') {
            testSimpleCaseMap(MB_CASE_FOLD_SIMPLE, $code, intval($fields[2], 16));
        } elseif ($status === 'F') {
            testCaseMap(MB_CASE_FOLD, $code, parseCodes($fields[2]));
        }
    }
}

/**
 * Checks the unconditional full case mappings listed in SpecialCasing.txt.
 *
 * @param string $input Contents of SpecialCasing.txt.
 */
function testSpecialCasing(string $input): void
{
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) >= 5);

        // We don't support conditional mappings: skip any record whose
        // condition field is not empty.
        if ($fields[4] !== '') {
            continue;
        }

        $code = intval($fields[0], 16);
        $lower = parseCodes($fields[1]);
        $title = parseCodes($fields[2]);
        $upper = parseCodes($fields[3]);

        testCaseMap(MB_CASE_LOWER, $code, $lower);
        testCaseMap(MB_CASE_UPPER, $code, $upper);
        testCaseMap(MB_CASE_TITLE, $code, $title);
    }
}