#!/usr/bin/env php
<?php
error_reporting(E_ALL);

/*
 * Compares the results of the mb_* case-mapping functions with the mappings listed in
 * UnicodeData.txt, CaseFolding.txt and SpecialCasing.txt, which are expected to sit in the
 * same directory as this script. Each mismatch is printed as one line of output.
 */

runCaseMappingChecks();

/**
 * Script driver: makes sure all three data files exist, then runs each check on its file.
 *
 * Prints a message and returns without running any check if a file is missing.
 */
function runCaseMappingChecks(): void {
    $dir = __DIR__;
    $unicodeDataFile = $dir . '/UnicodeData.txt';
    $caseFoldingFile = $dir . '/CaseFolding.txt';
    $specialCasingFile = $dir . '/SpecialCasing.txt';

    foreach ([$unicodeDataFile, $caseFoldingFile, $specialCasingFile] as $file) {
        if (!file_exists($file)) {
            echo "File $file does not exist.\n";
            return;
        }
    }

    testUnicodeData(readDataFile($unicodeDataFile));
    testCaseFolding(readDataFile($caseFoldingFile));
    testSpecialCasing(readDataFile($specialCasingFile));
}

/**
 * Reads a whole data file into a string.
 *
 * @throws RuntimeException if the file cannot be read; without this check a failed read
 *                          would be treated as an empty file and no mapping would be tested.
 */
function readDataFile(string $file): string {
    $contents = file_get_contents($file);
    if ($contents === false) {
        throw new RuntimeException("File $file could not be read.");
    }
    return $contents;
}

/**
 * Lazily parses a ';'-separated data file with '#' comments.
 *
 * Everything from the first '#' on a line is dropped, blank lines are skipped, and each
 * remaining line is yielded as a list of whitespace-trimmed fields.
 *
 * @return Generator yields one array of string fields per data line
 */
function parseDataFile(string $input): Generator {
    foreach (explode("\n", $input) as $line) {
        // Strip comments
        $hashPos = strpos($line, '#');
        if ($hashPos !== false) {
            $line = substr($line, 0, $hashPos);
        }

        // Skip empty lines
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        yield array_map('trim', explode(';', $line));
    }
}

/**
 * Converts a whitespace-separated list of hexadecimal code points into integers.
 *
 * An empty (or all-whitespace) list produces an empty array, and runs of whitespace count
 * as a single separator, so no spurious zero code points are produced.
 *
 * @return int[]
 */
function parseCodes(string $strCodes) : array {
    $codes = [];
    foreach (preg_split('/\s+/', $strCodes, -1, PREG_SPLIT_NO_EMPTY) as $strCode) {
        $codes[] = intval($strCode, 16);
    }
    return $codes;
}

/**
 * Checks that mb_convert_case() maps one code point to the expected code point sequence.
 *
 * Prints "<type>: <actual> != <expected>" when the result differs.
 *
 * @param int   $type     mb_convert_case() mode (one of the MB_CASE_* constants)
 * @param int   $origCode code point to convert
 * @param int[] $newCodes expected code points after conversion
 */
function testCaseMap($type, int $origCode, array $newCodes) {
    // NOTE(review): mb_chr() is documented to return false on failure; this script passes
    // its result straight on as a string, so do not enable strict_types here without
    // handling that case first.
    $origChar = mb_chr($origCode);

    $newStr = "";
    foreach ($newCodes as $newCode) {
        $newStr .= mb_chr($newCode);
    }

    $mbNewStr = mb_convert_case($origChar, $type);
    if ($mbNewStr !== $newStr) {
        echo "$type: $mbNewStr != $newStr\n";
    }
}

/**
 * Checks a one-to-one mapping. A $newCode of 0 (what an empty field parses to) means the
 * data file lists no mapping, so the code point is expected to map to itself.
 *
 * @param int $type     mb_convert_case() mode (one of the MB_CASE_* constants)
 * @param int $origCode code point to convert
 * @param int $newCode  expected code point, or 0 for "unchanged"
 */
function testSimpleCaseMap($type, int $origCode, int $newCode) {
    testCaseMap($type, $origCode, [$newCode ?: $origCode]);
}

/**
 * Checks the simple upper- and lower-case mappings (fields 12 and 13) of every
 * UnicodeData.txt entry.
 */
function testUnicodeData(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 15);

        $code = intval($fields[0], 16);
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        testSimpleCaseMap(MB_CASE_UPPER_SIMPLE, $code, $upperCase);
        testSimpleCaseMap(MB_CASE_LOWER_SIMPLE, $code, $lowerCase);

        // Unfortunately MB_CASE_TITLE does not actually return the title case, even when passed
        // only a single character. It does ad-hoc magic based on the character class, so that
        // certain characters, such as roman numerals or circled characters will not be
        // title-cased. The title-case mapping (field 14) is therefore not checked here; if that
        // changes, compare MB_CASE_TITLE_SIMPLE against field 14, falling back to the
        // upper-case mapping when field 14 is empty.
    }
}

/**
 * Checks case folding against CaseFolding.txt: entries with status C or S are checked with
 * MB_CASE_FOLD_SIMPLE, entries with status F with MB_CASE_FOLD. Any other status is ignored.
 */
function testCaseFolding(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 4);

        $code = intval($fields[0], 16);
        $status = $fields[1];
        if ($status === 'C' || $status === 'S') {
            $foldCode = intval($fields[2], 16);
            testSimpleCaseMap(MB_CASE_FOLD_SIMPLE, $code, $foldCode);
        } elseif ($status === 'F') {
            $foldCodes = parseCodes($fields[2]);
            testCaseMap(MB_CASE_FOLD, $code, $foldCodes);
        }
    }
}

/**
 * Checks the full lower/title/upper mappings of every unconditional SpecialCasing.txt entry.
 */
function testSpecialCasing(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) >= 5);

        // We don't support conditional mappings
        if ($fields[4] !== '') {
            continue;
        }

        $code = intval($fields[0], 16);
        $lower = parseCodes($fields[1]);
        $title = parseCodes($fields[2]);
        $upper = parseCodes($fields[3]);

        testCaseMap(MB_CASE_LOWER, $code, $lower);
        testCaseMap(MB_CASE_UPPER, $code, $upper);
        testCaseMap(MB_CASE_TITLE, $code, $title);
    }
}