<?php

/*
 * Helper script that regenerates the perfect-hash lookup tables for
 * mbfl_encoding.c.
 *
 * Steps:
 *   1. Read the ordered list of encoding struct names from
 *      mbfl_encoding_ptr_list[] in mbfl_encoding.c.
 *   2. Map every struct name to its encoding name string.
 *   3. Feed the ordered encoding names to gperf.
 *   4. Print gperf's asso_values table, and its word list with each name
 *      replaced by the index of that encoding in mbfl_encoding_ptr_list[].
 *
 * Every sanity check throws a RuntimeException instead of relying on
 * assert(), because assertions may be disabled through zend.assertions, in
 * which case the script would silently print broken tables.
 */

/**
 * Reads the entries of mbfl_encoding_ptr_list[] from mbfl_encoding.c.
 *
 * @return string[] Struct names in declaration order, without the leading '&'
 *                  and without the last array entry (the NULL terminator).
 * @throws RuntimeException If the file cannot be read or the array is not found.
 */
function read_encoding_pointer_array(): array {
    $source_path = __DIR__ . '/mbfl_encoding.c';
    $file_content = file_get_contents($source_path);
    if ($file_content === false) {
        throw new RuntimeException("Unable to read $source_path");
    }

    // Capture everything between the braces of the array initializer.
    $pattern = '/static const mbfl_encoding \*mbfl_encoding_ptr_list\[\][\s\S]*?\{([^}]*)\};/';
    if (preg_match($pattern, $file_content, $matches) !== 1) {
        throw new RuntimeException("Could not find mbfl_encoding_ptr_list[] in $source_path");
    }

    // One entry per line; strip the address-of operator and the whitespace.
    $array = array_map(function ($item) {
        return trim($item, "&\n\t ");
    }, explode(",\n", $matches[1]));
    array_pop($array); // Remove NULL
    return $array;
}

/**
 * Searches one C file for definitions of the given mbfl_encoding structs and
 * records the encoding name of each definition found.
 *
 * @param array    $result       Filled in place: struct name => encoding name.
 * @param string[] $struct_names Struct names to look for.
 * @param string   $file_path    Path of the C file to scan.
 * @throws RuntimeException If the file cannot be read.
 */
function search_struct_in_file(array &$result, $struct_names, $file_path)
{
    // An empty alternation would produce a pattern that matches the wrong things.
    if (empty($struct_names)) {
        return;
    }

    $fileContent = file_get_contents($file_path);
    if ($fileContent === false) {
        throw new RuntimeException("Unable to read $file_path");
    }

    // Quote the names so that they can never be interpreted as regex syntax.
    $quoted_names = array_map(function ($struct_name) {
        return preg_quote($struct_name, '/');
    }, $struct_names);
    $pattern = '/const mbfl_encoding\s+(' . implode('|', $quoted_names) . ')\s* = {([^}]*)}/';
    preg_match_all($pattern, $fileContent, $matches, PREG_SET_ORDER);

    foreach ($matches as $match) {
        $current_struct_name = $match[1];
        $fields = explode(',', $match[2]);
        // Note: name is the second field
        if (!isset($fields[1])) {
            // Not a usable initializer; leave the entry unset so that the
            // consistency check of the caller can report it.
            continue;
        }
        $result[$current_struct_name] = trim($fields[1], " \n\t\"");
    }
}

/**
 * Searches the C files matched by the glob pattern below for the given
 * struct definitions.
 *
 * @param string[] $struct_names Struct names to look for.
 * @return array Struct name => encoding name, for every definition found.
 */
function search_struct_in_dir($struct_names): array
{
    $result = [];
    // NOTE(review): PHP's glob() has no recursive "**" wildcard, so this
    // presumably only matches *.c files in the direct subdirectories of the
    // parent directory - confirm that this covers all encoding definitions.
    $files = glob(__DIR__ . "/../**/*.c");
    if ($files === false) {
        $files = [];
    }
    foreach ($files as $file) {
        search_struct_in_file($result, $struct_names, $file);
    }
    return $result;
}

$encoding_pointer_array = read_encoding_pointer_array();
$encoding_pointer_array_name_mapping = search_struct_in_dir($encoding_pointer_array);

// The single byte encodings are generated and cannot be found in dedicated generated structs
$fixed_encodings = [
    'mbfl_encoding_cp1251' => 'Windows-1251',
    'mbfl_encoding_cp1252' => 'Windows-1252',
    'mbfl_encoding_cp1254' => 'Windows-1254',
    'mbfl_encoding_8859_1' => 'ISO-8859-1',
    'mbfl_encoding_8859_2' => 'ISO-8859-2',
    'mbfl_encoding_8859_3' => 'ISO-8859-3',
    'mbfl_encoding_8859_4' => 'ISO-8859-4',
    'mbfl_encoding_8859_5' => 'ISO-8859-5',
    'mbfl_encoding_8859_6' => 'ISO-8859-6',
    'mbfl_encoding_8859_7' => 'ISO-8859-7',
    'mbfl_encoding_8859_8' => 'ISO-8859-8',
    'mbfl_encoding_8859_9' => 'ISO-8859-9',
    'mbfl_encoding_8859_10' => 'ISO-8859-10',
    'mbfl_encoding_8859_13' => 'ISO-8859-13',
    'mbfl_encoding_8859_14' => 'ISO-8859-14',
    'mbfl_encoding_8859_15' => 'ISO-8859-15',
    'mbfl_encoding_8859_16' => 'ISO-8859-16',
    'mbfl_encoding_cp866' => 'CP866',
    'mbfl_encoding_cp850' => 'CP850',
    'mbfl_encoding_koi8r' => 'KOI8-R',
    'mbfl_encoding_koi8u' => 'KOI8-U',
    'mbfl_encoding_armscii8' => 'ArmSCII-8',
    'mbfl_encoding_ascii' => 'ASCII',
];

// Add the fixed encodings
foreach ($fixed_encodings as $encoding_pointer => $encoding_name) {
    $encoding_pointer_array_name_mapping[$encoding_pointer] = $encoding_name;
}

// Consistency check: all of the encoding pointer array entries should be found
foreach ($encoding_pointer_array as $encoding_pointer) {
    if (!isset($encoding_pointer_array_name_mapping[$encoding_pointer])) {
        throw new RuntimeException("Missing entry for $encoding_pointer");
    }
}

// Encoding names in the same order as mbfl_encoding_ptr_list[]
$ordered_name_list = array_map(function ($encoding_pointer) use ($encoding_pointer_array_name_mapping) {
    return $encoding_pointer_array_name_mapping[$encoding_pointer];
}, $encoding_pointer_array);

// Write out ordered name list, and invoke gperf for computing the perfect hash table
$encodings_file = __DIR__ . '/encodings.txt';
if (file_put_contents($encodings_file, implode("\n", $ordered_name_list)) === false) {
    throw new RuntimeException("Unable to write $encodings_file");
}
try {
    // passthru() prints the gperf output; buffer it to capture it as a string.
    ob_start();
    passthru(
        'gperf ' . escapeshellarg($encodings_file) . ' --readonly-tables --null-strings --ignore-case -m 1000',
        $gperf_status
    );
    $output = ob_get_clean();
} finally {
    // Always remove the temporary input file, also when something failed.
    if (is_file($encodings_file)) {
        unlink($encodings_file);
    }
}
if ($gperf_status !== 0) {
    throw new RuntimeException("gperf failed with exit status $gperf_status (is gperf installed?)");
}

// Find asso_values array in $output
$pattern = '/static const unsigned char asso_values\[\] =([^}]*)\};/';
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the asso_values array in the gperf output');
}
$asso_values = trim($matches[1], "\t \n{");
echo "===--- Copy and paste the following values in the asso_values array in mbfl_encoding.c ---===\n";
echo $asso_values, "\n";

// Find word_list array in $output
$pattern = '/static const char \* const wordlist\[\] =([^}]*)\};/';
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the wordlist array in the gperf output');
}
$word_list = trim($matches[1], "\t \n{");
// Empty hash slots become -1
$word_list = str_replace('(char*)0', '-1', $word_list);
// Replace every quoted encoding name by the index of its encoding in the pointer array
foreach ($encoding_pointer_array_name_mapping as $key => $value)
{
    $index = array_search($key, $encoding_pointer_array, true);
    if ($index === false) {
        // Not part of the pointer array (e.g. an unused fixed encoding):
        // there is no index to substitute.
        continue;
    }
    $word_list = str_replace("\"$value\"", (string) $index, $word_list);
}

echo "===--- Copy and paste the following values in the mbfl_encoding_ptr_list_after_hashing array in mbfl_encoding.c ---===\n";
echo $word_list, "\n";