1<?php
2
/**
 * Reads the entries of the mbfl_encoding_ptr_list array from the mbfl_encoding.c
 * file located next to this script.
 *
 * @return string[] The listed struct identifiers, without the leading "&" and
 *                  without the NULL terminator, in the order they are listed.
 * @throws RuntimeException If the C file cannot be read or the array is not found in it.
 */
function read_encoding_pointer_array(): array {
    $source_path = __DIR__ . '/mbfl_encoding.c';
    $file_content = file_get_contents($source_path);
    if ($file_content === false) {
        throw new RuntimeException("Unable to read $source_path");
    }

    // Capture everything between the braces of the mbfl_encoding_ptr_list initializer.
    $pattern = '/static const mbfl_encoding \*mbfl_encoding_ptr_list\[\][\s\S]*?\{([^}]*)\};/';
    // Explicit check instead of assert(): assertions can be disabled via zend.assertions.
    if (preg_match($pattern, $file_content, $matches) !== 1) {
        throw new RuntimeException("Unable to find mbfl_encoding_ptr_list in $source_path");
    }

    $encoding_pointers = [];
    // Split on the bare comma and strip "\r" too, so CRLF line endings and
    // trailing whitespace do not break the parsing.
    foreach (explode(',', $matches[1]) as $item) {
        $encoding_pointer = trim($item, "& \t\r\n");
        // Skip the NULL terminator and the empty piece left behind by a trailing comma.
        if ($encoding_pointer === '' || $encoding_pointer === 'NULL') {
            continue;
        }
        $encoding_pointers[] = $encoding_pointer;
    }
    return $encoding_pointers;
}
16
/**
 * Collects the encoding names of the requested mbfl_encoding structs defined in one C file.
 *
 * Every definition of the form "const mbfl_encoding <struct> = { ... }" whose identifier
 * is listed in $struct_names is matched; the second initializer field (the encoding name)
 * is stored in $result under the struct identifier. Definitions with fewer than two
 * fields are skipped.
 *
 * @param array    $result       Map of struct identifier => encoding name, extended in place.
 * @param string[] $struct_names Struct identifiers to look for.
 * @param string   $file_path    Path of the C source file to scan.
 * @throws RuntimeException If the file cannot be read.
 */
function search_struct_in_file(array &$result, $struct_names, $file_path)
{
    // Without any name the alternation below would be empty; there is nothing to find.
    if (!$struct_names) {
        return;
    }

    $file_content = file_get_contents($file_path);
    if ($file_content === false) {
        throw new RuntimeException("Unable to read $file_path");
    }

    // Quote the names so they are always matched literally inside the pattern.
    $alternatives = [];
    foreach ($struct_names as $struct_name) {
        $alternatives[] = preg_quote($struct_name, '/');
    }
    $pattern = '/const mbfl_encoding\s+(' . implode('|', $alternatives) . ')\s*=\s*\{([^}]*)\}/';
    preg_match_all($pattern, $file_content, $matches, PREG_SET_ORDER);

    foreach ($matches as $match) {
        $current_struct_name = $match[1];
        $fields = explode(',', $match[2]);
        // Note: name is the second field
        if (!isset($fields[1])) {
            continue;
        }
        // "\r" is stripped as well so files with CRLF line endings yield clean names.
        $result[$current_struct_name] = trim($fields[1], " \r\n\t\"");
    }
}
30
/**
 * Builds a map of struct identifier => encoding name by running
 * search_struct_in_file() over every C file matched by the glob pattern below.
 *
 * @param string[] $struct_names Struct identifiers to look for.
 * @return array Map of struct identifier => encoding name for every struct that was found.
 */
function search_struct_in_dir($struct_names): array
{
    $names_by_struct = [];

    // NOTE(review): PHP's glob() is not known to support a recursive "**" wildcard, so this
    // pattern most likely only matches .c files exactly one directory below the parent
    // directory of this script - confirm if deeper nesting is ever needed.
    $c_files = glob(__DIR__ . "/../**/*.c");

    foreach ($c_files as $c_file) {
        // Each call adds the structs it finds in this file to the shared map.
        search_struct_in_file($names_by_struct, $struct_names, $c_file);
    }

    return $names_by_struct;
}
39
// Struct identifiers in the order in which they are listed in mbfl_encoding_ptr_list.
$encoding_pointer_array = read_encoding_pointer_array();
// Struct identifier => encoding name, for every listed struct whose definition was found.
$encoding_pointer_array_name_mapping = search_struct_in_dir($encoding_pointer_array);

// The single byte encodings are generated, so their struct definitions cannot be found by
// the search above; their names are listed here by hand.
$fixed_encodings = [
    'mbfl_encoding_cp1251' => 'Windows-1251',
    'mbfl_encoding_cp1252' => 'Windows-1252',
    'mbfl_encoding_cp1254' => 'Windows-1254',
    'mbfl_encoding_8859_1' => 'ISO-8859-1',
    'mbfl_encoding_8859_2' => 'ISO-8859-2',
    'mbfl_encoding_8859_3' => 'ISO-8859-3',
    'mbfl_encoding_8859_4' => 'ISO-8859-4',
    'mbfl_encoding_8859_5' => 'ISO-8859-5',
    'mbfl_encoding_8859_6' => 'ISO-8859-6',
    'mbfl_encoding_8859_7' => 'ISO-8859-7',
    'mbfl_encoding_8859_8' => 'ISO-8859-8',
    'mbfl_encoding_8859_9' => 'ISO-8859-9',
    'mbfl_encoding_8859_10' => 'ISO-8859-10',
    'mbfl_encoding_8859_13' => 'ISO-8859-13',
    'mbfl_encoding_8859_14' => 'ISO-8859-14',
    'mbfl_encoding_8859_15' => 'ISO-8859-15',
    'mbfl_encoding_8859_16' => 'ISO-8859-16',
    'mbfl_encoding_cp866' => 'CP866',
    'mbfl_encoding_cp850' => 'CP850',
    'mbfl_encoding_koi8r' => 'KOI8-R',
    'mbfl_encoding_koi8u' => 'KOI8-U',
    'mbfl_encoding_armscii8' => 'ArmSCII-8',
    'mbfl_encoding_ascii' => 'ASCII',
];

// Add the fixed encodings (a fixed name wins over a name found by the search)
foreach ($fixed_encodings as $encoding_pointer => $encoding_name) {
    $encoding_pointer_array_name_mapping[$encoding_pointer] = $encoding_name;
}

// Consistency check: all of the encoding pointer array entries should be found.
// This is checked explicitly rather than with assert(), because assertions can be
// disabled (zend.assertions) and a missing name would then silently end up as an
// empty entry in the name list handed to gperf.
$missing_encoding_pointers = [];
foreach ($encoding_pointer_array as $encoding_pointer) {
    if (!isset($encoding_pointer_array_name_mapping[$encoding_pointer])) {
        $missing_encoding_pointers[] = $encoding_pointer;
    }
}
if ($missing_encoding_pointers !== []) {
    throw new RuntimeException('Missing entry for ' . implode(', ', $missing_encoding_pointers));
}

// Encoding names in exactly the same order as the encoding pointer array.
$ordered_name_list = array_map(static function ($encoding_pointer) use ($encoding_pointer_array_name_mapping) {
    return $encoding_pointer_array_name_mapping[$encoding_pointer];
}, $encoding_pointer_array);
83
// Write out ordered name list, and invoke gperf for computing the perfect hash table
$encodings_file = __DIR__ . '/encodings.txt';
if (file_put_contents($encodings_file, implode("\n", $ordered_name_list)) === false) {
    throw new RuntimeException("Unable to write $encodings_file");
}

$gperf_exit_code = null;
try {
    // passthru() sends the command's output to PHP's own output, so it is captured
    // with an output buffer; its second argument receives the exit status.
    ob_start();
    passthru('gperf ' . escapeshellarg($encodings_file) . ' --readonly-tables --null-strings --ignore-case -m 1000', $gperf_exit_code);
    $output = ob_get_clean();
} finally {
    // The name list is only a temporary input file; remove it even when gperf failed.
    if (is_file($encodings_file)) {
        unlink($encodings_file);
    }
}

// Fail loudly here instead of letting the parsing of an empty output fail later on.
if ($gperf_exit_code !== 0) {
    throw new RuntimeException('gperf failed with exit status ' . var_export($gperf_exit_code, true) . '; is gperf installed?');
}
90
// Find asso_values array in $output
$pattern = '/static const unsigned char asso_values\[\] =([^}]*)\};/';
// Checked explicitly instead of with assert(): assertions can be disabled (zend.assertions),
// and a missing match must not go unnoticed.
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the asso_values array in the gperf output');
}
// Strip the surrounding whitespace and the opening brace; only the values remain.
$asso_values = trim($matches[1], "\t \n{");
echo "===--- Copy and paste the following values in the asso_values array in mbfl_encoding.c ---===\n";
echo $asso_values, "\n";
98
// Find word_list array in $output
$pattern = '/static const char \* const wordlist\[\] =([^}]*)\};/';
// Checked explicitly instead of with assert(): assertions can be disabled (zend.assertions),
// and a missing match must not go unnoticed.
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the wordlist array in the gperf output');
}
// Strip the surrounding whitespace and the opening brace; only the entries remain.
$word_list = trim($matches[1], "\t \n{");
// Slots without a keyword are printed as "(char*)0" (gperf is run with --null-strings);
// they become -1 in the index table.
$word_list = str_replace('(char*)0', '-1', $word_list);
// Replace every quoted encoding name by the position of its struct in the encoding
// pointer array. Walking the pointer array directly yields that position without a
// lookup per name, and never produces a "not found" result.
foreach ($encoding_pointer_array as $index => $encoding_pointer) {
    if (!isset($encoding_pointer_array_name_mapping[$encoding_pointer])) {
        continue;
    }
    $quoted_name = '"' . $encoding_pointer_array_name_mapping[$encoding_pointer] . '"';
    $word_list = str_replace($quoted_name, (string) $index, $word_list);
}

echo "===--- Copy and paste the following values in the mbfl_encoding_ptr_list_after_hashing array in mbfl_encoding.c ---===\n";
echo $word_list, "\n";
113