xref: /web-php/src/LangChooser.php (revision 38ef33f9)
1<?php
2
3namespace phpweb;
4
/**
 * Picks the language code to serve for a request.
 *
 * Candidates are checked in this order and the first one that is available
 * (present in $availableLanguages and absent from $inactiveLanguages) wins:
 *   1. the request's language parameter,
 *   2. a shortcut URL prefix (/en/echo, /pt_br/echo),
 *   3. a manual URL prefix (/manual/en/, /manual/pt_br/),
 *   4. the preferred language passed to the constructor,
 *   5. the Accept-Language header, highest quality first,
 *   6. the default language passed to the constructor,
 *   7. "en".
 */
class LangChooser
{
    /**
     * Accept-Language tags (lowercase) whose phpdoc code is not simply the
     * primary subtag. Values are passed through normalize() before use.
     */
    private const ACCEPT_LANGUAGE_ALIASES = [
        'pt-br' => 'pt_br',
        'zh-cn' => 'zh',
        'zh-hk' => 'hk',
        'zh-tw' => 'tw',
    ];

    private readonly string $preferredLanguage;

    private readonly string $defaultLanguage;

    /**
     * @param array<string, string> $availableLanguages keyed by language code
     * @param array<string, string> $inactiveLanguages  keyed by language code
     * @param string $preferredLanguage language code; normalized before it is stored
     * @param string $defaultLanguage   language code; normalized before it is stored
     */
    public function __construct(
        private readonly array $availableLanguages,
        private readonly array $inactiveLanguages,
        string $preferredLanguage,
        string $defaultLanguage,
    )
    {
        $this->defaultLanguage = $this->normalize($defaultLanguage);
        $this->preferredLanguage = $this->normalize($preferredLanguage);
    }

    /**
     * Chooses the language for a request.
     *
     * Side effect: when the request URI starts with a shortcut language
     * prefix, $_SERVER['STRIPPED_URI'] is set to the (HTML-escaped) URI with
     * that prefix removed.
     *
     * @param string|array|null $langParam value of the language request parameter;
     *                                     anything that is not a string is ignored
     * @param string $requestUri request URI, matched from its first character
     * @param ?string $acceptLanguageHeader raw Accept-Language header, or null when absent
     *
     * @return array{string, string} [chosen code, explicitly specified code].
     *         The second element is the first language named by the parameter,
     *         the shortcut URL or the manual URL - even when that language is
     *         not available - or '' when none of them named one.
     */
    public function chooseCode(
        string|array|null $langParam,
        string $requestUri,
        ?string $acceptLanguageHeader,
    ): array
    {
        $explicitlySpecified = '';

        // Specified for the request (GET/POST parameter).
        // NOTE: the value is HTML-escaped here and again inside normalize();
        // this double escaping is kept as-is so the returned code is unchanged.
        if (is_string($langParam)) {
            $langCode = $this->normalize(htmlspecialchars($langParam, ENT_QUOTES, 'UTF-8'));
            $explicitlySpecified = $langCode;
            if ($this->isAvailableLanguage($langCode)) {
                return [$langCode, $explicitlySpecified];
            }
        }

        // Both URL checks below work on the same escaped URI, so escape it once.
        $escapedUri = htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8');

        // Specified in a shortcut URL (eg. /en/echo or /pt_br/echo)
        if (preg_match('!^/(\w{2}(_\w{2})?)/!', $escapedUri, $shortcut)) {
            $shortcutLang = $this->normalize($shortcut[1]);

            // Compare against '' rather than empty() so that a literal "0"
            // taken from the parameter is not mistaken for "nothing specified".
            if ($explicitlySpecified === '') {
                $explicitlySpecified = $shortcutLang;
            }

            // Drop the language prefix from the URL, as it is already handled.
            // $shortcut[0] is the matched "/xx/" (or "/xx_yy/") prefix.
            $_SERVER['STRIPPED_URI'] = '/' . substr($escapedUri, strlen($shortcut[0]));

            if ($this->isAvailableLanguage($shortcutLang)) {
                return [$shortcutLang, $explicitlySpecified];
            }
        }

        // Specified in a manual URL (eg. manual/en/ or manual/pt_br/)
        if (preg_match('!^/manual/(\w{2}(_\w{2})?)(/|$)!', $escapedUri, $manual)) {
            $manualLang = $this->normalize($manual[1]);

            if ($explicitlySpecified === '') {
                $explicitlySpecified = $manualLang;
            }

            if ($this->isAvailableLanguage($manualLang)) {
                return [$manualLang, $explicitlySpecified];
            }
        }

        // Honor the user's own language setting (if available)
        if ($this->isAvailableLanguage($this->preferredLanguage)) {
            return [$this->preferredLanguage, $explicitlySpecified];
        }

        // Specified by the user via the browser's Accept-Language setting
        foreach ($this->parseAcceptLanguage($acceptLanguageHeader) as $tag) {
            // Flavors of languages are not supported (except the aliased ones):
            // everything after the primary subtag is dropped. This is not in
            // conformance with the RFC, but is here for user convenience.
            $candidate = self::ACCEPT_LANGUAGE_ALIASES[$tag] ?? explode('-', $tag, 2)[0];

            $lang = $this->normalize($candidate);
            if ($this->isAvailableLanguage($lang)) {
                return [$lang, $explicitlySpecified];
            }
        }

        // Language preferred by this mirror site
        if ($this->isAvailableLanguage($this->defaultLanguage)) {
            return [$this->defaultLanguage, $explicitlySpecified];
        }

        // Last default language is English
        return ['en', $explicitlySpecified];
    }

    /**
     * Splits an Accept-Language header into lowercase language tags ordered
     * by descending quality.
     *
     * Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-US;q=0.5"
     *
     * Tags are matched case-insensitively (the header is lowercased first) and
     * from the start of each entry, so "*" and other malformed entries are
     * skipped. A missing quality counts as 1.0. Entries of equal quality keep
     * their header order because usort() is stable on PHP 8.0+.
     *
     * @return list<string>
     */
    private function parseAcceptLanguage(?string $header): array
    {
        if ($header === null) {
            return [];
        }

        $ranges = [];
        foreach (explode(',', strtolower($header)) as $entry) {
            $matched = preg_match(
                '!^([a-z]{1,8}(?:-[a-z0-9]{1,8})*)\s*(?:;\s*q\s*=\s*([0-9.]+))?!',
                trim($entry),
                $found,
            );
            if ($matched !== 1) {
                continue;
            }

            // $found[2] is absent when the entry carries no ";q=" part.
            $quality = isset($found[2]) ? (float) $found[2] : 1.0;
            $ranges[] = [$found[1], $quality];
        }

        usort($ranges, static fn (array $a, array $b): int => $b[1] <=> $a[1]);

        return array_column($ranges, 0);
    }

    /**
     * Lowercases and HTML-escapes a language code, then restores the canonical
     * spelling of the Brazilian Portuguese code ("pt_BR").
     */
    private function normalize(string $langCode): string
    {
        $langCode = strtolower(htmlspecialchars($langCode));

        // The Brazilian Portuguese code is the only one with uppercase letters
        if ($langCode === 'pt_br') {
            return 'pt_BR';
        }

        return $langCode;
    }

    /**
     * Tells whether the code is a known language that is not marked inactive.
     */
    private function isAvailableLanguage(string $langCode): bool
    {
        return isset($this->availableLanguages[$langCode])
            && !isset($this->inactiveLanguages[$langCode]);
    }
}
169