1--TEST--
2GH-10192 (mb_detect_encoding() results for UTF-7 differ between PHP 8.0 and 8.1)
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7
8$testcases = [ // Case title (echoed before each result group) => raw input string fed to the mbstring calls in the loop below as UTF7-IMAP.
9    'non-base64 character after &' => 'A & B',
10    'non-base64 character after -' => 'A - B',
11    'base64 character before &' => 'A 1& B',
12    'base64 character before -' => 'A 1- B',
13    'base64 character after &' => 'A &1 B',
14    'base64 character after -' => 'A -1 B',
15    'base64 character before and after &' => 'A 1&1 B',
16    'base64 character before and after -' => 'A 1-1 B',
17    'string ends with &' => 'A &',
18    'string ends with -' => 'A -',
19    '& and -' => 'A &- B',
20    '- and &' => 'A -& B',
21    'valid direct encoding character ~' => 'A ~ B',
22    'invalid direct encoding character ESC' => "A \x1b B",
23    'valid direct encoding character ~ after &' => 'A &~ B',
24    'invalid direct encoding character ESC after &' => "A &\x1b B",
25    'valid base64 character between & and -' => 'A &ZeVnLIqe- B', // base64 of 日本語 in UTF-16BE (65E5 672C 8A9E)
26    'invalid base64 character between & and -' => 'A &ZeVnLIq- B', // same payload with its final base64 character dropped, so the third UTF-16BE unit is incomplete
27    'valid base64 character between & and non-base64 character' => 'A &ZeVnLIqe B',
28    'invalid base64 character between & and non-base64 character' => 'A &ZeVnLIq B',
29    'valid base64 character between & and base64 character' => 'A &ZeVnLIqe1 B',
30    'invalid base64 character between & and base64 character' => 'A &ZeVnLIq1 B',
31    'valid base64 character between & and end of string' => 'A &ZeVnLIqe',
32    'invalid base64 character between & and end of string' => 'A &ZeVnLIq',
33    'valid base64 character using surrogate pair between & and -' => 'A &2GfePQ- B', // base64 of 𩸽 (U+29E3D) in UTF-16BE: surrogate pair D867 DE3D
34    'invalid base64 character using surrogate pair between & and -' => 'A &2Gc- B', // only the first 16 bits (high surrogate D867) of 𩸽 (U+29E3D) in UTF-16BE
35    'valid base64 character using surrogate pair between & and non-base64 character' => 'A &2GfePQ B',
36    'invalid base64 character using surrogate pair between & and non-base64 character' => 'A &2Gc B',
37    'valid base64 character using surrogate pair between & and base64 character' => 'A &2GfePQ1 B',
38    'invalid base64 character using surrogate pair between & and base64 character' => 'A &2Gc1 B',
39    'valid base64 character using surrogate pair between & and end of string' => 'A &2GfePQ',
40    'invalid base64 character using surrogate pair between & and end of string' => 'A &2Gc'
41];
42
43foreach ($testcases as $title => $case) { // Print the case title, then seven probe results for the input, then a blank separator line.
44    echo $title . PHP_EOL;
45    var_dump(mb_detect_encoding($case, 'UTF-8, UTF7-IMAP', true)); // strict detection, UTF-8 listed ahead of UTF7-IMAP
46    var_dump(mb_detect_encoding($case, 'UTF-8, UTF7-IMAP', false)); // non-strict detection, same candidate list
47    var_dump(mb_detect_encoding($case, 'UTF7-IMAP', true)); // strict detection with UTF7-IMAP as the only candidate
48    var_dump(mb_detect_encoding($case, 'UTF7-IMAP', false)); // non-strict detection with UTF7-IMAP as the only candidate
49    var_dump(mb_check_encoding($case, 'UTF7-IMAP')); // whether the input validates as UTF7-IMAP
50    var_dump(addcslashes(mb_convert_encoding($case, 'UTF-8', 'UTF7-IMAP'), "\0..\37\177")); // decode to UTF-8; addcslashes backslash-escapes bytes 0x00-0x1F and 0x7F in the dumped result
51    var_dump(mb_get_info('illegal_chars')); // running counter: the values in --EXPECT-- never reset between cases, so the order of $testcases and of these calls must not change
52    echo PHP_EOL;
53}
54
55?>
56--EXPECT--
57non-base64 character after &
58string(5) "UTF-8"
59string(5) "UTF-8"
60bool(false)
61string(9) "UTF7-IMAP"
62bool(false)
63string(4) "A ?B"
64int(1)
65
66non-base64 character after -
67string(5) "UTF-8"
68string(5) "UTF-8"
69string(9) "UTF7-IMAP"
70string(9) "UTF7-IMAP"
71bool(true)
72string(5) "A - B"
73int(1)
74
75base64 character before &
76string(5) "UTF-8"
77string(5) "UTF-8"
78bool(false)
79string(9) "UTF7-IMAP"
80bool(false)
81string(5) "A 1?B"
82int(2)
83
84base64 character before -
85string(5) "UTF-8"
86string(5) "UTF-8"
87string(9) "UTF7-IMAP"
88string(9) "UTF7-IMAP"
89bool(true)
90string(6) "A 1- B"
91int(2)
92
93base64 character after &
94string(5) "UTF-8"
95string(5) "UTF-8"
96bool(false)
97string(9) "UTF7-IMAP"
98bool(false)
99string(4) "A ?B"
100int(3)
101
102base64 character after -
103string(5) "UTF-8"
104string(5) "UTF-8"
105string(9) "UTF7-IMAP"
106string(9) "UTF7-IMAP"
107bool(true)
108string(6) "A -1 B"
109int(3)
110
111base64 character before and after &
112string(5) "UTF-8"
113string(5) "UTF-8"
114bool(false)
115string(9) "UTF7-IMAP"
116bool(false)
117string(5) "A 1?B"
118int(4)
119
120base64 character before and after -
121string(5) "UTF-8"
122string(5) "UTF-8"
123string(9) "UTF7-IMAP"
124string(9) "UTF7-IMAP"
125bool(true)
126string(7) "A 1-1 B"
127int(4)
128
129string ends with &
130string(5) "UTF-8"
131string(5) "UTF-8"
132bool(false)
133string(9) "UTF7-IMAP"
134bool(false)
135string(3) "A ?"
136int(5)
137
138string ends with -
139string(5) "UTF-8"
140string(5) "UTF-8"
141string(9) "UTF7-IMAP"
142string(9) "UTF7-IMAP"
143bool(true)
144string(3) "A -"
145int(5)
146
147& and -
148string(9) "UTF7-IMAP"
149string(9) "UTF7-IMAP"
150string(9) "UTF7-IMAP"
151string(9) "UTF7-IMAP"
152bool(true)
153string(5) "A & B"
154int(5)
155
156- and &
157string(5) "UTF-8"
158string(5) "UTF-8"
159bool(false)
160string(9) "UTF7-IMAP"
161bool(false)
162string(5) "A -?B"
163int(6)
164
165valid direct encoding character ~
166string(5) "UTF-8"
167string(5) "UTF-8"
168string(9) "UTF7-IMAP"
169string(9) "UTF7-IMAP"
170bool(true)
171string(5) "A ~ B"
172int(6)
173
174invalid direct encoding character ESC
175string(5) "UTF-8"
176string(5) "UTF-8"
177bool(false)
178string(9) "UTF7-IMAP"
179bool(false)
180string(5) "A ? B"
181int(7)
182
183valid direct encoding character ~ after &
184string(5) "UTF-8"
185string(5) "UTF-8"
186bool(false)
187string(9) "UTF7-IMAP"
188bool(false)
189string(5) "A ? B"
190int(8)
191
192invalid direct encoding character ESC after &
193string(5) "UTF-8"
194string(5) "UTF-8"
195bool(false)
196string(9) "UTF7-IMAP"
197bool(false)
198string(5) "A ? B"
199int(9)
200
201valid base64 character between & and -
202string(9) "UTF7-IMAP"
203string(9) "UTF7-IMAP"
204string(9) "UTF7-IMAP"
205string(9) "UTF7-IMAP"
206bool(true)
207string(13) "A 日本語 B"
208int(9)
209
210invalid base64 character between & and -
211string(5) "UTF-8"
212string(5) "UTF-8"
213bool(false)
214string(9) "UTF7-IMAP"
215bool(false)
216string(11) "A 日本? B"
217int(10)
218
219valid base64 character between & and non-base64 character
220string(5) "UTF-8"
221string(5) "UTF-8"
222bool(false)
223string(9) "UTF7-IMAP"
224bool(false)
225string(13) "A 日本語?B"
226int(11)
227
228invalid base64 character between & and non-base64 character
229string(5) "UTF-8"
230string(5) "UTF-8"
231bool(false)
232string(9) "UTF7-IMAP"
233bool(false)
234string(10) "A 日本?B"
235int(12)
236
237valid base64 character between & and base64 character
238string(5) "UTF-8"
239string(5) "UTF-8"
240bool(false)
241string(9) "UTF7-IMAP"
242bool(false)
243string(13) "A 日本語?B"
244int(13)
245
246invalid base64 character between & and base64 character
247string(5) "UTF-8"
248string(5) "UTF-8"
249bool(false)
250string(9) "UTF7-IMAP"
251bool(false)
252string(13) "A 日本誵?B"
253int(14)
254
255valid base64 character between & and end of string
256string(5) "UTF-8"
257string(5) "UTF-8"
258bool(false)
259string(9) "UTF7-IMAP"
260bool(false)
261string(12) "A 日本語?"
262int(15)
263
264invalid base64 character between & and end of string
265string(5) "UTF-8"
266string(5) "UTF-8"
267bool(false)
268string(9) "UTF7-IMAP"
269bool(false)
270string(9) "A 日本?"
271int(16)
272
273valid base64 character using surrogate pair between & and -
274string(5) "UTF-8"
275string(5) "UTF-8"
276string(9) "UTF7-IMAP"
277string(9) "UTF7-IMAP"
278bool(true)
279string(8) "A 𩸽 B"
280int(16)
281
282invalid base64 character using surrogate pair between & and -
283string(5) "UTF-8"
284string(5) "UTF-8"
285bool(false)
286string(9) "UTF7-IMAP"
287bool(false)
288string(5) "A ? B"
289int(17)
290
291valid base64 character using surrogate pair between & and non-base64 character
292string(5) "UTF-8"
293string(5) "UTF-8"
294bool(false)
295string(9) "UTF7-IMAP"
296bool(false)
297string(8) "A 𩸽?B"
298int(18)
299
300invalid base64 character using surrogate pair between & and non-base64 character
301string(5) "UTF-8"
302string(5) "UTF-8"
303bool(false)
304string(9) "UTF7-IMAP"
305bool(false)
306string(4) "A ?B"
307int(19)
308
309valid base64 character using surrogate pair between & and base64 character
310string(5) "UTF-8"
311string(5) "UTF-8"
312bool(false)
313string(9) "UTF7-IMAP"
314bool(false)
315string(8) "A 𩸽?B"
316int(20)
317
318invalid base64 character using surrogate pair between & and base64 character
319string(5) "UTF-8"
320string(5) "UTF-8"
321bool(false)
322string(9) "UTF7-IMAP"
323bool(false)
324string(4) "A ?B"
325int(21)
326
327valid base64 character using surrogate pair between & and end of string
328string(5) "UTF-8"
329string(5) "UTF-8"
330bool(false)
331string(9) "UTF7-IMAP"
332bool(false)
333string(7) "A 𩸽?"
334int(22)
335
336invalid base64 character using surrogate pair between & and end of string
337string(5) "UTF-8"
338string(5) "UTF-8"
339bool(false)
340string(9) "UTF7-IMAP"
341bool(false)
342string(3) "A ?"
343int(23)
344