1--TEST--
2GH-10192 (mb_detect_encoding() results for UTF-7 differ between PHP 8.0 and 8.1)
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7
// Each entry maps a case title (echoed verbatim into the output) to the raw
// input string that is fed to the mbstring calls in the loop further down.
8$testcases = [
9    'non-base64 character after +' => 'A + B',
10    'non-base64 character after -' => 'A - B',
11    'base64 character before +' => 'A 1+ B',
12    'base64 character before -' => 'A 1- B',
13    'base64 character after +' => 'A +1 B',
14    'base64 character after -' => 'A -1 B',
15    'base64 character before and after +' => 'A 1+1 B',
16    'base64 character before and after -' => 'A 1-1 B',
17    'string ends with +' => 'A +',
18    'string ends with -' => 'A -',
19    '+ and -' => 'A +- B',
20    '- and +' => 'A -+ B',
21    'valid direct encoding character =' => 'A = B',
22    'invalid direct encoding character ~' => 'A ~ B',
23    'invalid direct encoding character \\' => 'A \\ B',
24    'invalid direct encoding character ESC' => "A \x1b B",
25    'valid direct encoding character = after +' => 'A += B',
26    'invalid direct encoding character ~ after +' => 'A +~ B',
27    'invalid direct encoding character \\ after +' => 'A +\\ B',
28    'invalid direct encoding character ESC after +' => "A +\x1b B",
29    'valid base64 character between + and -' => 'A +ZeVnLIqe- B', // 日本語 (U+65E5 U+672C U+8A9E) as base64-encoded UTF-16BE
30    'invalid base64 character between + and -' => 'A +ZeVnLIq- B', // same base64 run as 日本語 with its final base64 character dropped
31    'valid base64 character between + and non-base64 character' => 'A +ZeVnLIqe B',
32    'invalid base64 character between + and non-base64 character' => 'A +ZeVnLIq B',
33    'valid base64 character between + and base64 character' => 'A +ZeVnLIqe1 B',
34    'invalid base64 character between + and base64 character' => 'A +ZeVnLIq1 B',
35    'valid base64 character between + and end of string' => 'A +ZeVnLIqe',
36    'invalid base64 character between + and end of string' => 'A +ZeVnLIq',
    // NOTE(review): in the four "consisting only of +" pairs below, the
    // "valid" and "invalid" entries use byte-identical input strings.
    // Confirm whether that is intentional before changing either side.
37    'valid base64 character consisting only of + between + and -' => 'A +++++++++- B',
38    'invalid base64 character consisting only of + between + and -' => 'A +++++++++- B',
39    'valid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
40    'invalid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
41    'valid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
42    'invalid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
43    'valid base64 character consisting only of + between + and end of string' => 'A +++++++++',
44    'invalid base64 character consisting only of + between + and end of string' => 'A +++++++++',
45    'valid base64 character using surrogate pair between + and -' => 'A +2GfePQ- B', // 𩸽 (U+29E3D, surrogate pair D867 DE3D) in UTF-16BE
46    'invalid base64 character using surrogate pair between + and -' => 'A +2Gc- B', // first 16 bits (high surrogate D867) of 𩸽 in UTF-16BE
47    'valid base64 character using surrogate pair between + and non-base64 character' => 'A +2GfePQ B',
48    'invalid base64 character using surrogate pair between + and non-base64 character' => 'A +2Gc B',
49    'valid base64 character using surrogate pair between + and base64 character' => 'A +2GfePQ1 B',
50    'invalid base64 character using surrogate pair between + and base64 character' => 'A +2Gc1 B',
51    'valid base64 character using surrogate pair between + and end of string' => 'A +2GfePQ',
52    'invalid base64 character using surrogate pair between + and end of string' => 'A +2Gc'
53];
54
// Run every case through the same seven checks. The order of the var_dump()
// calls must match the per-case line order in --EXPECT--.
55foreach ($testcases as $title => $case) {
56    echo $title . PHP_EOL;
    // Detection with UTF-8 and UTF-7 as candidates: strict mode on, then off.
57    var_dump(mb_detect_encoding($case, 'UTF-8, UTF-7', true));
58    var_dump(mb_detect_encoding($case, 'UTF-8, UTF-7', false));
    // Detection with UTF-7 as the only candidate: strict mode on, then off.
59    var_dump(mb_detect_encoding($case, 'UTF-7', true));
60    var_dump(mb_detect_encoding($case, 'UTF-7', false));
    // Whether the input is accepted as valid UTF-7.
61    var_dump(mb_check_encoding($case, 'UTF-7'));
    // Convert UTF-7 -> UTF-8. addcslashes() escapes bytes 0x00-0x1F and 0x7F,
    // so control characters are dumped in escaped form (ESC shows up as \033
    // in --EXPECT--).
62    var_dump(addcslashes(mb_convert_encoding($case, 'UTF-8', 'UTF-7'), "\0..\37\177"));
    // Dump mbstring's illegal-character count. The expected values never
    // decrease from one case to the next, so this appears to be a running
    // total rather than a per-case figure; reordering cases or calls would
    // change every later value.
63    var_dump(mb_get_info('illegal_chars'));
64    echo PHP_EOL;
65}
66?>
67--EXPECT--
68non-base64 character after +
69string(5) "UTF-8"
70string(5) "UTF-7"
71bool(false)
72string(5) "UTF-7"
73bool(false)
74string(4) "A  B"
75int(0)
76
77non-base64 character after -
78string(5) "UTF-8"
79string(5) "UTF-8"
80string(5) "UTF-7"
81string(5) "UTF-7"
82bool(true)
83string(5) "A - B"
84int(0)
85
86base64 character before +
87string(5) "UTF-8"
88string(5) "UTF-7"
89bool(false)
90string(5) "UTF-7"
91bool(false)
92string(5) "A 1 B"
93int(0)
94
95base64 character before -
96string(5) "UTF-8"
97string(5) "UTF-8"
98string(5) "UTF-7"
99string(5) "UTF-7"
100bool(true)
101string(6) "A 1- B"
102int(0)
103
104base64 character after +
105string(5) "UTF-8"
106string(5) "UTF-8"
107bool(false)
108string(5) "UTF-7"
109bool(false)
110string(5) "A ? B"
111int(1)
112
113base64 character after -
114string(5) "UTF-8"
115string(5) "UTF-8"
116string(5) "UTF-7"
117string(5) "UTF-7"
118bool(true)
119string(6) "A -1 B"
120int(1)
121
122base64 character before and after +
123string(5) "UTF-8"
124string(5) "UTF-8"
125bool(false)
126string(5) "UTF-7"
127bool(false)
128string(6) "A 1? B"
129int(2)
130
131base64 character before and after -
132string(5) "UTF-8"
133string(5) "UTF-8"
134string(5) "UTF-7"
135string(5) "UTF-7"
136bool(true)
137string(7) "A 1-1 B"
138int(2)
139
140string ends with +
141string(5) "UTF-7"
142string(5) "UTF-7"
143string(5) "UTF-7"
144string(5) "UTF-7"
145bool(true)
146string(2) "A "
147int(2)
148
149string ends with -
150string(5) "UTF-8"
151string(5) "UTF-8"
152string(5) "UTF-7"
153string(5) "UTF-7"
154bool(true)
155string(3) "A -"
156int(2)
157
158+ and -
159string(5) "UTF-7"
160string(5) "UTF-7"
161string(5) "UTF-7"
162string(5) "UTF-7"
163bool(true)
164string(5) "A + B"
165int(2)
166
167- and +
168string(5) "UTF-8"
169string(5) "UTF-7"
170bool(false)
171string(5) "UTF-7"
172bool(false)
173string(5) "A - B"
174int(2)
175
176valid direct encoding character =
177string(5) "UTF-8"
178string(5) "UTF-8"
179string(5) "UTF-7"
180string(5) "UTF-7"
181bool(true)
182string(5) "A = B"
183int(2)
184
185invalid direct encoding character ~
186string(5) "UTF-8"
187string(5) "UTF-8"
188bool(false)
189string(5) "UTF-7"
190bool(false)
191string(5) "A ~ B"
192int(2)
193
194invalid direct encoding character \
195string(5) "UTF-8"
196string(5) "UTF-8"
197bool(false)
198string(5) "UTF-7"
199bool(false)
200string(5) "A \ B"
201int(2)
202
203invalid direct encoding character ESC
204string(5) "UTF-8"
205string(5) "UTF-8"
206bool(false)
207string(5) "UTF-7"
208bool(false)
209string(8) "A \033 B"
210int(2)
211
212valid direct encoding character = after +
213string(5) "UTF-8"
214string(5) "UTF-7"
215bool(false)
216string(5) "UTF-7"
217bool(false)
218string(5) "A = B"
219int(2)
220
221invalid direct encoding character ~ after +
222string(5) "UTF-8"
223string(5) "UTF-7"
224bool(false)
225string(5) "UTF-7"
226bool(false)
227string(5) "A ~ B"
228int(2)
229
230invalid direct encoding character \ after +
231string(5) "UTF-8"
232string(5) "UTF-7"
233bool(false)
234string(5) "UTF-7"
235bool(false)
236string(5) "A \ B"
237int(2)
238
239invalid direct encoding character ESC after +
240string(5) "UTF-8"
241string(5) "UTF-7"
242bool(false)
243string(5) "UTF-7"
244bool(false)
245string(8) "A \033 B"
246int(2)
247
248valid base64 character between + and -
249string(5) "UTF-7"
250string(5) "UTF-7"
251string(5) "UTF-7"
252string(5) "UTF-7"
253bool(true)
254string(13) "A 日本語 B"
255int(2)
256
257invalid base64 character between + and -
258string(5) "UTF-8"
259string(5) "UTF-8"
260bool(false)
261string(5) "UTF-7"
262bool(false)
263string(11) "A 日本? B"
264int(3)
265
266valid base64 character between + and non-base64 character
267string(5) "UTF-7"
268string(5) "UTF-7"
269string(5) "UTF-7"
270string(5) "UTF-7"
271bool(true)
272string(13) "A 日本語 B"
273int(3)
274
275invalid base64 character between + and non-base64 character
276string(5) "UTF-8"
277string(5) "UTF-8"
278bool(false)
279string(5) "UTF-7"
280bool(false)
281string(11) "A 日本? B"
282int(4)
283
284valid base64 character between + and base64 character
285string(5) "UTF-8"
286string(5) "UTF-8"
287bool(false)
288string(5) "UTF-7"
289bool(false)
290string(14) "A 日本語? B"
291int(5)
292
293invalid base64 character between + and base64 character
294string(5) "UTF-8"
295string(5) "UTF-8"
296string(5) "UTF-7"
297string(5) "UTF-7"
298bool(true)
299string(13) "A 日本誵 B"
300int(5)
301
302valid base64 character between + and end of string
303string(5) "UTF-7"
304string(5) "UTF-7"
305string(5) "UTF-7"
306string(5) "UTF-7"
307bool(true)
308string(11) "A 日本語"
309int(5)
310
311invalid base64 character between + and end of string
312string(5) "UTF-8"
313string(5) "UTF-8"
314bool(false)
315string(5) "UTF-7"
316bool(false)
317string(9) "A 日本?"
318int(6)
319
320valid base64 character consisting only of + between + and -
321string(5) "UTF-8"
322string(5) "UTF-8"
323string(5) "UTF-7"
324string(5) "UTF-7"
325bool(true)
326string(13) "A ﯯ뻻 B"
327int(6)
328
329invalid base64 character consisting only of + between + and -
330string(5) "UTF-8"
331string(5) "UTF-8"
332string(5) "UTF-7"
333string(5) "UTF-7"
334bool(true)
335string(13) "A ﯯ뻻 B"
336int(6)
337
338valid base64 character consisting only of + between + and non-base64 character
339string(5) "UTF-8"
340string(5) "UTF-8"
341string(5) "UTF-7"
342string(5) "UTF-7"
343bool(true)
344string(13) "A ﯯ뻻 B"
345int(6)
346
347invalid base64 character consisting only of + between + and non-base64 character
348string(5) "UTF-8"
349string(5) "UTF-8"
350string(5) "UTF-7"
351string(5) "UTF-7"
352bool(true)
353string(13) "A ﯯ뻻 B"
354int(6)
355
356valid base64 character consisting only of + between + and base64 character
357string(5) "UTF-8"
358string(5) "UTF-8"
359bool(false)
360string(5) "UTF-7"
361bool(false)
362string(14) "A ﯯ뻻? B"
363int(7)
364
365invalid base64 character consisting only of + between + and base64 character
366string(5) "UTF-8"
367string(5) "UTF-8"
368bool(false)
369string(5) "UTF-7"
370bool(false)
371string(14) "A ﯯ뻻? B"
372int(8)
373
374valid base64 character consisting only of + between + and end of string
375string(5) "UTF-8"
376string(5) "UTF-8"
377string(5) "UTF-7"
378string(5) "UTF-7"
379bool(true)
380string(11) "A ﯯ뻻"
381int(8)
382
383invalid base64 character consisting only of + between + and end of string
384string(5) "UTF-8"
385string(5) "UTF-8"
386string(5) "UTF-7"
387string(5) "UTF-7"
388bool(true)
389string(11) "A ﯯ뻻"
390int(8)
391
392valid base64 character using surrogate pair between + and -
393string(5) "UTF-8"
394string(5) "UTF-8"
395string(5) "UTF-7"
396string(5) "UTF-7"
397bool(true)
398string(8) "A 𩸽 B"
399int(8)
400
401invalid base64 character using surrogate pair between + and -
402string(5) "UTF-8"
403string(5) "UTF-8"
404bool(false)
405string(5) "UTF-7"
406bool(false)
407string(5) "A ? B"
408int(9)
409
410valid base64 character using surrogate pair between + and non-base64 character
411string(5) "UTF-8"
412string(5) "UTF-8"
413string(5) "UTF-7"
414string(5) "UTF-7"
415bool(true)
416string(8) "A 𩸽 B"
417int(9)
418
419invalid base64 character using surrogate pair between + and non-base64 character
420string(5) "UTF-8"
421string(5) "UTF-8"
422bool(false)
423string(5) "UTF-7"
424bool(false)
425string(5) "A ? B"
426int(10)
427
428valid base64 character using surrogate pair between + and base64 character
429string(5) "UTF-8"
430string(5) "UTF-8"
431bool(false)
432string(5) "UTF-7"
433bool(false)
434string(9) "A 𩸽? B"
435int(11)
436
437invalid base64 character using surrogate pair between + and base64 character
438string(5) "UTF-8"
439string(5) "UTF-8"
440bool(false)
441string(5) "UTF-7"
442bool(false)
443string(5) "A ? B"
444int(12)
445
446valid base64 character using surrogate pair between + and end of string
447string(5) "UTF-8"
448string(5) "UTF-8"
449string(5) "UTF-7"
450string(5) "UTF-7"
451bool(true)
452string(6) "A 𩸽"
453int(12)
454
455invalid base64 character using surrogate pair between + and end of string
456string(5) "UTF-8"
457string(5) "UTF-8"
458bool(false)
459string(5) "UTF-7"
460bool(false)
461string(3) "A ?"
462int(13)
463