1--TEST--
2GH-10192 (mb_detect_encoding() results for UTF-7 differ between PHP 8.0 and 8.1)
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7
// Labelled inputs for the UTF-7 checks below. Each key is the title echoed
// before that case's results; each value is the raw byte string handed to the
// mbstring functions. In UTF-7, '+' opens a base64 run of UTF-16BE code units
// and '-' explicitly closes it. Entry order is significant: results are printed
// in this order.
$testcases = [
    // '+' and '-' next to characters that are or are not in the base64 alphabet.
    'non-base64 character after +' => 'A + B',
    'non-base64 character after -' => 'A - B',
    'base64 character before +' => 'A 1+ B',
    'base64 character before -' => 'A 1- B',
    'base64 character after +' => 'A +1 B',
    'base64 character after -' => 'A -1 B',
    'base64 character before and after +' => 'A 1+1 B',
    'base64 character before and after -' => 'A 1-1 B',
    'string ends with +' => 'A +',
    'string ends with -' => 'A -',
    '+ and -' => 'A +- B',
    '- and +' => 'A -+ B',

    // Characters outside a base64 run, and the same characters right after '+'.
    'valid direct encoding character =' => 'A = B',
    'invalid direct encoding character ~' => 'A ~ B',
    'invalid direct encoding character \\' => 'A \\ B',
    'invalid direct encoding character ESC' => "A \x1b B",
    'valid direct encoding character = after +' => 'A += B',
    'invalid direct encoding character ~ after +' => 'A +~ B',
    'invalid direct encoding character \\ after +' => 'A +\\ B',
    'invalid direct encoding character ESC after +' => "A +\x1b B",

    // Base64 runs carrying BMP text, complete and truncated, with different terminators.
    'valid base64 character between + and -' => 'A +ZeVnLIqe- B', // 日本語 in UTF-16BE
    'invalid base64 character between + and -' => 'A +ZeVnLIq- B', // 日本語 in UTF-16BE without the last character
    'valid base64 character between + and non-base64 character' => 'A +ZeVnLIqe B',
    'invalid base64 character between + and non-base64 character' => 'A +ZeVnLIq B',
    'valid base64 character between + and base64 character' => 'A +ZeVnLIqe1 B',
    'invalid base64 character between + and base64 character' => 'A +ZeVnLIq1 B',
    'valid base64 character between + and end of string' => 'A +ZeVnLIqe',
    'invalid base64 character between + and end of string' => 'A +ZeVnLIq',

    // Base64 runs made up solely of '+', which is itself a base64 alphabet character.
    // NOTE(review): each "valid"/"invalid" pair in this group uses the same input
    // string; confirm that is intended before relying on the distinction.
    'valid base64 character consisting only of + between + and -' => 'A +++++++++- B',
    'invalid base64 character consisting only of + between + and -' => 'A +++++++++- B',
    'valid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
    'invalid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
    'valid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
    'invalid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
    'valid base64 character consisting only of + between + and end of string' => 'A +++++++++',
    'invalid base64 character consisting only of + between + and end of string' => 'A +++++++++',

    // Base64 runs carrying a surrogate pair (D867 DE3D), complete or cut after the first unit.
    'valid base64 character using surrogate pair between + and -' => 'A +2GfePQ- B', // 𩸽 (U+29E3D) in UTF-16BE
    'first 16 bits of base64 character using surrogate pair between + and -' => 'A +2Gc- B', // first 16 bits of 𩸽 in UTF-16BE
    'valid base64 character using surrogate pair between + and non-base64 character' => 'A +2GfePQ B',
    'first 16 bits of base64 character using surrogate pair between + and non-base64 character' => 'A +2Gc B',
    'valid base64 character using surrogate pair between + and base64 character' => 'A +2GfePQ1 B',
    'first 16 bits of base64 character using surrogate pair between + and base64 character' => 'A +2Gc1 B',
    'valid base64 character using surrogate pair between + and end of string' => 'A +2GfePQ',
    'first 16 bits of base64 character using surrogate pair between + and end of string' => 'A +2Gc',

    // The same two surrogates in swapped order (DE3D D867), complete or just the leading DE3D.
    'invalid base64 character using surrogate pair in reverse order between + and -' => 'A +3j3YZw- B', // 𩸽 in reverse order in UTF-16BE
    'last 16 bits of base64 character using surrogate pair in reverse order between + and -' => 'A +3j0- B', // last 16 bits of 𩸽 in UTF-16BE
    'invalid base64 character using surrogate pair in reverse order between + and non-base64 character' => 'A +3j3YZw B',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and non-base64 character' => 'A +3j0 B',
    'invalid base64 character using surrogate pair in reverse order between + and base64 character' => 'A +3j3YZw1 B',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and base64 character' => 'A +3j01 B',
    'invalid base64 character using surrogate pair in reverse order between + and end of string' => 'A +3j3YZw',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and end of string' => 'A +3j0',
];
62
// Run every labelled input through the mbstring entry points under test and
// dump each result, so the output can be compared line by line with the
// expected section of this test.
foreach ($testcases as $title => $case) {
    echo $title . PHP_EOL;
    // Detection with UTF-8 listed ahead of UTF-7: third argument (strict) on, then off.
    var_dump(mb_detect_encoding($case, 'UTF-8, UTF-7', true));
    var_dump(mb_detect_encoding($case, 'UTF-8, UTF-7', false));
    // Detection with UTF-7 as the only candidate: strict on, then off.
    var_dump(mb_detect_encoding($case, 'UTF-7', true));
    var_dump(mb_detect_encoding($case, 'UTF-7', false));
    // Validity of the input when interpreted as UTF-7.
    var_dump(mb_check_encoding($case, 'UTF-7'));
    // Conversion to UTF-8; bytes 0x00-0x1F and 0x7F are backslash-escaped so
    // control characters (such as ESC) stay visible in the dumped string.
    var_dump(addcslashes(mb_convert_encoding($case, 'UTF-8', 'UTF-7'), "\0..\37\177"));
    // mbstring's illegal-character counter; it is not reset between cases, so
    // the dumped value accumulates over the run.
    var_dump(mb_get_info('illegal_chars'));
    echo PHP_EOL;
}
74?>
75--EXPECT--
76non-base64 character after +
77string(5) "UTF-8"
78string(5) "UTF-8"
79bool(false)
80string(5) "UTF-7"
81bool(false)
82string(4) "A  B"
83int(0)
84
85non-base64 character after -
86string(5) "UTF-8"
87string(5) "UTF-8"
88string(5) "UTF-7"
89string(5) "UTF-7"
90bool(true)
91string(5) "A - B"
92int(0)
93
94base64 character before +
95string(5) "UTF-8"
96string(5) "UTF-8"
97bool(false)
98string(5) "UTF-7"
99bool(false)
100string(5) "A 1 B"
101int(0)
102
103base64 character before -
104string(5) "UTF-8"
105string(5) "UTF-8"
106string(5) "UTF-7"
107string(5) "UTF-7"
108bool(true)
109string(6) "A 1- B"
110int(0)
111
112base64 character after +
113string(5) "UTF-8"
114string(5) "UTF-8"
115bool(false)
116string(5) "UTF-7"
117bool(false)
118string(5) "A ? B"
119int(1)
120
121base64 character after -
122string(5) "UTF-8"
123string(5) "UTF-8"
124string(5) "UTF-7"
125string(5) "UTF-7"
126bool(true)
127string(6) "A -1 B"
128int(1)
129
130base64 character before and after +
131string(5) "UTF-8"
132string(5) "UTF-8"
133bool(false)
134string(5) "UTF-7"
135bool(false)
136string(6) "A 1? B"
137int(2)
138
139base64 character before and after -
140string(5) "UTF-8"
141string(5) "UTF-8"
142string(5) "UTF-7"
143string(5) "UTF-7"
144bool(true)
145string(7) "A 1-1 B"
146int(2)
147
148string ends with +
149string(5) "UTF-7"
150string(5) "UTF-7"
151string(5) "UTF-7"
152string(5) "UTF-7"
153bool(true)
154string(2) "A "
155int(2)
156
157string ends with -
158string(5) "UTF-8"
159string(5) "UTF-8"
160string(5) "UTF-7"
161string(5) "UTF-7"
162bool(true)
163string(3) "A -"
164int(2)
165
166+ and -
167string(5) "UTF-7"
168string(5) "UTF-7"
169string(5) "UTF-7"
170string(5) "UTF-7"
171bool(true)
172string(5) "A + B"
173int(2)
174
175- and +
176string(5) "UTF-8"
177string(5) "UTF-8"
178bool(false)
179string(5) "UTF-7"
180bool(false)
181string(5) "A - B"
182int(2)
183
184valid direct encoding character =
185string(5) "UTF-8"
186string(5) "UTF-8"
187string(5) "UTF-7"
188string(5) "UTF-7"
189bool(true)
190string(5) "A = B"
191int(2)
192
193invalid direct encoding character ~
194string(5) "UTF-8"
195string(5) "UTF-8"
196bool(false)
197string(5) "UTF-7"
198bool(false)
199string(5) "A ~ B"
200int(2)
201
202invalid direct encoding character \
203string(5) "UTF-8"
204string(5) "UTF-8"
205bool(false)
206string(5) "UTF-7"
207bool(false)
208string(5) "A \ B"
209int(2)
210
211invalid direct encoding character ESC
212string(5) "UTF-8"
213string(5) "UTF-8"
214bool(false)
215string(5) "UTF-7"
216bool(false)
217string(8) "A \033 B"
218int(2)
219
220valid direct encoding character = after +
221string(5) "UTF-8"
222string(5) "UTF-8"
223bool(false)
224string(5) "UTF-7"
225bool(false)
226string(5) "A = B"
227int(2)
228
229invalid direct encoding character ~ after +
230string(5) "UTF-8"
231string(5) "UTF-8"
232bool(false)
233string(5) "UTF-7"
234bool(false)
235string(5) "A ~ B"
236int(2)
237
238invalid direct encoding character \ after +
239string(5) "UTF-8"
240string(5) "UTF-8"
241bool(false)
242string(5) "UTF-7"
243bool(false)
244string(5) "A \ B"
245int(2)
246
247invalid direct encoding character ESC after +
248string(5) "UTF-8"
249string(5) "UTF-8"
250bool(false)
251string(5) "UTF-7"
252bool(false)
253string(8) "A \033 B"
254int(2)
255
256valid base64 character between + and -
257string(5) "UTF-7"
258string(5) "UTF-7"
259string(5) "UTF-7"
260string(5) "UTF-7"
261bool(true)
262string(13) "A 日本語 B"
263int(2)
264
265invalid base64 character between + and -
266string(5) "UTF-8"
267string(5) "UTF-8"
268bool(false)
269string(5) "UTF-7"
270bool(false)
271string(11) "A 日本? B"
272int(3)
273
274valid base64 character between + and non-base64 character
275string(5) "UTF-7"
276string(5) "UTF-7"
277string(5) "UTF-7"
278string(5) "UTF-7"
279bool(true)
280string(13) "A 日本語 B"
281int(3)
282
283invalid base64 character between + and non-base64 character
284string(5) "UTF-8"
285string(5) "UTF-8"
286bool(false)
287string(5) "UTF-7"
288bool(false)
289string(11) "A 日本? B"
290int(4)
291
292valid base64 character between + and base64 character
293string(5) "UTF-8"
294string(5) "UTF-8"
295bool(false)
296string(5) "UTF-7"
297bool(false)
298string(14) "A 日本語? B"
299int(5)
300
301invalid base64 character between + and base64 character
302string(5) "UTF-8"
303string(5) "UTF-8"
304string(5) "UTF-7"
305string(5) "UTF-7"
306bool(true)
307string(13) "A 日本誵 B"
308int(5)
309
310valid base64 character between + and end of string
311string(5) "UTF-7"
312string(5) "UTF-7"
313string(5) "UTF-7"
314string(5) "UTF-7"
315bool(true)
316string(11) "A 日本語"
317int(5)
318
319invalid base64 character between + and end of string
320string(5) "UTF-8"
321string(5) "UTF-8"
322bool(false)
323string(5) "UTF-7"
324bool(false)
325string(9) "A 日本?"
326int(6)
327
328valid base64 character consisting only of + between + and -
329string(5) "UTF-8"
330string(5) "UTF-8"
331string(5) "UTF-7"
332string(5) "UTF-7"
333bool(true)
334string(13) "A ﯯ뻻 B"
335int(6)
336
337invalid base64 character consisting only of + between + and -
338string(5) "UTF-8"
339string(5) "UTF-8"
340string(5) "UTF-7"
341string(5) "UTF-7"
342bool(true)
343string(13) "A ﯯ뻻 B"
344int(6)
345
346valid base64 character consisting only of + between + and non-base64 character
347string(5) "UTF-8"
348string(5) "UTF-8"
349string(5) "UTF-7"
350string(5) "UTF-7"
351bool(true)
352string(13) "A ﯯ뻻 B"
353int(6)
354
355invalid base64 character consisting only of + between + and non-base64 character
356string(5) "UTF-8"
357string(5) "UTF-8"
358string(5) "UTF-7"
359string(5) "UTF-7"
360bool(true)
361string(13) "A ﯯ뻻 B"
362int(6)
363
364valid base64 character consisting only of + between + and base64 character
365string(5) "UTF-8"
366string(5) "UTF-8"
367bool(false)
368string(5) "UTF-7"
369bool(false)
370string(14) "A ﯯ뻻? B"
371int(7)
372
373invalid base64 character consisting only of + between + and base64 character
374string(5) "UTF-8"
375string(5) "UTF-8"
376bool(false)
377string(5) "UTF-7"
378bool(false)
379string(14) "A ﯯ뻻? B"
380int(8)
381
382valid base64 character consisting only of + between + and end of string
383string(5) "UTF-8"
384string(5) "UTF-8"
385string(5) "UTF-7"
386string(5) "UTF-7"
387bool(true)
388string(11) "A ﯯ뻻"
389int(8)
390
391invalid base64 character consisting only of + between + and end of string
392string(5) "UTF-8"
393string(5) "UTF-8"
394string(5) "UTF-7"
395string(5) "UTF-7"
396bool(true)
397string(11) "A ﯯ뻻"
398int(8)
399
400valid base64 character using surrogate pair between + and -
401string(5) "UTF-8"
402string(5) "UTF-8"
403string(5) "UTF-7"
404string(5) "UTF-7"
405bool(true)
string(8) "A 𩸽 B"
407int(8)
408
409first 16 bits of base64 character using surrogate pair between + and -
410string(5) "UTF-8"
411string(5) "UTF-8"
412bool(false)
413string(5) "UTF-7"
414bool(false)
415string(5) "A ? B"
416int(9)
417
418valid base64 character using surrogate pair between + and non-base64 character
419string(5) "UTF-8"
420string(5) "UTF-8"
421string(5) "UTF-7"
422string(5) "UTF-7"
423bool(true)
string(8) "A 𩸽 B"
425int(9)
426
427first 16 bits of base64 character using surrogate pair between + and non-base64 character
428string(5) "UTF-8"
429string(5) "UTF-8"
430bool(false)
431string(5) "UTF-7"
432bool(false)
433string(5) "A ? B"
434int(10)
435
436valid base64 character using surrogate pair between + and base64 character
437string(5) "UTF-8"
438string(5) "UTF-8"
439bool(false)
440string(5) "UTF-7"
441bool(false)
string(9) "A 𩸽? B"
443int(11)
444
445first 16 bits of base64 character using surrogate pair between + and base64 character
446string(5) "UTF-8"
447string(5) "UTF-8"
448bool(false)
449string(5) "UTF-7"
450bool(false)
451string(5) "A ? B"
452int(12)
453
454valid base64 character using surrogate pair between + and end of string
455string(5) "UTF-8"
456string(5) "UTF-8"
457string(5) "UTF-7"
458string(5) "UTF-7"
459bool(true)
string(6) "A 𩸽"
461int(12)
462
463first 16 bits of base64 character using surrogate pair between + and end of string
464string(5) "UTF-8"
465string(5) "UTF-8"
466bool(false)
467string(5) "UTF-7"
468bool(false)
469string(3) "A ?"
470int(13)
471
472invalid base64 character using surrogate pair in reverse order between + and -
473string(5) "UTF-8"
474string(5) "UTF-8"
475bool(false)
476string(5) "UTF-7"
477bool(false)
478string(6) "A ?? B"
479int(15)
480
481last 16 bits of base64 character using surrogate pair in reverse order between + and -
482string(5) "UTF-8"
483string(5) "UTF-8"
484bool(false)
485string(5) "UTF-7"
486bool(false)
487string(5) "A ? B"
488int(16)
489
490invalid base64 character using surrogate pair in reverse order between + and non-base64 character
491string(5) "UTF-8"
492string(5) "UTF-8"
493bool(false)
494string(5) "UTF-7"
495bool(false)
496string(6) "A ?? B"
497int(18)
498
499last 16 bits of base64 character using surrogate pair in reverse order between + and non-base64 character
500string(5) "UTF-8"
501string(5) "UTF-8"
502bool(false)
503string(5) "UTF-7"
504bool(false)
505string(5) "A ? B"
506int(19)
507
508invalid base64 character using surrogate pair in reverse order between + and base64 character
509string(5) "UTF-8"
510string(5) "UTF-8"
511bool(false)
512string(5) "UTF-7"
513bool(false)
514string(6) "A ?? B"
515int(21)
516
517last 16 bits of base64 character using surrogate pair in reverse order between + and base64 character
518string(5) "UTF-8"
519string(5) "UTF-8"
520bool(false)
521string(5) "UTF-7"
522bool(false)
523string(6) "A ?? B"
524int(23)
525
526invalid base64 character using surrogate pair in reverse order between + and end of string
527string(5) "UTF-8"
528string(5) "UTF-8"
529bool(false)
530string(5) "UTF-7"
531bool(false)
532string(4) "A ??"
533int(25)
534
535last 16 bits of base64 character using surrogate pair in reverse order between + and end of string
536string(5) "UTF-8"
537string(5) "UTF-8"
538bool(false)
539string(5) "UTF-7"
540bool(false)
541string(3) "A ?"
542int(26)
543