1--TEST--
2GH-10192 (mb_detect_encoding() results for UTF-7 differ between PHP 8.0 and 8.1)
3--EXTENSIONS--
4mbstring
5--FILE--
<?php

/*
 * Regression script for GH-10192, exercising the UTF7-IMAP encoding.
 *
 * Every entry of $testcases maps a human-readable title to an input string.
 * For each entry the script prints, in this order:
 *   1. the title;
 *   2. mb_detect_encoding() with the candidate list "UTF-8, UTF7-IMAP",
 *      strict and then non-strict;
 *   3. mb_detect_encoding() with the single candidate "UTF7-IMAP",
 *      strict and then non-strict;
 *   4. mb_check_encoding() against UTF7-IMAP;
 *   5. the input converted from UTF7-IMAP to UTF-8, with bytes 0x00-0x1F
 *      and 0x7F backslash-escaped so the output stays printable;
 *   6. the value of mb_get_info('illegal_chars') at that point;
 *   7. an empty line.
 */

$testcases = [
    // '&' and '-' next to ordinary (non-Base64 / Base64) characters.
    'non-base64 character after &' => 'A & B',
    'non-base64 character after -' => 'A - B',
    'base64 character before &' => 'A 1& B',
    'base64 character before -' => 'A 1- B',
    'base64 character after &' => 'A &1 B',
    'base64 character after -' => 'A -1 B',
    'base64 character before and after &' => 'A 1&1 B',
    'base64 character before and after -' => 'A 1-1 B',
    'string ends with &' => 'A &',
    'string ends with -' => 'A -',
    '& and -' => 'A &- B',
    '- and &' => 'A -& B',

    // '~' versus the ESC control byte (0x1B), alone and right after '&'.
    'valid direct encoding character ~' => 'A ~ B',
    'invalid direct encoding character ESC' => "A \x1b B",
    'valid direct encoding character ~ after &' => 'A &~ B',
    'invalid direct encoding character ESC after &' => "A &\x1b B",

    // "ZeVnLIqe" is the Base64 form of 日本語 in UTF-16BE (65E5 672C 8A9E);
    // "ZeVnLIq" is the same payload with its last Base64 character removed.
    'valid base64 character between & and -' => 'A &ZeVnLIqe- B',
    'invalid base64 character between & and -' => 'A &ZeVnLIq- B',
    'valid base64 character between & and non-base64 character' => 'A &ZeVnLIqe B',
    'invalid base64 character between & and non-base64 character' => 'A &ZeVnLIq B',
    'valid base64 character between & and base64 character' => 'A &ZeVnLIqe1 B',
    'invalid base64 character between & and base64 character' => 'A &ZeVnLIq1 B',
    'valid base64 character between & and end of string' => 'A &ZeVnLIqe',
    'invalid base64 character between & and end of string' => 'A &ZeVnLIq',

    // "2GfePQ" is the Base64 form of U+29E3D (𩸽) in UTF-16BE, i.e. the
    // surrogate pair D867 DE3D; "2Gc" carries only the first 16 bits (D867).
    'valid base64 character using surrogate pair between & and -' => 'A &2GfePQ- B',
    'first 16 bits of base64 character using surrogate pair between & and -' => 'A &2Gc- B',
    'valid base64 character using surrogate pair between & and non-base64 character' => 'A &2GfePQ B',
    'first 16 bits of base64 character using surrogate pair between & and non-base64 character' => 'A &2Gc B',
    'valid base64 character using surrogate pair between & and base64 character' => 'A &2GfePQ1 B',
    'first 16 bits of base64 character using surrogate pair between & and base64 character' => 'A &2Gc1 B',
    'valid base64 character using surrogate pair between & and end of string' => 'A &2GfePQ',
    'first 16 bits of base64 character using surrogate pair between & and end of string' => 'A &2Gc',

    // "3j3YZw" is the same surrogate pair in reverse order (DE3D D867);
    // "3j0" carries only the last 16 bits of the pair (DE3D).
    'invalid base64 character using surrogate pair in reverse order between & and -' => 'A &3j3YZw- B',
    'last 16 bits of base64 character using surrogate pair in reverse order between & and -' => 'A &3j0- B',
    'invalid base64 character using surrogate pair in reverse order between & and non-base64 character' => 'A &3j3YZw B',
    'last 16 bits of base64 character using surrogate pair in reverse order between & and non-base64 character' => 'A &3j0 B',
    'invalid base64 character using surrogate pair in reverse order between & and base64 character' => 'A &3j3YZw1 B',
    'last 16 bits of base64 character using surrogate pair in reverse order between & and base64 character' => 'A &3j01 B',
    'invalid base64 character using surrogate pair in reverse order between & and end of string' => 'A &3j3YZw',
    'last 16 bits of base64 character using surrogate pair in reverse order between & and end of string' => 'A &3j0',
];

// Candidate lists handed to mb_detect_encoding(), in the order they are reported.
$candidateLists = ['UTF-8, UTF7-IMAP', 'UTF7-IMAP'];

foreach ($testcases as $label => $input) {
    echo $label, PHP_EOL;

    // Each candidate list is probed in strict mode first, then non-strict.
    foreach ($candidateLists as $candidates) {
        foreach ([true, false] as $strict) {
            var_dump(mb_detect_encoding($input, $candidates, $strict));
        }
    }

    var_dump(mb_check_encoding($input, 'UTF7-IMAP'));

    // Escape bytes 0x00-0x1F and 0x7F so control characters show up visibly.
    $converted = mb_convert_encoding($input, 'UTF-8', 'UTF7-IMAP');
    var_dump(addcslashes($converted, "\0..\37\177"));

    var_dump(mb_get_info('illegal_chars'));
    echo PHP_EOL;
}

?>
64--EXPECT--
65non-base64 character after &
66string(5) "UTF-8"
67string(5) "UTF-8"
68bool(false)
69string(9) "UTF7-IMAP"
70bool(false)
71string(4) "A ?B"
72int(1)
73
74non-base64 character after -
75string(5) "UTF-8"
76string(5) "UTF-8"
77string(9) "UTF7-IMAP"
78string(9) "UTF7-IMAP"
79bool(true)
80string(5) "A - B"
81int(1)
82
83base64 character before &
84string(5) "UTF-8"
85string(5) "UTF-8"
86bool(false)
87string(9) "UTF7-IMAP"
88bool(false)
89string(5) "A 1?B"
90int(2)
91
92base64 character before -
93string(5) "UTF-8"
94string(5) "UTF-8"
95string(9) "UTF7-IMAP"
96string(9) "UTF7-IMAP"
97bool(true)
98string(6) "A 1- B"
99int(2)
100
101base64 character after &
102string(5) "UTF-8"
103string(5) "UTF-8"
104bool(false)
105string(9) "UTF7-IMAP"
106bool(false)
107string(4) "A ?B"
108int(3)
109
110base64 character after -
111string(5) "UTF-8"
112string(5) "UTF-8"
113string(9) "UTF7-IMAP"
114string(9) "UTF7-IMAP"
115bool(true)
116string(6) "A -1 B"
117int(3)
118
119base64 character before and after &
120string(5) "UTF-8"
121string(5) "UTF-8"
122bool(false)
123string(9) "UTF7-IMAP"
124bool(false)
125string(5) "A 1?B"
126int(4)
127
128base64 character before and after -
129string(5) "UTF-8"
130string(5) "UTF-8"
131string(9) "UTF7-IMAP"
132string(9) "UTF7-IMAP"
133bool(true)
134string(7) "A 1-1 B"
135int(4)
136
137string ends with &
138string(5) "UTF-8"
139string(5) "UTF-8"
140bool(false)
141string(9) "UTF7-IMAP"
142bool(false)
143string(3) "A ?"
144int(5)
145
146string ends with -
147string(5) "UTF-8"
148string(5) "UTF-8"
149string(9) "UTF7-IMAP"
150string(9) "UTF7-IMAP"
151bool(true)
152string(3) "A -"
153int(5)
154
155& and -
156string(9) "UTF7-IMAP"
157string(9) "UTF7-IMAP"
158string(9) "UTF7-IMAP"
159string(9) "UTF7-IMAP"
160bool(true)
161string(5) "A & B"
162int(5)
163
164- and &
165string(5) "UTF-8"
166string(5) "UTF-8"
167bool(false)
168string(9) "UTF7-IMAP"
169bool(false)
170string(5) "A -?B"
171int(6)
172
173valid direct encoding character ~
174string(5) "UTF-8"
175string(5) "UTF-8"
176string(9) "UTF7-IMAP"
177string(9) "UTF7-IMAP"
178bool(true)
179string(5) "A ~ B"
180int(6)
181
182invalid direct encoding character ESC
183string(5) "UTF-8"
184string(5) "UTF-8"
185bool(false)
186string(9) "UTF7-IMAP"
187bool(false)
188string(5) "A ? B"
189int(7)
190
191valid direct encoding character ~ after &
192string(5) "UTF-8"
193string(5) "UTF-8"
194bool(false)
195string(9) "UTF7-IMAP"
196bool(false)
197string(5) "A ? B"
198int(8)
199
200invalid direct encoding character ESC after &
201string(5) "UTF-8"
202string(5) "UTF-8"
203bool(false)
204string(9) "UTF7-IMAP"
205bool(false)
206string(5) "A ? B"
207int(9)
208
209valid base64 character between & and -
210string(9) "UTF7-IMAP"
211string(9) "UTF7-IMAP"
212string(9) "UTF7-IMAP"
213string(9) "UTF7-IMAP"
214bool(true)
215string(13) "A 日本語 B"
216int(9)
217
218invalid base64 character between & and -
219string(5) "UTF-8"
220string(5) "UTF-8"
221bool(false)
222string(9) "UTF7-IMAP"
223bool(false)
224string(11) "A 日本? B"
225int(10)
226
227valid base64 character between & and non-base64 character
228string(5) "UTF-8"
229string(5) "UTF-8"
230bool(false)
231string(9) "UTF7-IMAP"
232bool(false)
233string(13) "A 日本語?B"
234int(11)
235
236invalid base64 character between & and non-base64 character
237string(5) "UTF-8"
238string(5) "UTF-8"
239bool(false)
240string(9) "UTF7-IMAP"
241bool(false)
242string(10) "A 日本?B"
243int(12)
244
245valid base64 character between & and base64 character
246string(5) "UTF-8"
247string(5) "UTF-8"
248bool(false)
249string(9) "UTF7-IMAP"
250bool(false)
251string(13) "A 日本語?B"
252int(13)
253
254invalid base64 character between & and base64 character
255string(5) "UTF-8"
256string(5) "UTF-8"
257bool(false)
258string(9) "UTF7-IMAP"
259bool(false)
260string(13) "A 日本誵?B"
261int(14)
262
263valid base64 character between & and end of string
264string(5) "UTF-8"
265string(5) "UTF-8"
266bool(false)
267string(9) "UTF7-IMAP"
268bool(false)
269string(12) "A 日本語?"
270int(15)
271
272invalid base64 character between & and end of string
273string(5) "UTF-8"
274string(5) "UTF-8"
275bool(false)
276string(9) "UTF7-IMAP"
277bool(false)
278string(9) "A 日本?"
279int(16)
280
281valid base64 character using surrogate pair between & and -
282string(5) "UTF-8"
283string(5) "UTF-8"
284string(9) "UTF7-IMAP"
285string(9) "UTF7-IMAP"
286bool(true)
string(8) "A 𩸽 B"
288int(16)
289
290first 16 bits of base64 character using surrogate pair between & and -
291string(5) "UTF-8"
292string(5) "UTF-8"
293bool(false)
294string(9) "UTF7-IMAP"
295bool(false)
296string(5) "A ? B"
297int(17)
298
299valid base64 character using surrogate pair between & and non-base64 character
300string(5) "UTF-8"
301string(5) "UTF-8"
302bool(false)
303string(9) "UTF7-IMAP"
304bool(false)
string(8) "A 𩸽?B"
306int(18)
307
308first 16 bits of base64 character using surrogate pair between & and non-base64 character
309string(5) "UTF-8"
310string(5) "UTF-8"
311bool(false)
312string(9) "UTF7-IMAP"
313bool(false)
314string(4) "A ?B"
315int(19)
316
317valid base64 character using surrogate pair between & and base64 character
318string(5) "UTF-8"
319string(5) "UTF-8"
320bool(false)
321string(9) "UTF7-IMAP"
322bool(false)
string(8) "A 𩸽?B"
324int(20)
325
326first 16 bits of base64 character using surrogate pair between & and base64 character
327string(5) "UTF-8"
328string(5) "UTF-8"
329bool(false)
330string(9) "UTF7-IMAP"
331bool(false)
332string(4) "A ?B"
333int(21)
334
335valid base64 character using surrogate pair between & and end of string
336string(5) "UTF-8"
337string(5) "UTF-8"
338bool(false)
339string(9) "UTF7-IMAP"
340bool(false)
string(7) "A 𩸽?"
342int(22)
343
344first 16 bits of base64 character using surrogate pair between & and end of string
345string(5) "UTF-8"
346string(5) "UTF-8"
347bool(false)
348string(9) "UTF7-IMAP"
349bool(false)
350string(4) "A ??"
351int(24)
352
353invalid base64 character using surrogate pair in reverse order between & and -
354string(5) "UTF-8"
355string(5) "UTF-8"
356bool(false)
357string(9) "UTF7-IMAP"
358bool(false)
359string(6) "A ?? B"
360int(26)
361
362last 16 bits of base64 character using surrogate pair in reverse order between & and -
363string(5) "UTF-8"
364string(5) "UTF-8"
365bool(false)
366string(9) "UTF7-IMAP"
367bool(false)
368string(5) "A ? B"
369int(27)
370
371invalid base64 character using surrogate pair in reverse order between & and non-base64 character
372string(5) "UTF-8"
373string(5) "UTF-8"
374bool(false)
375string(9) "UTF7-IMAP"
376bool(false)
377string(5) "A ??B"
378int(29)
379
380last 16 bits of base64 character using surrogate pair in reverse order between & and non-base64 character
381string(5) "UTF-8"
382string(5) "UTF-8"
383bool(false)
384string(9) "UTF7-IMAP"
385bool(false)
386string(5) "A ??B"
387int(31)
388
389invalid base64 character using surrogate pair in reverse order between & and base64 character
390string(5) "UTF-8"
391string(5) "UTF-8"
392bool(false)
393string(9) "UTF7-IMAP"
394bool(false)
395string(5) "A ??B"
396int(33)
397
398last 16 bits of base64 character using surrogate pair in reverse order between & and base64 character
399string(5) "UTF-8"
400string(5) "UTF-8"
401bool(false)
402string(9) "UTF7-IMAP"
403bool(false)
404string(5) "A ??B"
405int(35)
406
407invalid base64 character using surrogate pair in reverse order between & and end of string
408string(5) "UTF-8"
409string(5) "UTF-8"
410bool(false)
411string(9) "UTF7-IMAP"
412bool(false)
413string(5) "A ???"
414int(38)
415
416last 16 bits of base64 character using surrogate pair in reverse order between & and end of string
417string(5) "UTF-8"
418string(5) "UTF-8"
419bool(false)
420string(9) "UTF7-IMAP"
421bool(false)
422string(4) "A ??"
423int(40)
424