1--TEST--
2Test mb_encode_mimeheader() function : test cases found by fuzzer
3--EXTENSIONS--
4mbstring
5--INI--
6error_reporting=E_ALL^E_DEPRECATED
7--FILE--
8<?php
9
mb_internal_encoding('UTF-8');

// Argument lists for mb_encode_mimeheader(), one per test case. They are executed in
// the order listed and each result is dumped on its own line, so the N-th entry here
// corresponds to the N-th line of expected output produced by this part of the script.
$basicCases = [
	// Empty input string
	['', 'UTF-8', 'Q'],

	// Regression test: QPrint-encoding a string with embedded NUL (zero) bytes
	["abc\x00abc", 'UTF-8', 'Q'],

	// Regression test: input string which ends prematurely
	["\xE2", 'UTF-8', 'B'],

	// '?' signs
	['?', 'ASCII', 'B'],
	['?', 'ASCII', 'Q'],

	// '=' signs
	['=', 'ASCII', 'B'],
	['=', 'ASCII', 'Q'],

	// Underscores
	['_', 'ASCII', 'B'],
	['_', 'ASCII', 'Q'],

	// 0x7F (DEL)
	["\x7f", 'ASCII', 'B', ''],

	// Leading spaces (input consisting of nothing but spaces)
	[' ', 'ASCII', 'B'],
	[' ', 'ASCII', 'Q'],
	['   ', 'ASCII', 'B'],
	['   ', 'ASCII', 'Q'],

	// Multiple spaces after a word
	['ab  ab  ', 'ASCII', 'B'],
	['ab  ab  ', 'ASCII', 'Q'],

	// Trailing spaces
	['` ', 'HZ', 'B', ''],
	['S ', 'ASCII', 'Q', '', 73],

	// Regression test: when the whole input is a single ASCII 'word' and a high indent
	// value makes us consider adding a line feed, no extra spaces should be added at
	// the beginning of the string
	['S4', 'ASCII', 'B', "\n", 73],
	['S4', 'ASCII', 'Q', "\n", 73],

	// Regression test: converting UTF-8 to UCS-4 and then QPrint-encoding makes the
	// string take vastly more bytes; the max line length must not be overrun
	["24\x0a", 'UCS-4', 'Q', '', 29],

	// Regression test: the space after an ASCII word must be included when switching
	// to Base64 encoding
	["o\x20\x00", 'ASCII', 'B'],

	// Regression test: buffer overrun while performing Base64-encoding
	["\x00\x11\x01\x00\x00\x00\x00\x00\x00\x00", 'UCS-4', 'B'],

	// Regression test: incorrect calculation of when to stop generating output
	["\x01\x00\xcb\xcb\xcb\xcb\xcb\xcb=\xcb\xcb\xcb=?\x01\x00a\x00\x00\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4?4\xcb\xcb\xcb\xcb\xcb=?\x01\x00\x00\x00\x01\x00\x00\x06\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4\xcb\xcb\xcb\xcb\xcb?4", 'UCS-2', 'B', ''],
];

foreach ($basicCases as $arguments) {
	var_dump(mb_encode_mimeheader(...$arguments));
}
67
// 'Line feed' string is truncated to no more than 8 bytes long
// The string is passed via $linefeed (not a separate literal) so that the second
// var_dump() really checks the caller's variable after it has been handed to
// mb_encode_mimeheader(); with a literal argument that check could never fail
$linefeed = "=aaaaaa=?";
var_dump(mb_encode_mimeheader("?", "ASCII", "Q", $linefeed, 52));
var_dump($linefeed); // Make sure 'line feed' string was not modified
72
73// Regression test: must take ASCII characters already output at beginning of line
74// into account when calculating how many QPrint-encoded characters we can output
75// without overrunning max line length
76var_dump(mb_encode_mimeheader(",\x20o\x00\x01\x00\x00(", "JIS", "Q", "", 40));
77
78// Make sure we maintain legacy behavior when linefeed string contains NUL (zero) bytes
79// (We treat the linefeed string as being truncated at that point)
80// The reason is because in the original implementation, the linefeed string was a
81// null-terminated C string, so including NUL bytes would have the side effect of
82// causing only part of the linefeed string to be used
83var_dump(mb_encode_mimeheader("\xff", "ASCII", "Q", "\x00", 54));
84
85// Regression test: After we see a non-ASCII character and switch into Base64/QPrint encoding mode,
86// we may need to emit a linefeed before we start the next MIME encoded word
87// If so, properly record where the line start position is so we can correctly calculate
88// how much output can fit on the line
89var_dump(mb_encode_mimeheader("\xff~H~\xe0\xea\x00\x00\xff\xff\xff\xff\xff>\x00\x00\x00\x00", "HZ", "Q", "", 71));
90
91// ASCII strings with no spaces should pass through unchanged
92var_dump(mb_encode_mimeheader("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5", "BIG-5", "B"));
93
94// Regression test: After decoding part of a line as ASCII, before we switch into Base64/QPrint encoding mode,
95// refill our buffer of wchars so we don't hit the end of the buffer in the middle of a line
96var_dump(mb_encode_mimeheader("\x20\x20\x20\x202\x20\x20\x20sssssssssssssssssssssssssss\x20\x20\x20\x20W\x20\x20\x20\x20\x20\x20W\x20\x20\x20\x20\xb9S\x01\x00\xf0`\x00\x00\x20\x20\x20\x20mSCII\xee\x20\x20\x20\x20mSCII\xeeI\xee", "ArmSCII-8", "B", ""));
97
98// Regression test: Input string with a huge number of spaces
99var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00", "CP936", "Q", ""));
100
101// Regression test: Long string, all ASCII, but with spaces at the beginning
102var_dump(mb_encode_mimeheader("\x20\x201111111111111111111111111111111111111111111111111111111111111111111111111", "ASCII", "Q", ""));
103
104// Only a single character in input, but when we convert it to outcode and then
105// transfer-encode it, it takes too many bytes to fit on a single line
106// Legacy implementation would always include at least one wchar in each encoded word;
107// imitate the same behavior
108var_dump(mb_encode_mimeheader("\xe7\xad\xb5", "HZ", "Q", "", 44));
109
110// Regression test: Exploring corner cases of when legacy implementation would output plain ASCII
111// with no transfer encoding, and when it would transfer-encode
112var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
113var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3\x20", "GB18030", "Q", ""));
114
115// Change in behavior: The old implementation would output the following string as plain ASCII,
116// but the new one transfer-encodes it
117// In the general case, matching the old implementation's decision to transfer-encode or not
118// perfectly would require allocating potentially unbounded scratch memory (up to the size of
119// the input string), but we aim to only use a constant amount of temporarily allocated memory
120var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
121
122// Regression test for infinite loop which was unintentionally caused when refactoring
123var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B"));
124var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B"));
125
126// Regression test for problem where MIME encoding loop would not leave enough space in wchar
127// buffer for the next iteration, causing an assertion failure
128mb_internal_encoding('MacJapanese');
129var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44));
130
131// Regression test for failing assertion caused by the fact that QPrint deliberately generates no
132// wchars for CR (0x0D) bytes
133try {
134	mb_internal_encoding('Quoted-Printable');
135	var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184));
136} catch (\ValueError $e) {
137	echo $e->getMessage() . \PHP_EOL;
138}
139
140echo "Done";
141?>
142--EXPECT--
143string(0) ""
144string(21) "=?UTF-8?Q?abc=00abc?="
145string(16) "=?UTF-8?B?Pw==?="
146string(19) "=?US-ASCII?B?Pw==?="
147string(18) "=?US-ASCII?Q?=3F?="
148string(19) "=?US-ASCII?B?PQ==?="
149string(18) "=?US-ASCII?Q?=3D?="
150string(19) "=?US-ASCII?B?Xw==?="
151string(18) "=?US-ASCII?Q?=5F?="
152string(19) "=?US-ASCII?B?fw==?="
153string(1) " "
154string(1) " "
155string(3) "   "
156string(3) "   "
157string(8) "ab  ab  "
158string(8) "ab  ab  "
159string(1) "`"
160string(1) "S"
161string(2) "S4"
162string(2) "S4"
163string(61) "=?UCS-4?Q?=00=00=00=32=00=00=00=34?= =?UCS-4?Q?=00=00=00=0A?="
164string(21) "o =?US-ASCII?B?AA==?="
165string(68) "=?UCS-4?B?AAAAAAAAABEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==?="
166string(271) "=?UCS-2?B?AAEAAAA/AD8APwA/AD8APwA9AD8APwA/AD0APwABAAAAYQAAAAAAPwA/AD8=?= =?UCS-2?B?AD0APwA/AD8APwA/AD8APwA/AD8APwA/ADQAPwA0AD8APwA/AD8APwA9AD8=?= =?UCS-2?B?AAEAAAAAAAAAAQAAAAAABgA/AD8APwA/AD8APwA/AD8APwA9AD8APwA/AD8=?= =?UCS-2?B?AD8APwA/AD8APwA/AD8ANAA/AD8APwA/AD8APwA0?="
167string(27) "=aaaaaa= =?US-ASCII?Q?=3F?="
168string(9) "=aaaaaa=?"
169string(55) ", =?ISO-2022-JP?Q?o=00=01=00=00?= =?ISO-2022-JP?Q?=28?="
170string(19) " =?US-ASCII?Q?=3F?="
171string(76) " =?HZ-GB-2312?Q?=3F=7E=7EH=7E=7E=3F=3F=00=00=3F=3F=3F=3F=3F=3E=00=00=00=00?="
172string(75) "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5"
173string(108) "    2   sssssssssssssssssssssssssss    W      W =?ArmSCII-8?B?ICAgP1MBAD9gAAAgICAgbVNDSUk/ICAgIG1TQ0lJP0k/?="
174string(294) "=?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=00?="
175string(75) "  1111111111111111111111111111111111111111111111111111111111111111111111111"
176string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
177string(77) "2                                                                          !3"
178string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
179string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?="
180string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?=
181 =?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?="
182string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?=
183 =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?="
184string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?="
185mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding
186Done
187