1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
--TEST--
Test mb_encode_mimeheader() function : test cases found by fuzzer
--EXTENSIONS--
mbstring
--INI--
error_reporting=E_ALL^E_DEPRECATED
--FILE--
<?php
mb_internal_encoding('UTF-8');
var_dump(mb_encode_mimeheader("", "UTF-8", "Q"));
// Regression test for QPrint-encoding of strings with embedded NUL (zero) bytes
var_dump(mb_encode_mimeheader("abc\x00abc", "UTF-8", "Q"));
// Regression test for input strings which end prematurely
var_dump(mb_encode_mimeheader("\xE2", "UTF-8", "B"));
// Handling of ? signs
var_dump(mb_encode_mimeheader("?", "ASCII", "B"));
var_dump(mb_encode_mimeheader("?", "ASCII", "Q"));
// Handling of = signs
var_dump(mb_encode_mimeheader("=", "ASCII", "B"));
var_dump(mb_encode_mimeheader("=", "ASCII", "Q"));
// Handling of underscores
var_dump(mb_encode_mimeheader("_", "ASCII", "B"));
var_dump(mb_encode_mimeheader("_", "ASCII", "Q"));
// Handling of 0x7F (DEL)
var_dump(mb_encode_mimeheader("\x7f", "ASCII", "B", ""));
// Handling of leading spaces
var_dump(mb_encode_mimeheader(" ", "ASCII", "B"));
var_dump(mb_encode_mimeheader(" ", "ASCII", "Q"));
var_dump(mb_encode_mimeheader(" ", "ASCII", "B"));
var_dump(mb_encode_mimeheader(" ", "ASCII", "Q"));
// Try multiple spaces after a word
var_dump(mb_encode_mimeheader("ab ab ", "ASCII", "B"));
var_dump(mb_encode_mimeheader("ab ab ", "ASCII", "Q"));
// Trailing spaces
var_dump(mb_encode_mimeheader("` ", "HZ", "B", ""));
var_dump(mb_encode_mimeheader("S ", "ASCII", "Q", "", 73));
// Regression test: extra spaces should not be added at beginning of ASCII string
// when entire input is one ASCII 'word' and high indent value makes us consider
// adding a line feed
var_dump(mb_encode_mimeheader("S4", "ASCII", "B", "\n", 73));
var_dump(mb_encode_mimeheader("S4", "ASCII", "Q", "\n", 73));
// Regression test: converting UTF-8 to UCS-4 and then QPrint-encoding makes string
// take a vastly larger number of bytes; make sure we don't overrun max line length
var_dump(mb_encode_mimeheader("24\x0a", "UCS-4", "Q", "", 29));
// Regression test: include space after ASCII word when switching to Base64 encoding
var_dump(mb_encode_mimeheader("o\x20\x00", "ASCII", "B"));
// Regression test for buffer overrun while performing Base64-encoding
var_dump(mb_encode_mimeheader("\x00\x11\x01\x00\x00\x00\x00\x00\x00\x00", "UCS-4", "B"));
// Regression test for incorrect calculation of when to stop generating output
var_dump(mb_encode_mimeheader("\x01\x00\xcb\xcb\xcb\xcb\xcb\xcb=\xcb\xcb\xcb=?\x01\x00a\x00\x00\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4?4\xcb\xcb\xcb\xcb\xcb=?\x01\x00\x00\x00\x01\x00\x00\x06\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4\xcb\xcb\xcb\xcb\xcb?4", "UCS-2", "B", ""));
// 'Line feed' string is truncated to no more than 8 bytes long
$linefeed = "=aaaaaa=?";
var_dump(mb_encode_mimeheader("?", "ASCII", "Q", "=aaaaaa=?", 52));
var_dump($linefeed); // Make sure 'line feed' string was not modified
// Regression test: must take ASCII characters already output at beginning of line
// into account when calculating how many QPrint-encoded characters we can output
// without overrunning max line length
var_dump(mb_encode_mimeheader(",\x20o\x00\x01\x00\x00(", "JIS", "Q", "", 40));
// Make sure we maintain legacy behavior when linefeed string contains NUL (zero) bytes
// (We treat the linefeed string as being truncated at that point)
// The reason is because in the original implementation, the linefeed string was a
// null-terminated C string, so including NUL bytes would have the side effect of
// causing only part of the linefeed string to be used
var_dump(mb_encode_mimeheader("\xff", "ASCII", "Q", "\x00", 54));
// Regression test: After we see a non-ASCII character and switch into Base64/QPrint encoding mode,
// we may need to emit a linefeed before we start the next MIME encoded word
// If so, properly record where the line start position is so we can correctly calculate
// how much output can fit on the line
var_dump(mb_encode_mimeheader("\xff~H~\xe0\xea\x00\x00\xff\xff\xff\xff\xff>\x00\x00\x00\x00", "HZ", "Q", "", 71));
// ASCII strings with no spaces should pass through unchanged
var_dump(mb_encode_mimeheader("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5", "BIG-5", "B"));
// Regression test: After decoding part of a line as ASCII, before we switch into Base64/QPrint encoding mode,
// refill our buffer of wchars so we don't hit the end of the buffer in the middle of a line
var_dump(mb_encode_mimeheader("\x20\x20\x20\x202\x20\x20\x20sssssssssssssssssssssssssss\x20\x20\x20\x20W\x20\x20\x20\x20\x20\x20W\x20\x20\x20\x20\xb9S\x01\x00\xf0`\x00\x00\x20\x20\x20\x20mSCII\xee\x20\x20\x20\x20mSCII\xeeI\xee", "ArmSCII-8", "B", ""));
// Regression test: Input string with a huge number of spaces
var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00", "CP936", "Q", ""));
// Regression test: Long string, all ASCII, but with spaces at the beginning
var_dump(mb_encode_mimeheader("\x20\x201111111111111111111111111111111111111111111111111111111111111111111111111", "ASCII", "Q", ""));
// Only a single character in input, but when we convert it to outcode and then
// transfer-encode it, it takes too many bytes to fit on a single line
// Legacy implementation would always include at least one wchar in each encoded word;
// imitate the same behavior
var_dump(mb_encode_mimeheader("\xe7\xad\xb5", "HZ", "Q", "", 44));
// Regression test: Exploring corner cases of when legacy implementation would output plain ASCII
// with no transfer encoding, and when it would transfer-encode
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3\x20", "GB18030", "Q", ""));
// Change in behavior: The old implementation would output the following string as plain ASCII,
// but the new one transfer-encodes it
// In the general case, matching the old implementation's decision to transfer-encode or not
// perfectly would require allocating potentially unbounded scratch memory (up to the size of
// the input string), but we aim to only use a constant amount of temporarily allocated memory
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
// Regression test for infinite loop which was unintentionally caused when refactoring
var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B"));
var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B"));
// Regression test for problem where MIME encoding loop would not leave enough space in wchar
// buffer for the next iteration, causing an assertion failure
mb_internal_encoding('MacJapanese');
var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44));
// Regression test for failing assertion caused by the fact that QPrint deliberately generates no
// wchars for CR (0x0D) bytes
try {
mb_internal_encoding('Quoted-Printable');
var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184));
} catch (\ValueError $e) {
echo $e->getMessage() . \PHP_EOL;
}
echo "Done";
?>
--EXPECT--
string(0) ""
string(21) "=?UTF-8?Q?abc=00abc?="
string(16) "=?UTF-8?B?Pw==?="
string(19) "=?US-ASCII?B?Pw==?="
string(18) "=?US-ASCII?Q?=3F?="
string(19) "=?US-ASCII?B?PQ==?="
string(18) "=?US-ASCII?Q?=3D?="
string(19) "=?US-ASCII?B?Xw==?="
string(18) "=?US-ASCII?Q?=5F?="
string(19) "=?US-ASCII?B?fw==?="
string(1) " "
string(1) " "
string(3) " "
string(3) " "
string(8) "ab ab "
string(8) "ab ab "
string(1) "`"
string(1) "S"
string(2) "S4"
string(2) "S4"
string(61) "=?UCS-4?Q?=00=00=00=32=00=00=00=34?= =?UCS-4?Q?=00=00=00=0A?="
string(21) "o =?US-ASCII?B?AA==?="
string(68) "=?UCS-4?B?AAAAAAAAABEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==?="
string(271) "=?UCS-2?B?AAEAAAA/AD8APwA/AD8APwA9AD8APwA/AD0APwABAAAAYQAAAAAAPwA/AD8=?= =?UCS-2?B?AD0APwA/AD8APwA/AD8APwA/AD8APwA/ADQAPwA0AD8APwA/AD8APwA9AD8=?= =?UCS-2?B?AAEAAAAAAAAAAQAAAAAABgA/AD8APwA/AD8APwA/AD8APwA9AD8APwA/AD8=?= =?UCS-2?B?AD8APwA/AD8APwA/AD8ANAA/AD8APwA/AD8APwA0?="
string(27) "=aaaaaa= =?US-ASCII?Q?=3F?="
string(9) "=aaaaaa=?"
string(55) ", =?ISO-2022-JP?Q?o=00=01=00=00?= =?ISO-2022-JP?Q?=28?="
string(19) " =?US-ASCII?Q?=3F?="
string(76) " =?HZ-GB-2312?Q?=3F=7E=7EH=7E=7E=3F=3F=00=00=3F=3F=3F=3F=3F=3E=00=00=00=00?="
string(75) "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5"
string(108) " 2 sssssssssssssssssssssssssss W W =?ArmSCII-8?B?ICAgP1MBAD9gAAAgICAgbVNDSUk/ICAgIG1TQ0lJP0k/?="
string(294) "=?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=00?="
string(75) " 1111111111111111111111111111111111111111111111111111111111111111111111111"
string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
string(77) "2 !3"
string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?="
string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?=
=?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?="
string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?=
=?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?="
string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?="
mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding
Done
|