File: mb_encode_mimeheader_basic4.phpt

package info (click to toggle)
php8.4 8.4.11-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 208,108 kB
  • sloc: ansic: 1,060,628; php: 35,345; sh: 11,866; cpp: 7,201; pascal: 4,913; javascript: 3,091; asm: 2,810; yacc: 2,411; makefile: 689; xml: 446; python: 301; awk: 148
file content (186 lines) | stat: -rw-r--r-- 12,464 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
--TEST--
Test mb_encode_mimeheader() function : test cases found by fuzzer
--EXTENSIONS--
mbstring
--INI--
error_reporting=E_ALL^E_DEPRECATED
--FILE--
<?php

// Set the internal encoding explicitly (presumably so that the cases below do not
// depend on whatever default the test environment is configured with)
mb_internal_encoding('UTF-8');

// Empty input: the expected output is an empty string, with no encoded word emitted
var_dump(mb_encode_mimeheader("", "UTF-8", "Q"));

// Regression test for QPrint-encoding of strings with embedded NUL (zero) bytes
// (the NUL is expected to show up as "=00" inside a single encoded word)
var_dump(mb_encode_mimeheader("abc\x00abc", "UTF-8", "Q"));

// Regression test for input strings which end prematurely
// ("\xE2" is a lead byte with no continuation bytes following it; the expected output
// is the same Base64 payload as for a lone "?", see the next case)
var_dump(mb_encode_mimeheader("\xE2", "UTF-8", "B"));

// Handling of ? signs
// (expected to be transfer-encoded even though the input is a single ASCII character)
var_dump(mb_encode_mimeheader("?", "ASCII", "B"));
var_dump(mb_encode_mimeheader("?", "ASCII", "Q"));

// Handling of = signs
// (likewise expected to be transfer-encoded rather than passed through)
var_dump(mb_encode_mimeheader("=", "ASCII", "B"));
var_dump(mb_encode_mimeheader("=", "ASCII", "Q"));

// Handling of underscores
// (likewise expected to be transfer-encoded rather than passed through)
var_dump(mb_encode_mimeheader("_", "ASCII", "B"));
var_dump(mb_encode_mimeheader("_", "ASCII", "Q"));

// Handling of 0x7F (DEL)
// (expected to be Base64-encoded; an empty 'line feed' string is passed here)
var_dump(mb_encode_mimeheader("\x7f", "ASCII", "B", ""));

// Handling of leading spaces
// (input consisting only of spaces is expected to come back unchanged)
var_dump(mb_encode_mimeheader(" ", "ASCII", "B"));
var_dump(mb_encode_mimeheader(" ", "ASCII", "Q"));
var_dump(mb_encode_mimeheader("   ", "ASCII", "B"));
var_dump(mb_encode_mimeheader("   ", "ASCII", "Q"));

// Try multiple spaces after a word
// (expected output is the input unchanged, including both runs of spaces)
var_dump(mb_encode_mimeheader("ab  ab  ", "ASCII", "B"));
var_dump(mb_encode_mimeheader("ab  ab  ", "ASCII", "Q"));

// Trailing spaces
// (in both of these cases the expected output is the input with its trailing space dropped)
var_dump(mb_encode_mimeheader("` ", "HZ", "B", ""));
var_dump(mb_encode_mimeheader("S ", "ASCII", "Q", "", 73));

// Regression test: extra spaces should not be added at beginning of ASCII string
// when entire input is one ASCII 'word' and high indent value makes us consider
// adding a line feed
// (expected output is exactly "S4" for both transfer encodings)
var_dump(mb_encode_mimeheader("S4", "ASCII", "B", "\n", 73));
var_dump(mb_encode_mimeheader("S4", "ASCII", "Q", "\n", 73));

// Regression test: converting UTF-8 to UCS-4 and then QPrint-encoding makes string
// take a vastly larger number of bytes; make sure we don't overrun max line length
// (the three input characters are expected to be split across two encoded words)
var_dump(mb_encode_mimeheader("24\x0a", "UCS-4", "Q", "", 29));

// Regression test: include space after ASCII word when switching to Base64 encoding
// (expected output keeps "o " as plain text and Base64-encodes only the NUL byte)
var_dump(mb_encode_mimeheader("o\x20\x00", "ASCII", "B"));

// Regression test for buffer overrun while performing Base64-encoding
// (expected output is a single UCS-4 encoded word)
var_dump(mb_encode_mimeheader("\x00\x11\x01\x00\x00\x00\x00\x00\x00\x00", "UCS-4", "B"));

// Regression test for incorrect calculation of when to stop generating output
// (expected output consists of four UCS-2 encoded words separated by single spaces,
// since the 'line feed' string passed here is empty)
var_dump(mb_encode_mimeheader("\x01\x00\xcb\xcb\xcb\xcb\xcb\xcb=\xcb\xcb\xcb=?\x01\x00a\x00\x00\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4?4\xcb\xcb\xcb\xcb\xcb=?\x01\x00\x00\x00\x01\x00\x00\x06\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4\xcb\xcb\xcb\xcb\xcb?4", "UCS-2", "B", ""));

// 'Line feed' string is truncated to no more than 8 bytes long
// (the expected output starts with "=aaaaaa=", i.e. the first 8 bytes of the 9-byte string)
// The variable itself is passed to the function: the final var_dump() can only prove
// that the caller's string was left intact if that very string was the argument
$linefeed = "=aaaaaa=?";
var_dump(mb_encode_mimeheader("?", "ASCII", "Q", $linefeed, 52));
var_dump($linefeed); // Make sure 'line feed' string was not modified

// Regression test: must take ASCII characters already output at beginning of line
// into account when calculating how many QPrint-encoded characters we can output
// without overrunning max line length
// (expected output: ", " as plain text, followed by two ISO-2022-JP encoded words)
var_dump(mb_encode_mimeheader(",\x20o\x00\x01\x00\x00(", "JIS", "Q", "", 40));

// Make sure we maintain legacy behavior when linefeed string contains NUL (zero) bytes
// (We treat the linefeed string as being truncated at that point)
// This is because in the original implementation, the linefeed string was a
// null-terminated C string, so including NUL bytes would have the side effect of
// causing only part of the linefeed string to be used
// (here the linefeed string starts with NUL, and the expected output contains no
// linefeed bytes at all: just a space followed by the encoded word)
var_dump(mb_encode_mimeheader("\xff", "ASCII", "Q", "\x00", 54));

// Regression test: After we see a non-ASCII character and switch into Base64/QPrint encoding mode,
// we may need to emit a linefeed before we start the next MIME encoded word
// If so, properly record where the line start position is so we can correctly calculate
// how much output can fit on the line
// (expected output is a leading space followed by a single HZ-GB-2312 encoded word)
var_dump(mb_encode_mimeheader("\xff~H~\xe0\xea\x00\x00\xff\xff\xff\xff\xff>\x00\x00\x00\x00", "HZ", "Q", "", 71));

// ASCII strings with no spaces should pass through unchanged
var_dump(mb_encode_mimeheader("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5", "BIG-5", "B"));

// Regression test: After decoding part of a line as ASCII, before we switch into Base64/QPrint encoding mode,
// refill our buffer of wchars so we don't hit the end of the buffer in the middle of a line
var_dump(mb_encode_mimeheader("\x20\x20\x20\x202\x20\x20\x20sssssssssssssssssssssssssss\x20\x20\x20\x20W\x20\x20\x20\x20\x20\x20W\x20\x20\x20\x20\xb9S\x01\x00\xf0`\x00\x00\x20\x20\x20\x20mSCII\xee\x20\x20\x20\x20mSCII\xeeI\xee", "ArmSCII-8", "B", ""));

// Regression test: Input string with a huge number of spaces
var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00", "CP936", "Q", ""));

// Regression test: Long string, all ASCII, but with spaces at the beginning
var_dump(mb_encode_mimeheader("\x20\x201111111111111111111111111111111111111111111111111111111111111111111111111", "ASCII", "Q", ""));

// Only a single character in input, but when we convert it to outcode and then
// transfer-encode it, it takes too many bytes to fit on a single line
// Legacy implementation would always include at least one wchar in each encoded word;
// imitate the same behavior
var_dump(mb_encode_mimeheader("\xe7\xad\xb5", "HZ", "Q", "", 44));

// Regression test: Exploring corner cases of when legacy implementation would output plain ASCII
// with no transfer encoding, and when it would transfer-encode
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3\x20", "GB18030", "Q", ""));

// Change in behavior: The old implementation would output the following string as plain ASCII,
// but the new one transfer-encodes it
// In the general case, matching the old implementation's decision to transfer-encode or not
// perfectly would require allocating potentially unbounded scratch memory (up to the size of
// the input string), but we aim to only use a constant amount of temporarily allocated memory
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));

// Regression test for infinite loop which was unintentionally caused when refactoring
// (no 'line feed' argument is given in these two calls; in the expected output each
// result is broken across two lines, with the second line starting with a space)
var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B"));
var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B"));

// Regression test for problem where MIME encoding loop would not leave enough space in wchar
// buffer for the next iteration, causing an assertion failure
// NOTE(review): the internal encoding is switched here, presumably because it determines
// how the raw input bytes are decoded - confirm against the mb_encode_mimeheader() docs
// (in the expected output the 'CP50220' charset is labelled "ISO-2022-JP", and the
// one-byte 'line feed' string "A" appears after every encoded word except the last)
mb_internal_encoding('MacJapanese');
var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44));

// Regression test for failing assertion caused by the fact that QPrint deliberately generates no
// wchars for CR (0x0D) bytes
// The expected output for this case is not an encoded string but the message of a ValueError
// reporting that "Quoted-Printable" cannot be used as the charset for MIME header encoding;
// the catch block prints that message so it can be compared against the expected output
try {
	mb_internal_encoding('Quoted-Printable');
	var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184));
} catch (\ValueError $e) {
	echo $e->getMessage() . \PHP_EOL;
}

// Final marker line; it is the last line of the expected output
echo "Done";
?>
--EXPECT--
string(0) ""
string(21) "=?UTF-8?Q?abc=00abc?="
string(16) "=?UTF-8?B?Pw==?="
string(19) "=?US-ASCII?B?Pw==?="
string(18) "=?US-ASCII?Q?=3F?="
string(19) "=?US-ASCII?B?PQ==?="
string(18) "=?US-ASCII?Q?=3D?="
string(19) "=?US-ASCII?B?Xw==?="
string(18) "=?US-ASCII?Q?=5F?="
string(19) "=?US-ASCII?B?fw==?="
string(1) " "
string(1) " "
string(3) "   "
string(3) "   "
string(8) "ab  ab  "
string(8) "ab  ab  "
string(1) "`"
string(1) "S"
string(2) "S4"
string(2) "S4"
string(61) "=?UCS-4?Q?=00=00=00=32=00=00=00=34?= =?UCS-4?Q?=00=00=00=0A?="
string(21) "o =?US-ASCII?B?AA==?="
string(68) "=?UCS-4?B?AAAAAAAAABEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==?="
string(271) "=?UCS-2?B?AAEAAAA/AD8APwA/AD8APwA9AD8APwA/AD0APwABAAAAYQAAAAAAPwA/AD8=?= =?UCS-2?B?AD0APwA/AD8APwA/AD8APwA/AD8APwA/ADQAPwA0AD8APwA/AD8APwA9AD8=?= =?UCS-2?B?AAEAAAAAAAAAAQAAAAAABgA/AD8APwA/AD8APwA/AD8APwA9AD8APwA/AD8=?= =?UCS-2?B?AD8APwA/AD8APwA/AD8ANAA/AD8APwA/AD8APwA0?="
string(27) "=aaaaaa= =?US-ASCII?Q?=3F?="
string(9) "=aaaaaa=?"
string(55) ", =?ISO-2022-JP?Q?o=00=01=00=00?= =?ISO-2022-JP?Q?=28?="
string(19) " =?US-ASCII?Q?=3F?="
string(76) " =?HZ-GB-2312?Q?=3F=7E=7EH=7E=7E=3F=3F=00=00=3F=3F=3F=3F=3F=3E=00=00=00=00?="
string(75) "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5"
string(108) "    2   sssssssssssssssssssssssssss    W      W =?ArmSCII-8?B?ICAgP1MBAD9gAAAgICAgbVNDSUk/ICAgIG1TQ0lJP0k/?="
string(294) "=?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=00?="
string(75) "  1111111111111111111111111111111111111111111111111111111111111111111111111"
string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
string(77) "2                                                                          !3"
string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?="
string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?=
 =?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?="
string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?=
 =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?="
string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?="
mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding
Done