File: gh9535.phpt

package info (click to toggle)
php8.4 8.4.16-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 211,276 kB
  • sloc: ansic: 1,176,142; php: 35,419; sh: 11,964; cpp: 7,208; pascal: 4,951; javascript: 3,091; asm: 2,817; yacc: 2,411; makefile: 696; xml: 446; python: 301; awk: 148
file content (169 lines) | stat: -rw-r--r-- 4,483 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
--TEST--
GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
--EXTENSIONS--
mbstring
--FILE--
<?php
// Encodings that every case in this test is run against.
// The order matters: the --EXPECT-- section lists its results in this exact sequence.
$encodings = [
    'UTF-16', 'UTF-16BE', 'UTF-16LE',
    'UTF-7', 'UTF7-IMAP',
    'ISO-2022-JP-MS',
    'GB18030', 'HZ',
    'ISO-2022-KR',
    'ISO-2022-JP-MOBILE#KDDI',
    'CP50220', 'CP50221', 'CP50222',
];

// Case 1: a ten-character multibyte string, cut with a byte length of 15.
$input = '宛如繁星般宛如皎月般';
$bytes_length = 15;
$internal = mb_internal_encoding();
foreach ($encodings as $target) {
    // Encode into the target encoding, cut there, then decode back so the
    // surviving characters can be printed in the internal encoding.
    $encoded = mb_convert_encoding($input, $target, $internal);
    $truncated = mb_strcut($encoded, 0, $bytes_length, $target);
    $decoded = mb_convert_encoding($truncated, $internal, $target);
    echo $target, ': ', $decoded, PHP_EOL;
}

echo PHP_EOL;

// Case 2: a ten-character Japanese string, cut with a byte length of 20.
$input = '星のように月のように';
$bytes_length = 20;
$internal = mb_internal_encoding();
foreach ($encodings as $target) {
    // Round trip: internal encoding -> target, cut in the target, target -> internal.
    $encoded = mb_convert_encoding($input, $target, $internal);
    $truncated = mb_strcut($encoded, 0, $bytes_length, $target);
    $decoded = mb_convert_encoding($truncated, $internal, $target);
    echo $target, ': ', $decoded, PHP_EOL;
}

echo PHP_EOL;

// Case 3: multibyte and ASCII characters interleaved, cut with a byte length of 10.
$input = 'あaいb';
$bytes_length = 10;
$internal = mb_internal_encoding();
foreach ($encodings as $target) {
    // Round trip: internal encoding -> target, cut in the target, target -> internal.
    $encoded = mb_convert_encoding($input, $target, $internal);
    $truncated = mb_strcut($encoded, 0, $bytes_length, $target);
    $decoded = mb_convert_encoding($truncated, $internal, $target);
    echo $target, ': ', $decoded, PHP_EOL;
}

echo PHP_EOL;

// Case 4: a long run of ASCII 'A', cut with a byte length of 10.
$input = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';
$bytes_length = 10;
// ISO-2022-KR output begins with the escape sequence 'ESC $ ) C', which takes up
// 4 bytes; mb_strcut therefore keeps only 6 'A' characters for it instead of 10.
$internal = mb_internal_encoding();
foreach ($encodings as $target) {
    // Round trip: internal encoding -> target, cut in the target, target -> internal.
    $encoded = mb_convert_encoding($input, $target, $internal);
    $truncated = mb_strcut($encoded, 0, $bytes_length, $target);
    $decoded = mb_convert_encoding($truncated, $internal, $target);
    echo $target, ': ', $decoded, PHP_EOL;
}

echo PHP_EOL;

// Case 5: three ASCII question marks, cut with a byte length of 2.
// $input and $bytes_length keep their names because the loop after this case reads them.
$input = '???';
$bytes_length = 2;
// ISO-2022-KR is again affected by its initial escape sequence (4 bytes of output),
// so nothing fits within the 2-byte limit for that encoding.
$internal = mb_internal_encoding();
foreach ($encodings as $target) {
    $encoded = mb_convert_encoding($input, $target, $internal);
    $truncated = mb_strcut($encoded, 0, $bytes_length, $target);
    $decoded = mb_convert_encoding($truncated, $internal, $target);
    // trim() drops the space after the colon when the cut result is empty, so the
    // printed line carries no trailing whitespace (see ISO-2022-KR in --EXPECT--).
    $suffix = trim(': ' . $decoded);
    echo $target, $suffix, PHP_EOL;
}

echo PHP_EOL;

// Final case: cut the raw $input bytes directly, passing each encoding to mb_strcut
// without any conversion round trip. Uses the $input and $bytes_length values that
// are still set from the preceding case.
foreach ($encodings as $target) {
    // trim() removes the space after the colon when the cut yields an empty string.
    $suffix = trim(': ' . mb_strcut($input, 0, $bytes_length, $target));
    echo $target, $suffix, PHP_EOL;
}

?>
--EXPECT--
UTF-16: 宛如繁星般宛如
UTF-16BE: 宛如繁星般宛如
UTF-16LE: 宛如繁星般宛如
UTF-7: 宛如繁星
UTF7-IMAP: 宛如繁星
ISO-2022-JP-MS: 宛如繁星
GB18030: 宛如繁星般宛如
HZ: 宛如繁星般
ISO-2022-KR: 宛如繁星
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
CP50220: 宛如繁星
CP50221: 宛如繁星
CP50222: 宛如繁星

UTF-16: 星のように月のように
UTF-16BE: 星のように月のように
UTF-16LE: 星のように月のように
UTF-7: 星のように月
UTF7-IMAP: 星のように月
ISO-2022-JP-MS: 星のように月の
GB18030: 星のように月のように
HZ: 星のように月のよ
ISO-2022-KR: 星のように月の
ISO-2022-JP-MOBILE#KDDI: 星のように月の
CP50220: 星のように月の
CP50221: 星のように月の
CP50222: 星のように月の

UTF-16: あaいb
UTF-16BE: あaいb
UTF-16LE: あaいb
UTF-7: あa
UTF7-IMAP: あa
ISO-2022-JP-MS: あa
GB18030: あaいb
HZ: あa
ISO-2022-KR: あa
ISO-2022-JP-MOBILE#KDDI: あa
CP50220: あa
CP50221: あa
CP50222: あa

UTF-16: AAAAA
UTF-16BE: AAAAA
UTF-16LE: AAAAA
UTF-7: AAAAAAAAAA
UTF7-IMAP: AAAAAAAAAA
ISO-2022-JP-MS: AAAAAAAAAA
GB18030: AAAAAAAAAA
HZ: AAAAAAAAAA
ISO-2022-KR: AAAAAA
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
CP50220: AAAAAAAAAA
CP50221: AAAAAAAAAA
CP50222: AAAAAAAAAA

UTF-16: ?
UTF-16BE: ?
UTF-16LE: ?
UTF-7: ??
UTF7-IMAP: ??
ISO-2022-JP-MS: ??
GB18030: ??
HZ: ??
ISO-2022-KR:
ISO-2022-JP-MOBILE#KDDI: ??
CP50220: ??
CP50221: ??
CP50222: ??

UTF-16: ??
UTF-16BE: ??
UTF-16LE: ??
UTF-7: ??
UTF7-IMAP: ??
ISO-2022-JP-MS: ??
GB18030: ??
HZ: ??
ISO-2022-KR:
ISO-2022-JP-MOBILE#KDDI: ??
CP50220: ??
CP50221: ??
CP50222: ??