1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
use Test::More tests => 162;
use utf8;
use Cpanel::JSON::XS;
use warnings;
is(Cpanel::JSON::XS->new->allow_nonref->utf8->encode("ü"), "\"\xc3\xbc\"");
is(Cpanel::JSON::XS->new->allow_nonref->encode("ü"), "\"ü\"");
is(Cpanel::JSON::XS->new->allow_nonref->ascii->utf8->encode(chr 0x8000), '"\u8000"');
is(Cpanel::JSON::XS->new->allow_nonref->ascii->utf8->pretty->encode(chr 0x10402), "\"\\ud801\\udc02\"\n");
ok not defined eval { Cpanel::JSON::XS->new->allow_nonref->utf8->decode('"ü"') };
like $@, qr/malformed UTF-8/;
is(Cpanel::JSON::XS->new->allow_nonref->decode('"ü"'), "ü");
is(Cpanel::JSON::XS->new->allow_nonref->decode('"\u00fc"'), "ü");
ok not defined eval { decode_json ('"\ud801\udc02' . "\x{10204}\"", 1) };
like $@, qr/Wide character/;
SKIP: {
skip "5.6", 1 if $] < 5.008;
is(Cpanel::JSON::XS->new->allow_nonref->decode('"\ud801\udc02' . "\x{10204}\""), "\x{10402}\x{10204}");
}
is(Cpanel::JSON::XS->new->allow_nonref->decode('"\"\n\\\\\r\t\f\b"'), "\"\012\\\015\011\014\010");
my $utf8_love = "I \342\235\244 perl";
is(Cpanel::JSON::XS->new->ascii->encode([$utf8_love]), '["I \u00e2\u009d\u00a4 perl"]', 'utf8 enc ascii');
is(Cpanel::JSON::XS->new->latin1->encode([$utf8_love]), "[\"I \342\235\244 perl\"]", 'utf8 enc latin1');
is(Cpanel::JSON::XS->new->utf8->encode([$utf8_love]), "[\"I \303\242\302\235\302\244 perl\"]", 'utf8 enc utf8');
is(Cpanel::JSON::XS->new->binary->encode([$utf8_love]), '["I \xe2\x9d\xa4 perl"]', 'utf8 enc binary');
SKIP: {
skip "5.6", 4 if $] < 5.008;
my $unicode_love = "I ❤ perl";
is(Cpanel::JSON::XS->new->ascii->encode([$unicode_love]), '["I \u2764 perl"]', 'unicode enc ascii');
is(Cpanel::JSON::XS->new->latin1->encode([$unicode_love]), "[\"I \\u2764 perl\"]", 'unicode enc latin1');
is(Cpanel::JSON::XS->new->utf8->encode([$unicode_love]), "[\"I \342\235\244 perl\"]", 'unicode enc utf8');
is(Cpanel::JSON::XS->new->binary->encode([$unicode_love]), '["I \xe2\x9d\xa4 perl"]', 'unicode enc binary');
}
# TODO: test utf8 hash keys,
# test utf8 strings without any char > 0x80.
# warn on the 66 non-characters as in core
{
BEGIN { 'warnings'->import($] < 5.014 ? 'utf8' : 'nonchar') }
my $w = '';
$SIG{__WARN__} = sub { $w = shift };
my $d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ufdd0"');
my $warn = $w;
{
no warnings 'utf8';
is ($d, "\x{fdd0}", substr($warn,0,31)."...");
}
like ($warn, qr/^Unicode non-character U\+FDD0 is/);
$w = '';
# higher planes
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud83f\udfff"');
$warn = $w;
{
no warnings 'utf8';
is ($d, "\x{1ffff}", substr($warn,0,31)."...");
}
like ($w, qr/^Unicode non-character U\+1FFFF is/);
$w = '';
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud87f\udffe"');
$warn = $w;
{
no warnings 'utf8';
is ($d, "\x{2fffe}", substr($warn,0,31)."...");
}
like ($w, qr/^Unicode non-character U\+2FFFE is/);
$w = '';
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud8a4\uddd1"');
$warn = $w;
is ($d, "\x{391d1}", substr($warn,0,31)."...");
is ($w, '');
}
{
my $w;
BEGIN { 'warnings'->import($] < 5.014 ? 'utf8' : 'nonchar') }
$SIG{__WARN__} = sub { $w = shift };
# no warning with relaxed
my $d = Cpanel::JSON::XS->new->allow_nonref->relaxed->decode('"\ufdd0"');
my $warn = $w;
{
no warnings 'utf8';
is ($d, "\x{fdd0}", "no warning with relaxed");
}
is($w, undef);
}
# security exploits via ill-formed subsequences
# see http://unicode.org/reports/tr36/#UTF-8_Exploit
# testcases from Encode/t/utf8strict.t
# All these sequences are not handled by the unsafe, fast XS decoder,
# rather passed through to the safe Perl decoder, which detects those.
my @ill =
(# http://smontagu.damowmow.com/utf8test.html
# The numbers below, like 2.1.2 are test numbers on this web page
qq/80/ , # 3.1.1
qq/bf/ , # 3.1.2
qq/80 bf/ , # 3.1.3
qq/80 bf 80/ , # 3.1.4
qq/80 bf 80 bf/ , # 3.1.5
qq/80 bf 80 bf 80/ , # 3.1.6
qq/80 bf 80 bf 80 bf/ , # 3.1.7
qq/80 bf 80 bf 80 bf 80/ , # 3.1.8
qq/80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf/ , # 3.1.9
qq/c0 20 c1 20 c2 20 c3 20 c4 20 c5 20 c6 20 c7 20 c8 20 c9 20 ca 20 cb 20 cc 20 cd 20 ce 20 cf 20 d0 20 d1 20 d2 20 d3 20 d4 20 d5 20 d6 20 d7 20 d8 20 d9 20 da 20 db 20 dc 20 dd 20 de 20 df 20/ , # 3.2.1
qq/e0 20 e1 20 e2 20 e3 20 e4 20 e5 20 e6 20 e7 20 e8 20 e9 20 ea 20 eb 20 ec 20 ed 20 ee 20 ef 20/ , # 3.2.2
qq/f0 20 f1 20 f2 20 f3 20 f4 20 f5 20 f6 20 f7 20/ , # 3.2.3
qq/f8 20 f9 20 fa 20 fb 20/ , # 3.2.4
qq/fc 20 fd 20/ , # 3.2.5
qq/c0/ , # 3.3.1
qq/e0 80/ , # 3.3.2
qq/f0 80 80/ , # 3.3.3
qq/f8 80 80 80/ , # 3.3.4
qq/fc 80 80 80 80/ , # 3.3.5
qq/df/ , # 3.3.6
qq/ef bf/ , # 3.3.7
qq/f7 bf bf/ , # 3.3.8
qq/fb bf bf bf/ , # 3.3.9
qq/fd bf bf bf bf/ , # 3.3.10
qq/c0 e0 80 f0 80 80 f8 80 80 80 fc 80 80 80 80 df ef bf f7 bf bf fb bf bf bf fd bf bf bf bf/ , # 3.4.1
qq/fe/ , # 3.5.1
qq/ff/ , # 3.5.2
qq/fe fe ff ff/ , # 3.5.3
qq/f0 8f bf bf/ , # 4.2.3
qq/f8 87 bf bf bf/ , # 4.2.4
qq/fc 83 bf bf bf bf/ , # 4.2.5
qq/c0 af/ , # 4.1.1 # ! overflow not with perl 5.6
qq/e0 80 af/ , # 4.1.2 # ! overflow not with perl 5.6
qq/f0 80 80 af/ , # 4.1.3 # ! overflow not with perl 5.6
qq/f8 80 80 80 af/ , # 4.1.4 # ! overflow not with perl 5.6
qq/fc 80 80 80 80 af/ , # 4.1.5 # ! overflow not with perl 5.6
qq/c1 bf/ , # 4.2.1 # ! overflow not with perl 5.6
qq/e0 9f bf/ , # 4.2.2 # ! overflow not with perl 5.6
qq/c0 80/ , # 4.3.1 # xx! overflow not with perl 5.6
qq/e0 80 80/ , # 4.3.2 # xx! overflow not with perl 5.6
qq/f0 80 80 80/ , # 4.3.3 # xx! overflow not with perl 5.6
qq/f8 80 80 80 80/ , # 4.3.4 # xx! overflow not with perl 5.6
qq/fc 80 80 80 80 80/ , # 4.3.5 # xx! overflow not with perl 5.6
# non-shortest form of 5c i.e. "\\"
qq/c1 9c/ , # ! not with perl 5.6
);
{
# these are no multibyte codepoints, just raw utf8 bytes,
# so most of them work with 5.6 also.
BEGIN { $^W = 1 }
BEGIN { 'warnings'->import($] < 5.014 ? 'utf8' : 'nonchar') }
my $w;
$SIG{__WARN__} = sub { $w = shift };
for my $ill (@ill) {
my $o = pack "C*" => map {hex} split /\s+/, $ill;
my $d = eval { decode_json("[\"$o\"]"); };
is ($d, undef, substr($@,0,25))
or diag $w, ' ', $ill, "\t => ", $d->[0], " $@";
like($@, qr/malformed UTF-8 character/, "ill-formed utf8 <$ill> throws error");
is($d, undef, "without warning");
$w = undef;
}
}
|