1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
use Test::More tests => 155;
use utf8;
use Cpanel::JSON::XS;
is(Cpanel::JSON::XS->new->allow_nonref (1)->utf8 (1)->encode ("ü"), "\"\xc3\xbc\"");
is(Cpanel::JSON::XS->new->allow_nonref (1)->encode ("ü"), "\"ü\"");
is(Cpanel::JSON::XS->new->allow_nonref (1)->ascii (1)->utf8 (1)->encode (chr 0x8000), '"\u8000"');
is(Cpanel::JSON::XS->new->allow_nonref (1)->ascii (1)->utf8 (1)->pretty (1)->encode (chr 0x10402), "\"\\ud801\\udc02\"\n");
SKIP: {
skip "5.6", 1 if $] < 5.008;
eval { Cpanel::JSON::XS->new->allow_nonref (1)->utf8 (1)->decode ('"ü"') };
like $@, qr/malformed UTF-8/;
}
is(Cpanel::JSON::XS->new->allow_nonref (1)->decode ('"ü"'), "ü");
is(Cpanel::JSON::XS->new->allow_nonref (1)->decode ('"\u00fc"'), "ü");
if ($] < 5.008) {
eval { decode_json ('"\ud801\udc02' . "\x{10204}\"", 1) };
like $@, qr/malformed UTF-8/;
} else {
is(Cpanel::JSON::XS->new->allow_nonref (1)->decode ('"\ud801\udc02' . "\x{10204}\""), "\x{10402}\x{10204}");
}
is(Cpanel::JSON::XS->new->allow_nonref (1)->decode ('"\"\n\\\\\r\t\f\b"'), "\"\012\\\015\011\014\010");
my $love = $] < 5.008 ? "I \342\235\244 perl" : "I ❤ perl";
is(Cpanel::JSON::XS->new->ascii->encode ([$love]),
$] < 5.008 ? '["I \u00e2\u009d\u00a4 perl"]' : '["I \u2764 perl"]', 'utf8 enc ascii');
is(Cpanel::JSON::XS->new->latin1->encode ([$love]),
$] < 5.008 ? "[\"I \342\235\244 perl\"]" : '["I \u2764 perl"]', 'utf8 enc latin1');
SKIP: {
skip "5.6", 1 if $] < 5.008;
require Encode;
# [RT #84244] wrong complaint: JSON::XS double encodes to ["I ⤠perl"]
# and with utf8 triple encodes it to ["I ⤠perl"]
if ($Encode::VERSION < 2.40 or $Encode::VERSION >= 2.54) { # Encode stricter check: Cannot decode string with wide characters
# see also http://stackoverflow.com/questions/12994100/perl-encode-pm-cannot-decode-string-with-wide-character
$love = "I \342\235\244 perl";
}
my $s = Encode::decode_utf8($love); # User tries to double decode wide-char to unicode with Encode
is(Cpanel::JSON::XS->new->utf8->encode ([$s]), "[\"I \342\235\244 perl\"]", 'utf8 enc utf8 [RT #84244]');
}
is(Cpanel::JSON::XS->new->binary->encode ([$love]), '["I \xe2\x9d\xa4 perl"]', 'utf8 enc binary');
# TODO: test utf8 hash keys,
# test utf8 strings without any char > 0x80.
# warn on the 66 non-characters as in core
{
my $w;
require warnings;
warnings->unimport($] < 5.014 ? 'utf8' : 'nonchar');
$SIG{__WARN__} = sub { $w = shift };
my $d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ufdd0"');
my $warn = $w;
is ($d, "\x{fdd0}", substr($warn,0,31)."...");
like ($warn, qr/^Unicode non-character U\+FDD0 is/);
$w = '';
# higher planes
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud83f\udfff"');
$warn = $w;
is ($d, "\x{1ffff}", substr($warn,0,31)."...");
like ($w, qr/^Unicode non-character U\+1FFFF is/);
$w = '';
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud87f\udffe"');
$warn = $w;
is ($d, "\x{2fffe}", substr($warn,0,31)."...");
like ($w, qr/^Unicode non-character U\+2FFFE is/);
$w = '';
$d = Cpanel::JSON::XS->new->allow_nonref->decode('"\ud8a4\uddd1"');
$warn = $w;
is ($d, "\x{391d1}", substr($warn,0,31)."...");
is ($w, '');
}
{
my $w;
warnings->unimport($] < 5.014 ? 'utf8' : 'nonchar');
$SIG{__WARN__} = sub { $w = shift };
# no warning with relaxed
my $d = Cpanel::JSON::XS->new->allow_nonref->relaxed->decode('"\ufdd0"');
my $warn = $w;
is ($d, "\x{fdd0}", "no warning with relaxed");
is($w, undef);
}
# security exploits via ill-formed subsequences
# see http://unicode.org/reports/tr36/#UTF-8_Exploit
# testcases from Encode/t/utf8strict.t
# All these sequences are not handled by the unsafe, fast XS decoder,
# rather passed through to the safe Perl decoder, which detects those.
my @ill =
(# http://smontagu.damowmow.com/utf8test.html
# The numbers below, like 2.1.2 are test numbers on this web page
qq/80/ , # 3.1.1
qq/bf/ , # 3.1.2
qq/80 bf/ , # 3.1.3
qq/80 bf 80/ , # 3.1.4
qq/80 bf 80 bf/ , # 3.1.5
qq/80 bf 80 bf 80/ , # 3.1.6
qq/80 bf 80 bf 80 bf/ , # 3.1.7
qq/80 bf 80 bf 80 bf 80/ , # 3.1.8
qq/80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf/ , # 3.1.9
qq/c0 20 c1 20 c2 20 c3 20 c4 20 c5 20 c6 20 c7 20 c8 20 c9 20 ca 20 cb 20 cc 20 cd 20 ce 20 cf 20 d0 20 d1 20 d2 20 d3 20 d4 20 d5 20 d6 20 d7 20 d8 20 d9 20 da 20 db 20 dc 20 dd 20 de 20 df 20/ , # 3.2.1
qq/e0 20 e1 20 e2 20 e3 20 e4 20 e5 20 e6 20 e7 20 e8 20 e9 20 ea 20 eb 20 ec 20 ed 20 ee 20 ef 20/ , # 3.2.2
qq/f0 20 f1 20 f2 20 f3 20 f4 20 f5 20 f6 20 f7 20/ , # 3.2.3
qq/f8 20 f9 20 fa 20 fb 20/ , # 3.2.4
qq/fc 20 fd 20/ , # 3.2.5
qq/c0/ , # 3.3.1
qq/e0 80/ , # 3.3.2
qq/f0 80 80/ , # 3.3.3
qq/f8 80 80 80/ , # 3.3.4
qq/fc 80 80 80 80/ , # 3.3.5
qq/df/ , # 3.3.6
qq/ef bf/ , # 3.3.7
qq/f7 bf bf/ , # 3.3.8
qq/fb bf bf bf/ , # 3.3.9
qq/fd bf bf bf bf/ , # 3.3.10
qq/c0 e0 80 f0 80 80 f8 80 80 80 fc 80 80 80 80 df ef bf f7 bf bf fb bf bf bf fd bf bf bf bf/ , # 3.4.1
qq/fe/ , # 3.5.1
qq/ff/ , # 3.5.2
qq/fe fe ff ff/ , # 3.5.3
qq/f0 8f bf bf/ , # 4.2.3
qq/f8 87 bf bf bf/ , # 4.2.4
qq/fc 83 bf bf bf bf/ , # 4.2.5
qq/c0 af/ , # 4.1.1 # ! overflow not with perl 5.6
qq/e0 80 af/ , # 4.1.2 # ! overflow not with perl 5.6
qq/f0 80 80 af/ , # 4.1.3 # ! overflow not with perl 5.6
qq/f8 80 80 80 af/ , # 4.1.4 # ! overflow not with perl 5.6
qq/fc 80 80 80 80 af/ , # 4.1.5 # ! overflow not with perl 5.6
qq/c1 bf/ , # 4.2.1 # ! overflow not with perl 5.6
qq/e0 9f bf/ , # 4.2.2 # ! overflow not with perl 5.6
qq/c0 80/ , # 4.3.1 # xx! overflow not with perl 5.6
qq/e0 80 80/ , # 4.3.2 # xx! overflow not with perl 5.6
qq/f0 80 80 80/ , # 4.3.3 # xx! overflow not with perl 5.6
qq/f8 80 80 80 80/ , # 4.3.4 # xx! overflow not with perl 5.6
qq/fc 80 80 80 80 80/ , # 4.3.5 # xx! overflow not with perl 5.6
# non-shortest form of 5c i.e. "\\"
qq/c1 9c/ , # ! not with perl 5.6
);
{
# these are no multibyte codepoints, just raw utf8 bytes,
# so most of them work with 5.6 also.
$^W = 1;
my $w;
warnings->import($] < 5.014 ? 'utf8' : 'nonchar');
$SIG{__WARN__} = sub { $w = shift };
for my $ill (@ill) {
my $o = pack "C*" => map {hex} split /\s+/, $ill;
my $d = eval { decode_json("[\"$o\"]"); };
is ($d, undef, substr($@,0,25))
or diag $w, ' ', $ill, "\t => ", $d->[0], " $@";
like($@, qr/malformed UTF-8 character/, "ill-formed utf8 <$ill> throws error");
is($d, undef, "without warning");
$w = undef;
}
}
|