File: unicode.t

package info (click to toggle)
libjson-parse-perl 0.62-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 572 kB
  • sloc: ansic: 3,614; perl: 475; makefile: 12
file content (77 lines) | stat: -rw-r--r-- 2,168 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
use FindBin '$Bin';
use lib "$Bin";
use JPT;

my $m = '{"骪":"\u9aaa"}';
ok (valid_json ($m), "Valid good JSON");

my $ar = json_to_perl ($m);
ok (defined $ar, "Unicode \\uXXXX parsed");
is ($ar->{骪}, '骪', "Unicode \\uXXXX parsed correctly");
note ("keys = ", keys %$ar);

# Here the second unicode piece of the string is added to switch on
# the UTF-8 flag inside Perl and get the required invalidity. 

my $badunicode = '["\uD800", "バター"]';
ok (! valid_json ($badunicode), "$badunicode is invalid");

# This is what the documentation says will happen. However, I'm not
# sure this is correct or what the user expects to happen.

my $okunicode = '["\uD800"]';
ok (! valid_json ($okunicode), "$okunicode is valid");

my $surpair = '["\uD834\uDD1E"]';
my $spo;
eval {
    $spo = parse_json ($surpair);
};
ok (! $@, "parsed surrogate pairs");
is (ord ($spo->[0]), 0x1D11E, "g-clef surrogate pair");

use utf8;
my $surpair_force_utf8 = '["\uD834\uDD1E麻婆茄子"]';
my $spo_force_utf8;
eval {
    $spo_force_utf8 = parse_json ($surpair);
};
ok (! $@, "parsed surrogate pairs");
is (ord ($spo_force_utf8->[0]), 0x1D11E, "g-clef surrogate pair");

use utf8;
my $scorpion = '["蠍"]';
my $p1 = parse_json ($scorpion);
ok (utf8::is_utf8 ($p1->[0]), "UTF-8 survives");

no utf8;

my $ebi = '["蠍"]';
my $p2 = parse_json ($ebi);
ok (! utf8::is_utf8 ($p2->[0]), "Not UTF-8 not marked as UTF-8");

no utf8;
# 蟹
my $kani = '["\u87f9", "蟹", "\u87f9猿"]';
my $p = parse_json ($kani);
ok (utf8::is_utf8 ($p->[0]), "kani upgraded regardless");
ok (! utf8::is_utf8 ($p->[1]), "input string not upgraded, even though it's UTF-8");
ok (utf8::is_utf8 ($p->[2]), "upgrade this too");
is (length ($p->[2]), 2, "length is two by magic");

ok (! valid_json ('["\uDE8C "]'), "invalid \uDE8C + space");

# Test of "surrogate pairs".

my $jc = JSON::Parse->new ();
my $wikipedia_1 = '"\ud801\udc37"';
my $out_1 = $jc->run ($wikipedia_1);
is ($out_1, "\x{10437}");
my $wikipedia_2 = '"\ud852\udf62"';
my $out_2 = $jc->run ($wikipedia_2);
is ($out_2, "\x{24b62}");
my $json_spec = '"\ud834\udd1e"';
my $out_3 = $jc->run ($json_spec);
is ($out_3, "\x{1D11E}");

done_testing ();