File: encoding.t

package info (click to toggle)
libxml-parser-perl 2.47-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,396 kB
  • sloc: xml: 3,937; perl: 2,026; makefile: 38; ansic: 27
file content (111 lines) | stat: -rw-r--r-- 2,218 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Test 1: can XML::Parser be loaded at all?
# The plan is printed from a BEGIN block so that it is emitted at compile
# time, ahead of the `use XML::Parser` line below.
BEGIN { print "1..6\n"; }
# Fallback report: $loaded is only set after the `use` line has succeeded,
# so an unset $loaded at exit is reported as a failure of test 1.
END { print "not ok 1\n" unless $loaded; }
use XML::Parser;
$loaded = 1;    # package global on purpose: the END block above reads it
print "ok 1\n";

################################################################
# Check encoding
#
# Fixture for tests 2-4: a small document whose element name and character
# data are spelled as raw bytes with \x escapes.  $xmldec carries the XML
# declaration naming the x-sjis-unicode encoding; $docstring is the same
# document without any declaration.

my $xmldec = qq{<?xml version='1.0' encoding='x-sjis-unicode' ?>\n};

my $docstring = <<"END_OF_DOC";
<\x8e\x83>\x90\x46\x81\x41\x98\x61\x81\x41\x99\x44
</\x8e\x83>
END_OF_DOC

# Declaration followed by the body.
my $doc = join '', $xmldec, $docstring;

# Filled in by the text() and start() handlers while parsing.
my ( @bytes, $lastel );

# Char handler: append the byte values of the character data to @bytes.
#   $parser - first handler argument (unused here)
#   $chunk  - the character data passed to the handler
# The 'U0C*' template unpacks the string as its UTF-8 encoded bytes; the
# original note says this behaviour "was fixed 5.10".
sub text {
    my ( $parser, $chunk ) = @_;

    my @octets = unpack 'U0C*', $chunk;
    push @bytes, @octets;
}

# Start handler: remember the name of the most recently opened element
# in $lastel.  The first argument (the parser object) is not used.
sub start {
    my $element = $_[1];

    $lastel = $element;
}

# Tests 2-4: decode a document encoded as x-sjis-unicode.
#
# The Start handler records the last element name in $lastel and the Char
# handler accumulates the bytes of the character data in @bytes.
my $p = XML::Parser->new( Handlers => { Start => \&start, Char => \&text } );

# First pass: the encoding is taken from the XML declaration in $doc.
$p->parse($doc);

# Expected element name.  On perls older than 5.6 the comparison is made
# against the raw UTF-8 bytes instead of a one-character string.
my $exptag = ( $] < 5.006 )
  ? "\xe7\xa5\x89"    # U+7949 blessings 0x8e83
  : chr(0x7949);

# Expected bytes of the character data, one character per row, followed by
# the newline that precedes the closing tag.
my @expected = (
    0xe8, 0x89, 0xb2,    # U+8272 beauty    0x9046
    0xe3, 0x80, 0x81,    # U+3001 comma     0x8141
    0xe5, 0x92, 0x8c,    # U+548C peace     0x9861
    0xe3, 0x80, 0x81,    # U+3001 comma     0x8141
    0xe5, 0x83, 0x96,    # U+50D6 joy       0x9944
    0x0a
);

# Test 2: the element name was decoded correctly.
print "not " unless $lastel eq $exptag;
print "ok 2\n";

# Test 3: the character data was decoded to exactly the expected bytes.
# A mismatch only fails this test; it must not end the script, otherwise
# tests 4-6 would never report and the output would not match the 1..6 plan.
my $bytes_match = ( @bytes == @expected );
if ($bytes_match) {
    for my $i ( 0 .. $#expected ) {
        next if $bytes[$i] == $expected[$i];
        $bytes_match = 0;
        last;
    }
}
print "not " unless $bytes_match;
print "ok 3\n";

# Test 4: same document without an XML declaration; the encoding is
# supplied through the ProtocolEncoding option instead.
$lastel = '';

$p->parse( $docstring, ProtocolEncoding => 'X-SJIS-UNICODE' );

print "not " unless $lastel eq $exptag;
print "ok 4\n";

# Test the CP-1252 Win-Latin-1 mapping
#
# Fixture for tests 5-6: a document declared as WINDOWS-1252 whose three
# attribute values are the single bytes 0x80, 0x91 and 0x94.

$docstring = join '',
    qq{<?xml version='1.0' encoding='WINDOWS-1252' ?>\n},
    qq{<doc euro="\x80" lsq="\x91" rdq="\x94" />\n};

# Attribute name => value pairs captured by get_attr().
my %attr;

# Start handler: replace the contents of %attr with the attribute
# name/value pairs that follow the parser object and element name.
sub get_attr {
    my $parser  = shift;
    my $element = shift;

    %attr = @_;
}

# Tests 5-6: parse the WINDOWS-1252 document with a handler that captures
# the attributes of the start tag.
$p = XML::Parser->new( Handlers => { Start => \&get_attr } );

# Test 5: parsing must not die.
eval { $p->parse($docstring) };
print "not " if $@;    # couldn't load the map
print "ok 5\n";

# Test 6: each attribute byte was mapped to the expected character.  On
# perls older than 5.6 the comparison is made against raw UTF-8 bytes.
my $pre_unicode = $] < 5.006;
my %want        = (
    euro => ( $pre_unicode ? "\xE2\x82\xAC" : chr(0x20AC) ),
    lsq  => ( $pre_unicode ? "\xE2\x80\x98" : chr(0x2018) ),
    rdq  => ( $pre_unicode ? "\xE2\x80\x9D" : chr(0x201D) ),
);

my $all_mapped = 1;
for my $name (qw(euro lsq rdq)) {
    next if $attr{$name} eq $want{$name};
    $all_mapped = 0;
    last;
}
print "not " unless $all_mapped;
print "ok 6\n";