1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
|
#!/usr/bin/perl
use strict;
use utf8; # for embedded strings
use XML::DOM;
use Test::More tests => 16;
use Test::NoWarnings;
use constant TMPFILE => "test_encoding.xml";
# Fixture document fed to the parser. Its attribute and text contents must
# match the expected values asserted in try(). The private-use code points are
# spelled as \x{...} escapes (hence qq() instead of q()) so they cannot be
# lost when the file is copied or re-encoded; the printable characters stay
# as embedded UTF-8 literals, which is what "use utf8" is there for.
my $str =
qq(<?xml version="1.0" encoding="UTF-8"?>
<blah>
<foo baz="\x{E4B6}\x{E4B7}">ã\x{F8E8}</foo>
<bar>ﭾﭿ\x{E4B5}\x{E4B6}\x{E4B7}\x{E4B8}\x{E4B9}\x{E4BA}\x{E4BB}\x{E4BC}\x{E4BD}\x{E4BE}</bar>
</blah>);

# Step 1 -- parse the in-memory string and check the resulting DOM.
my $parser = XML::DOM::Parser->new;
my $doc = eval { $parser->parse($str); };
ok(((not $@) && defined $doc), 'loads ok, parses str');
try($doc);

# Step 2 -- serialize the document to disk and release the first DOM.
$doc->printToFile(TMPFILE);
$doc->dispose;

# Step 3 -- validate the written file with the external xmllint tool.
SKIP: {
    # A failed exec would raise a "Can't exec" warning when warnings are
    # enabled (e.g. under -w), which Test::NoWarnings would count as a
    # failure; the -1 return value below already tells us what happened.
    no warnings 'exec';
    my $status = system("xmllint", "--noout", TMPFILE);

    # -1 means the program could not be started at all. That says nothing
    # about the module under test, so skip instead of failing. The skip still
    # counts as one test, keeping the declared plan intact.
    skip("xmllint could not be run: $!", 1) if $status == -1;

    ok($status == 0, 'xmllint runs ok');
}

# Step 4 -- parse the file back in and run the same content checks on it.
my $doc2 = eval { $parser->parsefile(TMPFILE) };
ok(((not $@) && defined $doc2), 'parses TMPFILE ok');
try($doc2);
$doc2->dispose;

# Clean up the scratch file; a leftover is reported but is not a test failure.
unlink(TMPFILE) or diag("could not remove " . TMPFILE . ": $!");
# Run the six content assertions against one parsed document.
#
# $doc must respond to getDocumentElement. For the first <foo> and the first
# <bar> element below the document element, this checks that the "baz"
# attribute of <foo> and the first-child text of both elements are flagged as
# UTF-8 by utf8::is_utf8 and are equal to the expected character sequences.
# Returns the result of the final comparison.
sub try {
    my ($doc) = @_;

    my $root = $doc->getDocumentElement;

    # The lookup result is dereferenced as an array; only the first match
    # of each tag name is examined.
    my $foo_elem = $root->getElementsByTagName("foo")->[0];
    my $bar_elem = $root->getElementsByTagName("bar")->[0];

    # Values actually found in the document.
    my $baz_value = $foo_elem->getAttribute("baz");
    my $foo_text  = $foo_elem->getFirstChild->getData;
    my $bar_text  = $bar_elem->getFirstChild->getData;

    # Values the document is expected to carry.
    my $want_baz = "\x{E4B6}\x{E4B7}";
    my $want_foo = "\xE3\x{F8E8}";
    my $want_bar = "\x{FB7E}\x{FB7F}\x{E4B5}\x{E4B6}\x{E4B7}\x{E4B8}\x{E4B9}\x{E4BA}\x{E4BB}\x{E4BC}\x{E4BD}\x{E4BE}";

    ok(utf8::is_utf8($baz_value), 'baz is_utf8...');
    is($baz_value, $want_baz, '...and correct');

    ok(utf8::is_utf8($foo_text), 'footext is_utf8...');
    is($foo_text, $want_foo, '...and correct');

    ok(utf8::is_utf8($bar_text), 'bartext is_utf8');
    return is($bar_text, $want_bar, 'and correct');
}
|