File: encodings.t

package info (click to toggle)
libxml-dom-perl 1.46-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 692 kB
  • sloc: perl: 4,216; xml: 4,117; makefile: 2
file content (48 lines) | stat: -rw-r--r-- 1,465 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/perl
use strict;
use utf8; # for embedded strings
use XML::DOM;
use Test::More tests => 16;
use Test::NoWarnings;
use constant TMPFILE => "test_encoding.xml";

# XML fixture handed to the parser.  It is assembled from explicit \x{...}
# escapes instead of raw characters because most of the code points involved
# are in the Private Use Area: they have no visible glyph and are easily
# dropped by editors, mail and web tooling, which would silently empty the
# fixture.  The values mirror what try() expects:
#   - attribute baz : U+E4B6 U+E4B7
#   - text of <foo> : numeric character references for U+00E3 and U+F8E8
#   - text of <bar> : U+FB7E U+FB7F followed by U+E4B5 .. U+E4BE
# Lines are joined with "\n" and there is no trailing newline.
my $str = join "\n",
	q(<?xml version="1.0" encoding="UTF-8"?>),
	q(<blah>),
	qq(\t<foo baz="\x{E4B6}\x{E4B7}">&#227;&#63720;</foo>),
	qq(\t<bar>\x{FB7E}\x{FB7F}\x{E4B5}\x{E4B6}\x{E4B7}\x{E4B8}\x{E4B9}\x{E4BA}\x{E4BB}\x{E4BC}\x{E4BD}\x{E4BE}</bar>),
	q(</blah>);

# Round trip: parse the in-memory fixture, check the decoded text, write the
# document to TMPFILE, check that file with xmllint, parse it back and check
# the decoded text a second time.

# Remove the temporary file on every exit path, including a die part-way
# through the steps below.
END { unlink(TMPFILE) }

# test 1 -- check for correct parsing of input string
my $parser = XML::DOM::Parser->new;
my $doc = eval { $parser->parse($str) };
my $parse_error = $@;    # saved at once; later calls may reset $@
ok(((not $parse_error) && defined $doc), 'loads ok, parses str')
	or diag("parse failed: $parse_error");

try($doc);
$doc->printToFile(TMPFILE);
$doc->dispose;

# system() returns -1 when the program could not be started at all (for
# example xmllint is not installed).  That says nothing about the file we
# wrote, so the check is skipped in that case instead of reported as a
# failure.  The 'exec' warning is silenced locally so that a missing binary
# does not trip Test::NoWarnings when perl runs with -w.
my ($xmllint_status, $exec_error);
{
	no warnings 'exec';
	$xmllint_status = system("xmllint", "--noout", TMPFILE);
	$exec_error = $!;
}
SKIP: {
	skip("xmllint could not be started: $exec_error", 1)
		if $xmllint_status == -1;
	ok($xmllint_status == 0, 'xmllint runs ok');
}

my $doc2 = eval { $parser->parsefile(TMPFILE) };
my $reparse_error = $@;    # saved at once; later calls may reset $@
ok(((not $reparse_error) && defined $doc2), 'parses TMPFILE ok')
	or diag("reparse failed: $reparse_error");

try($doc2);
$doc2->dispose;

# try($doc)
#
# Runs six assertions against a parsed document: for the "baz" attribute of
# the first <foo> element, the data of <foo>'s first child and the data of
# <bar>'s first child, it checks that the value is flagged as a Perl
# character string (utf8::is_utf8) and that it equals the expected code
# points.
#
# Parameter:
#   $doc - object whose getDocumentElement result answers
#          getElementsByTagName; the elements found must answer
#          getAttribute / getFirstChild, and the children getData.
#
# Returns the result of the last assertion.  Dies if $doc is undefined or a
# looked-up element or child is missing, because a method is then called on
# an undefined value.
sub try {
	my $doc = shift;

	# This helper is called more than once with identical test names; raising
	# the level makes failures point at the calling line rather than at a
	# line inside this sub, so the failing call can be identified.
	local $Test::Builder::Level = $Test::Builder::Level + 1;

	my $root = $doc->getDocumentElement;

	# In scalar context the lookup yields an array-like list of matches;
	# only the first match is examined.
	my $foo = $root->getElementsByTagName("foo")->[0];
	my $bar = $root->getElementsByTagName("bar")->[0];

	my $baz = $foo->getAttribute("baz");
	my $footext = $foo->getFirstChild->getData;
	my $bartext = $bar->getFirstChild->getData;

	ok(utf8::is_utf8($baz), 'baz is_utf8...');
	is($baz, "\x{E4B6}\x{E4B7}", '...and correct');
	ok(utf8::is_utf8($footext), 'footext is_utf8...');
	is($footext, "\xE3\x{F8E8}", '...and correct');
	ok(utf8::is_utf8($bartext), 'bartext is_utf8');
	is($bartext, "\x{FB7E}\x{FB7F}\x{E4B5}\x{E4B6}\x{E4B7}\x{E4B8}\x{E4B9}\x{E4BA}\x{E4BB}\x{E4BC}\x{E4BD}\x{E4BE}", 'and correct');
}