1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
use strict;    # -*- perl -*-
use Test;

# The plan must match the total number of ok() calls in this script.
BEGIN { plan( tests => 32 ); }

use XML::LibXSLT;
use XML::LibXML;
use Encode;

# XML parser shared by every case below (input documents and stylesheets).
my $parser = XML::LibXML->new;
ok($parser);

# XSLT processor shared by every case below.
my $xslt = XML::LibXSLT->new;
# Case: the stylesheet declares encoding="UTF-8" on a text output.
{
    # U+0100 == LATIN CAPITAL LETTER A WITH MACRON
    my $source = $parser->parse_string(<<"XML");
<unicode>\x{0100}dam</unicode>
XML
    ok($source);

    my $sheet_dom = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="UTF-8"/>
<xsl:template match="/unicode">
<xsl:value-of select="."/>
</xsl:template>
</xsl:stylesheet>
XSLT
    ok($sheet_dom);

    my $sheet = $xslt->parse_stylesheet($sheet_dom);
    ok($sheet);

    my $result = $sheet->transform($source);
    ok($result);

    # output_string: because the declared output encoding is UTF-8, the
    # result is expected to come back as a character string.
    my $as_default = $sheet->output_string($result);
    ok($as_default);
    ok( Encode::is_utf8($as_default) );
    ok( $as_default, "\x{0100}dam" );

    # output_as_chars: expected to be a character string as well.
    my $as_chars = $sheet->output_as_chars($result);
    ok( Encode::is_utf8($as_chars) );
    ok( $as_chars, "\x{0100}dam" );

    # output_as_bytes: expected to be the raw UTF-8 octets (C4 80 for U+0100)
    # without the UTF-8 flag set.
    my $as_bytes = $sheet->output_as_bytes($result);
    ok( !Encode::is_utf8($as_bytes) );
    ok( $as_bytes, "\xC4\x80dam" );
}
# Input document shared by the remaining cases. It contains U+017E
# (z with caron), a Latin-2 character.
my $doc = $parser->parse_string(<<"XML");
<?xml version="1.0" encoding="UTF-8"?>
<unicode>\x{17E}il</unicode>
XML
ok($doc);
# Case: the stylesheet declares no output encoding.
# libxslt chooses either an entity or UTF-8, so both forms are accepted.
{
    # Accept the numeric character reference for U+017E or its UTF-8 octets
    # (C5 BE). The reference is spelled out in ASCII on purpose: a literal
    # z-caron in this source (no "use utf8") is just the bytes C5 BE, which
    # would duplicate the second alternative and never match the entity form.
    my $entity_or_utf8 = qr/^(?:&#382;|\xC5\xBE)il/;

    my $style_doc = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text"/>
<xsl:template match="/unicode">
<xsl:value-of select="."/>
</xsl:template>
</xsl:stylesheet>
XSLT
    ok( $style_doc );

    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    ok( $stylesheet );

    my $results = $stylesheet->transform($doc);
    ok( $results );

    # output_string: expected to be a byte string here.
    my $output = $stylesheet->output_string( $results );
    ok( !Encode::is_utf8($output) );
    ok( $output =~ $entity_or_utf8 );

    # output_as_chars: expected to be a character string holding U+017E.
    $output = $stylesheet->output_as_chars( $results );
    ok( Encode::is_utf8($output) );
    ok( $output, "\x{17E}il" );

    # output_as_bytes: expected to be a byte string in either accepted form.
    $output = $stylesheet->output_as_bytes( $results );
    ok( !Encode::is_utf8($output) );
    ok( $output =~ $entity_or_utf8 );
}
# Case: the stylesheet declares encoding="iso-8859-1".
# U+017E doesn't map to latin-1, so it will appear as an entity in the
# byte-oriented outputs.
{
    # Numeric character reference for U+017E followed by "il". Kept in ASCII:
    # a literal z-caron here would be the UTF-8 octets C5 BE, which is not
    # what an ISO-8859-1 serialisation can contain.
    my $entity_form = '&#382;il';

    my $style_doc = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="iso-8859-1"/>
<xsl:template match="/unicode">
<xsl:value-of select="."/>
</xsl:template>
</xsl:stylesheet>
XSLT
    ok( $style_doc );

    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    ok( $stylesheet );

    my $results = $stylesheet->transform($doc);
    ok( $results );

    # output_string: expected to be a byte string in the entity form.
    my $output = $stylesheet->output_string( $results );
    ok( $output );
    ok( !Encode::is_utf8($output) );
    ok( $output, $entity_form );

    # output_as_chars: expected to be a character string holding the real
    # U+017E character rather than the entity.
    $output = $stylesheet->output_as_chars( $results );
    ok( Encode::is_utf8($output) );
    ok( $output, "\x{17E}il" );

    # output_as_bytes: expected to be a byte string in the entity form.
    $output = $stylesheet->output_as_bytes( $results );
    ok( !Encode::is_utf8($output) );
    ok( $output, $entity_form );
}
|