File: 11utf8.t

package info (click to toggle)
libxml-libxslt-perl 1.70-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 476 kB
  • ctags: 748
  • sloc: perl: 800; ansic: 402; xml: 21; makefile: 8
file content (127 lines) | stat: -rw-r--r-- 3,314 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use strict;                     # -*- perl -*-
use Test;
use constant PLAN => 32;
# Announce the test plan at compile time and make sure Encode is usable.
#
# The checks further down call Encode::is_utf8 to inspect the strings
# returned by the stylesheet.  If Encode cannot be loaded, each of the PLAN
# planned tests is reported as skipped and the script exits immediately.
BEGIN {
  plan tests => PLAN;
  # Explicit Class->method call: the indirect-object spelling
  # ("import Encode") is parsed heuristically by perl and is disabled by
  # newer feature bundles.  The trailing 1 makes the eval true on success.
  unless (eval { require Encode; Encode->import; 1 }) {
    skip("this test requires Encode.pm\n") for (1..PLAN);
    exit;
  }
}

use XML::LibXSLT;
use XML::LibXML;

# A single XML::LibXML parser is shared by every section of this script; it
# parses both the source documents and the stylesheet documents.
my $parser = XML::LibXML->new;
ok($parser);

# A single XML::LibXSLT object compiles each stylesheet document in turn.
my $xslt = XML::LibXSLT->new;

{
  # Source document: one element whose text begins with U+0100
  # (LATIN CAPITAL LETTER A WITH MACRON).  The heredoc interpolates, so the
  # \x{...} escape is already a real character when the parser sees it.
  my $source = $parser->parse_string(<<"XML");
<unicode>\x{0100}dam</unicode>
XML
  ok( $source );

  # Text-mode stylesheet that emits the element's string value and declares
  # UTF-8 as its output encoding.
  my $xsl_doc = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="UTF-8"/>
  <xsl:template match="/unicode">
    <xsl:value-of select="."/>
  </xsl:template>
</xsl:stylesheet>
XSLT
  ok( $xsl_doc );

  my $compiled = $xslt->parse_stylesheet($xsl_doc);
  ok( $compiled );

  my $result = $compiled->transform($source);
  ok( $result );

  # The same text in its two expected representations.
  my $expected_chars = "\x{0100}dam";    # character string
  my $expected_bytes = "\xC4\x80dam";    # UTF-8 encoded octets

  # output_string: because the declared output encoding is UTF-8, the
  # result is expected to come back as a UTF-8 flagged character string.
  my $as_string = $compiled->output_string($result);
  ok( $as_string );
  ok( Encode::is_utf8($as_string) );
  ok( $as_string, $expected_chars );

  # output_as_chars: expected to be a flagged character string as well.
  my $as_chars = $compiled->output_as_chars($result);
  ok( Encode::is_utf8($as_chars) );
  ok( $as_chars, $expected_chars );

  # output_as_bytes: expected to be unflagged, holding the encoded octets.
  my $as_bytes = $compiled->output_as_bytes($result);
  ok( !Encode::is_utf8($as_bytes) );
  ok( $as_bytes, $expected_bytes );
}

# Source document shared by the two sections that follow.  Its text starts
# with U+017E (z with caron), a Latin-2 character.  The heredoc
# interpolates, so the \x{...} escape is a real character in the string
# handed to the parser.
my $zcaron_xml = <<"XML";
<?xml version="1.0" encoding="UTF-8"?>
<unicode>\x{17E}il</unicode>
XML
my $doc = $parser->parse_string($zcaron_xml);
ok( $doc );

# Stylesheet with no declared output encoding: libxslt may serialise the
# character either as a numeric entity or as UTF-8 octets, so the
# byte-oriented checks accept both.
{
  my $xsl_doc = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>
  <xsl:template match="/unicode">
    <xsl:value-of select="."/>
  </xsl:template>
</xsl:stylesheet>
XSLT
  ok( $xsl_doc );

  my $compiled = $xslt->parse_stylesheet($xsl_doc);
  ok( $compiled );

  my $result = $compiled->transform($doc);
  ok( $result );

  # Matches either accepted serialisation: the entity or the UTF-8 octets.
  my $entity_or_octets = qr/^(?:&#382;|\xC5\xBE)il/;

  # output_string: expected to be an unflagged string here.
  my $as_string = $compiled->output_string($result);
  ok( !Encode::is_utf8($as_string) );
  ok( $as_string =~ $entity_or_octets );

  # output_as_chars: expected to be a flagged character string holding the
  # real character.
  my $as_chars = $compiled->output_as_chars($result);
  ok( Encode::is_utf8($as_chars) );
  ok( $as_chars, "\x{17E}il" );

  # output_as_bytes: expected to be unflagged, in either serialisation.
  my $as_bytes = $compiled->output_as_bytes($result);
  ok( !Encode::is_utf8($as_bytes) );
  ok( $as_bytes =~ $entity_or_octets );
}

# Stylesheet declaring iso-8859-1 output: U+017E has no Latin-1 mapping, so
# the byte-oriented outputs are expected to carry it as a numeric entity.
{
  my $xsl_doc = $parser->parse_string(<<"XSLT");
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="iso-8859-1"/>
  <xsl:template match="/unicode">
    <xsl:value-of select="."/>
  </xsl:template>
</xsl:stylesheet>
XSLT
  ok( $xsl_doc );

  my $compiled = $xslt->parse_stylesheet($xsl_doc);
  ok( $compiled );

  my $result = $compiled->transform($doc);
  ok( $result );

  # The two expected representations of the result text.
  my $expected_entity = "&#382;il";      # entity form for Latin-1 output
  my $expected_chars  = "\x{17E}il";     # real character

  # output_string: expected to be a non-empty, unflagged string with the
  # entity in it.
  my $as_string = $compiled->output_string($result);
  ok( $as_string );
  ok( !Encode::is_utf8($as_string) );
  ok( $as_string, $expected_entity );

  # output_as_chars: expected to be a flagged character string holding the
  # real character rather than the entity.
  my $as_chars = $compiled->output_as_chars($result);
  ok( Encode::is_utf8($as_chars) );
  ok( $as_chars, $expected_chars );

  # output_as_bytes: expected to be unflagged, with the entity form again.
  my $as_bytes = $compiled->output_as_bytes($result);
  ok( !Encode::is_utf8($as_bytes) );
  ok( $as_bytes, $expected_entity );
}