File: charset-decoding

package info (click to toggle)
libxml-sax-perl 0.16%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 556 kB
  • ctags: 175
  • sloc: perl: 2,417; xml: 121; sh: 79; makefile: 52
file content (123 lines) | stat: -rw-r--r-- 3,359 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Fix charset decoding in the PurePerl module (#405186)
# Niko Tyni <ntyni@iki.fi> Sun, 04 Nov 2007 20:46:58 +0200

--- git.orig/SAX/PurePerl/Productions.pm
+++ git/SAX/PurePerl/Productions.pm
@@ -4,7 +4,7 @@
 
 use Exporter;
 @ISA = ('Exporter');
-@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Letter $Ideographic
+@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Ideographic
     $Extender $Digit $CombiningChar $EncNameStart $EncNameEnd $NameChar $CharMinusDash
     $PubidChar $Any $SingleChar);
 
@@ -36,12 +36,10 @@
     
     $Digit = qr/ [\x30-\x39] /x;
     
-    $Letter = qr/^ $BaseChar $/x;
-    
     # can't do this one without unicode
     # $CombiningChar = qr/^$/msx;
     
-    $NameChar = qr/^ $BaseChar | $Digit | [._:-] | $Extender $/x;
+    $NameChar = qr/ $BaseChar | $Digit | [._:-] | $Extender /x;
     PERL
     die $@ if $@;
 }
@@ -138,9 +136,7 @@
 [\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}]
 /x;
 
-    $Letter = qr/^ $BaseChar | $Ideographic $/x;
-
-    $NameChar = qr/^ $Letter | $Digit | [._:-] | $CombiningChar | $Extender $/x;
+    $NameChar = qr/ $BaseChar | $Ideographic | $Digit | [._:-] | $CombiningChar | $Extender /x;
     PERL
 
     die $@ if $@;
--- git.orig/SAX/PurePerl/Reader/UnicodeExt.pm
+++ git/SAX/PurePerl/Reader/UnicodeExt.pm
@@ -16,7 +16,7 @@
 }
 
 sub switch_encoding_string {
-    Encode::from_to($_[0], $_[1], "utf-8");
+    $_[0] = Encode::decode($_[1], $_[0]);
 }
 
 1;
--- git.orig/SAX/PurePerl/Reader.pm
+++ git/SAX/PurePerl/Reader.pm
@@ -4,7 +4,7 @@
 
 use strict;
 use XML::SAX::PurePerl::Reader::URI;
-use XML::SAX::PurePerl::Productions qw( $SingleChar $Letter $NameChar );
+use XML::SAX::PurePerl::Productions qw( $SingleChar );
 use Exporter ();
 
 use vars qw(@ISA @EXPORT_OK);
--- git.orig/SAX/PurePerl.pm
+++ git/SAX/PurePerl.pm
@@ -672,7 +672,7 @@
     
     return unless length($name);
     
-    $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
+    $name =~ /^$NameChar+$/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
 
     return $name;
 }
--- git.orig/t/14encoding.t
+++ git/t/14encoding.t
@@ -1,23 +1,25 @@
 use Test;
 BEGIN { $tests = 0;
-    if ($] >= 5.007002) { $tests = 7 }
+    if ($] >= 5.007002) { $tests = 9 }
     plan tests => $tests;
 }
 if ($tests) {
 use XML::SAX::PurePerl;
-use XML::SAX::PurePerl::DebugHandler;
 
-my $handler = XML::SAX::PurePerl::DebugHandler->new();
+my $handler = TestHandler->new(); # see below for the TestHandler class
 ok($handler);
 
 my $parser = XML::SAX::PurePerl->new(Handler => $handler);
 ok($parser);
 
 # warn("utf-16\n");
+# verify that the first element is correctly decoded
+$handler->{test_elements} = [ "\x{9031}\x{5831}" ]; 
 $parser->parse_uri("testfiles/utf-16.xml");
 ok(1);
 
 # warn("utf-16le\n");
+$handler->{test_elements} = [ "foo" ];
 $parser->parse_uri("testfiles/utf-16le.xml");
 ok(1);
 
@@ -33,3 +35,19 @@
 $parser->parse_uri("testfiles/iso8859_2.xml");
 ok(1);
 }
+
+package TestHandler;
+use XML::SAX::PurePerl::DebugHandler;
+use base qw(XML::SAX::PurePerl::DebugHandler);
+use Test;
+
+sub start_element {
+    my $self = shift;
+    if ($self->{test_elements} and
+        my $value = pop @{$self->{test_elements}}) {
+        ok($_[0]->{Name}, $value);
+    }
+    $self->SUPER::start_element(@_);
+}
+
+1;