#!/usr/bin/perl -w
use strict;
# State written by the parser callbacks below:
#   $tag  - name of the most recently seen start tag
#   $text - every text chunk delivered so far, concatenated
my ($tag, $text);

use HTML::Parser ();

# Shared parser for the first group of tests.  The start handler receives the
# tag name; the text handler receives "dtext" and appends it to $text.
my $p = HTML::Parser->new(
    start_h => [ sub { ($tag) = @_ },      "tagname" ],
    text_h  => [ sub { $text .= $_[0] },   "dtext" ],
);
use Test::More tests => 14;
SKIP: {
# Turn on marked-section parsing.  If the call dies (presumably because this
# HTML::Parser build lacks marked-section support -- confirm against the
# module), the error message becomes the skip reason for all 14 tests.
eval {
    $p->marked_sections(1);
};
skip $@, 14 if $@;

# $text is deliberately NOT reset between the first three tests: the text
# handler appends, so each expected value also contains the earlier output.
$p->parse("<![[foo]]>");
is($text, "foo", "section with no keyword is included");

$p->parse("<![TEMP INCLUDE[bar]]>");
is($text, "foobar", "TEMP INCLUDE section is included");

$p->parse("<![ INCLUDE -- IGNORE -- [foo<![IGNORE[bar]]>]]>\n<br>");
is($text, "foobarfoo\n", "keyword inside a comment is not applied; nested IGNORE is dropped");

# The inputs below use character entity references on purpose.  "dtext"
# decodes entities in ordinary text but passes CDATA content through
# untouched, so the expected strings distinguish the decoded character
# (\xE5, a-ring) from the raw "&aring;" / "&lt;" reference.

# A CDATA section fed in two chunks: it ends at the first "]]>", and the
# remainder is ordinary text, so its "&gt;" is decoded.
$text = "";
$p->parse("<![ CDATA [&lt;foo");
$p->parse("<![IGNORE[bar]]>,bar&gt;]]><br>");
is($text, "&lt;foo<![IGNORE[bar,bar>]]>", "CDATA section split across parse() calls");

# RCDATA: entity decoded, "<a>" kept as text.
# CDATA:  neither decoded nor parsed.
# Plain:  entity decoded, "<a>" parsed as a start tag (so not in $text).
$text = "";
$p->parse("<![ RCDATA [&aring;<a>]]><![CDATA[&aring;<a>]]>&aring;<a><br>");
is($text, "\xE5<a>&aring;<a>\xE5", "RCDATA, CDATA and plain text handle entities and tags differently");
is($tag, "br", "last start tag seen is <br>");

# Several keywords in one section: the expected values show which one wins.
$text = "";
$p->parse("<![INCLUDE RCDATA CDATA IGNORE [foo&aring;<a>]]><br>");
is($text, "", "IGNORE wins over CDATA, RCDATA and INCLUDE");

$text = "";
$p->parse("<![INCLUDE RCDATA CDATA [foo&aring;<a>]]><br>");
is($text, "foo&aring;<a>", "CDATA wins over RCDATA and INCLUDE");

$text = "";
$p->parse("<![INCLUDE RCDATA [foo&aring;<a>]]><br>");
is($text, "foo\xE5<a>", "RCDATA wins over INCLUDE");

$text = "";
$p->parse("<![INCLUDE [foo&aring;<a>]]><br>");
is($text, "foo\xE5", "INCLUDE section is parsed as normal markup");

$text = "";
$p->parse("<![[foo&aring;<a>]]><br>");
is($text, "foo\xE5", "section with no keyword is parsed as normal markup");
# offsets/line/column numbers

# Event log filled in by x().  Declared before the parser runs so the handler
# never appends to a lexical whose "my" statement has not executed yet.
my @x;

# Default handler for the position test.  Records one line per event in the
# form
#     LINE.COLUMN:OFFSET EVENT "TEXT"
# with newlines rendered as a literal \n and spaces as "." so whitespace is
# visible in the comparison below.
sub x {
    my ($line, $col, $offset, $event, $raw) = @_;
    (my $shown = $raw) =~ s/\n/\\n/g;
    $shown =~ s/ /./g;
    push(@x, "$line.$col:$offset $event \"$shown\"\n");
}

$p = HTML::Parser->new(
    default_h       => [\&x, "line,column,offset,event,text"],
    marked_sections => 1,
);

# The expected positions further down depend on this document byte for byte:
# lines 3 and 9 start with two spaces, and the CDATA content contains the
# 7-character reference "&aring;" (reported raw, since "text" is undecoded).
$p->parse(<<'EOT')->eof;
<title>Test</title>
<![CDATA
  [foo&aring;<a>
]]>
<![[
INCLUDE
STUFF
]]>
  <h1>Test</h1>
EOT

#diag @x;
is(join("", @x), <<'EOT', "line, column and offset are reported correctly around marked sections");
1.0:0 start_document ""
1.0:0 start "<title>"
1.7:7 text "Test"
1.11:11 end "</title>"
1.19:19 text "\n"
3.3:32 text "foo&aring;<a>\n"
4.3:49 text "\n"
5.4:54 text "\nINCLUDE\nSTUFF\n"
8.3:72 text "\n.."
9.2:75 start "<h1>"
9.6:79 text "Test"
9.10:83 end "</h1>"
9.15:88 text "\n"
10.0:89 end_document ""
EOT
# Round-trip check: the default handler receives "skipped_text,text" for every
# event and appends both to $result; the test expects the concatenation to
# equal the input document exactly.
my $doc = "<Tag><![CDATA[This is cdata]]></Tag>";
my $result = "";
$p = HTML::Parser->new(
    marked_sections => 1,
    handlers => {
        default => [ sub { $result .= join("", @_); }, "skipped_text,text" ],
    },
)->parse($doc)->eof;
# Test::More's is() takes (got, expected); $result is what was produced.
is($result, $doc, "skipped_text plus text reproduces the original document");
# CDATA content whose last character is "]", so the input ends in "]]]>".
# The test expects that final content bracket to be kept in the text.
$text = "";
$p = HTML::Parser->new(
    text_h          => [ sub { $text .= $_[0] }, "dtext" ],
    marked_sections => 1,
);
$p->parse("<![CDATA[foo [1]]]>");
is($text, "foo [1]", "CDATA text ending in square bracket");
} # SKIP