1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
use Test::More tests => 3;
use HTML::PullParser;
my $doc = <<'EOT';
<title>Title</title>
<style> h1 { background: white }
<foo>
</style>
<H1 ID="3">Heading</H1>
<!-- ignore this -->
This is a text with a <A HREF="http://www.sol.no" name="l1">link</a>.
EOT
my $p = HTML::PullParser->new(doc => $doc,
start => 'event,tagname,@attr',
end => 'event,tagname',
text => 'event,dtext',
ignore_elements => [qw(script style)],
unbroken_text => 1,
boolean_attribute_value => 1,
);
my $t = $p->get_token;
is($t->[0], "start");
is($t->[1], "title");
$p->unget_token($t);
my @a;
while (my $t = $p->get_token) {
for (@$t) {
s/\s/./g;
}
push(@a, join("|", @$t));
}
my $res = join("\n", @a, "");
#diag $res;
is($res, <<'EOT');
start|title
text|Title
end|title
text|..
start|h1|id|3
text|Heading
end|h1
text|...This.is.a.text.with.a.
start|a|href|http://www.sol.no|name|l1
text|link
end|a
text|..
EOT
|