1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
use Test::More tests => 3;
use strict;
use HTML::Parser;
# Test 1: restrict tag reporting to <a>.  Every reported start event passes
# a_handler the text skipped since the previous reported event followed by
# the tag's own source text; the skipped text still pending at the end of
# the document is pushed straight onto @doc.
my $p = HTML::Parser->new( api_version => 3 );
$p->report_tags( qw(a) );

my @doc;
$p->handler( start        => \&a_handler, "skipped_text, text" );
$p->handler( end_document => \@doc,       '@{skipped_text}' );

$p->parse(<<"EOT");
<title>hi</title>
<h1><a href="foo">link</a></h1>
and <a foo="">some</a> text.
EOT
$p->eof;
# Start-tag callback for the first test.  Takes the two values requested by
# the "skipped_text, text" argspec and appends both to the file-level @doc:
# the skipped text as-is, then the tag's source text uppercased.
sub a_handler {
    my ($skipped, $tag_text) = @_;
    push @doc, $skipped, uc $tag_text;
}
# Joining @doc must reproduce the parsed document byte for byte, except that
# each <a ...> start tag has been uppercased by a_handler.
is(join("", @doc), <<'EOT', 'skipped_text preserves unreported markup around uppercased <a> start tags');
<title>hi</title>
<h1><A HREF="FOO">link</a></h1>
and <A FOO="">some</a> text.
EOT
#
# Comment stripper.  Interaction with "" handlers.
#
# Only two handlers are installed: "" for comments and a callback for
# end_document.  Nothing else is reported, so the end_document callback
# receives the whole document as skipped_text; the test expects that text
# to equal the input with its comment removed.
#
my $doc = <<'EOT';
<html>text</html>
<!-- comment -->
and some more <b>text</b>.
EOT
(my $expected = $doc) =~ s/<!--.*?-->//;

$p = HTML::Parser->new(api_version => 3);
$p->handler(comment => "");
$p->handler(
    end_document => sub {
        my ($stripped) = @_;
        #diag $stripped;
        is($stripped, $expected, 'comment with "" handler is left out of skipped_text');
    },
    "skipped_text",
);

# Feed the parser one character at a time so that every token is split
# across parse() calls.
for my $char (split //, $doc) {
    $p->parse($char);
}
$p->eof;
#
# Interaction with unbroken text
#
# The parser runs with unbroken_text enabled and a "" handler for end tags.
# The expected string below shows the text on both sides of an end tag
# arriving as a single text event ("b bc c"), and each unhandled start tag
# arriving as the skipped_text of the text event that follows it.
#
my @x;
$p = HTML::Parser->new(api_version => 3, unbroken_text => 1);
$p->handler(text         => \@x, '@{"X", skipped_text, text}');
$p->handler(end          => "");
$p->handler(end_document => \@x, '@{"Y", skipped_text}');

$doc = "a a<a>b b</a>c c<x>d d</x>e";

# One character per parse() call, so text chunks are maximally fragmented.
for my $char (split //, $doc) {
    $p->parse($char);
}
$p->eof;

#diag join(":", @x);
is(join(":", @x), "X::a a:X:<a>:b bc c:X:<x>:d de:Y:",
    'unbroken text is joined across "" end tags; start tags appear as skipped_text');
|