1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
|
#!perl -w
use strict;
use Test::More tests => 11;
# Minimal header-collector class handed to HTML::HeadParser->new in this test.
# Field names are upper-cased and used directly as keys of the blessed hash;
# a field holds a plain scalar, or an array ref once it has several values.
{
    package H;

    # Constructor: returns an empty blessed hash.
    sub new {
        my $class = shift;
        return bless {}, $class;
    }

    # header($name)         - return the current value stored for $name.
    # header($name, $value) - store $value and return the previous value.
    # The lookup is case-insensitive because the name is upper-cased first.
    sub header {
        my ($self, $name, @new_value) = @_;
        my $field    = uc($name);
        my $previous = $self->{$field};
        $self->{$field} = $new_value[0] if @new_value;
        return $previous;
    }

    # push_header($name, $value) - add a value without dropping earlier ones.
    # The first value is stored as a scalar; later values turn the slot into
    # an array ref and are appended to it.
    sub push_header {
        my ($self, $name, $value) = @_;
        my $field = uc($name);

        # Nothing stored yet: keep the single value as a plain scalar.
        return $self->{$field} = $value
            unless exists $self->{$field};

        # Promote an existing scalar to a one-element array before appending.
        $self->{$field} = [ $self->{$field} ] unless ref $self->{$field};
        return push @{ $self->{$field} }, $value;
    }

    # as_string() - render every field as "NAME: value\n" lines, fields sorted
    # by name, one line per value for multi-valued fields.
    sub as_string {
        my $self = shift;
        my @lines;
        for my $field (sort keys %$self) {
            my $stored = $self->{$field};
            my @values = ref($stored) ? @$stored : ($stored);
            push @lines, map { "$field: $_\n" } @values;
        }
        return join '', @lines;
    }
}
# Test document.  Non-ASCII letters are written as HTML entities so that the
# fixture does not depend on the encoding of this file; the expected title
# below spells the same letters with \x escapes.  The plain text after the
# <meta> tags is what ends the head; nothing after it should be picked up.
my $HTML = <<'EOT';
<title>&Aring; v&aelig;re eller &aring; ikke v&aelig;re</title>
<meta http-equiv="Expires" content="Soon">
<meta http-equiv="Foo" content="Bar">
<link href="mailto:gisle@aas.no" rev=made title="Gisle Aas">
<script>
"</script>"
ignore this
</script>
<base href="http://www.sn.no">
<meta name="Keywords" content="test, test, test,...">
<meta name="Keywords" content="more">
Dette er vanlig tekst. Denne teksten definerer ogs&aring; slutten p&aring;
<head> delen av dokumentet.
<style>
"</style>"
ignore this too
</style>
<isindex>
Dette er ogs&aring; vanlig tekst som ikke skal blir parset i det hele tatt.
EOT

# Title text expected once the entities above have been decoded.
my $title = "\xC5 v\xE6re eller \xE5 ikke v\xE6re";

$| = 1;

#$HTML::HeadParser::DEBUG = 1;
require HTML::HeadParser;

# Feed the whole document in one call.  A true return value would mean the
# parser still wants more data, which should not happen for this document.
my $p = HTML::HeadParser->new( H->new );
ok(!$p->parse($HTML), "parse() does not ask for more data")
    or diag("Need more data which should not happen");
#diag $p->as_string;

like($p->header('Title'), qr/\Q$title\E/, "Title header holds the <title> text");
is($p->header('Expires'), 'Soon', "Expires taken from <meta http-equiv>");
is($p->header('Content-Base'), 'http://www.sn.no', "Content-Base taken from <base href>");
like($p->header('Link'), qr/<mailto:gisle\@aas.no>/, "Link header built from <link>");

# This header should not be present because the head ended
ok(!$p->header('Isindex'), "no Isindex header after the head ended");

# Try feeding one char at a time
my $expected = $p->as_string;
$p = HTML::HeadParser->new(H->new);
for my $char (split //, $HTML) {
    #print STDERR $char;
    $p->parse($char) or last;
}
is($p->as_string, $expected, "char-by-char parsing gives the same headers");

# Try reading it from a file
my $file = "hptest$$.html";
die "$file already exists" if -e $file;
open(my $html_fh, '>', $file) or die "Can't create $file: $!";
binmode($html_fh);
print {$html_fh} $HTML;
# Extra body content and late <title> tags must not affect the result.
print {$html_fh} "<p>This is more content...</p>\n" x 2000;
print {$html_fh} "<title>Buuuh!</title>\n" x 200;
close($html_fh) or die "Can't close $file: $!";

$p = HTML::HeadParser->new(H->new);
$p->parse_file($file);
unlink($file) or warn "Can't unlink $file: $!";
is($p->header("Title"), $title, "parse_file() keeps the title from the head");

# We got into an infinite loop on data without tags and no EOL.
# This was actually a HTML::Parser bug.
open(my $text_fh, '>', $file) or die "Can't create $file: $!";
print {$text_fh} "Foo";
close($text_fh) or die "Can't close $file: $!";

$p = HTML::HeadParser->new(H->new);
$p->parse_file($file);
unlink($file) or warn "Can't unlink $file: $!";
ok(!$p->as_string, "no headers collected from tag-less input");

SKIP: {
    skip "Need Unicode support", 2 if $] < 5.008;

    # Test that the Unicode BOM does not confuse us?
    $p = HTML::HeadParser->new(H->new);
    ok($p->parse("\x{FEFF}\n<title>Hi <foo></title>"),
       "parse() returns true for BOM-prefixed input");
    $p->eof;
    is($p->header("title"), "Hi <foo>", "title found after a leading BOM");
}
|