#!/usr/bin/perl -T
use strict;
use warnings;
use Test::Builder::Tester;
use Test::More tests => 3;
use URI::file ();
use Test::WWW::Mechanize ();
# Exercise scrape_text_by_id() directly.  Every nested subtest installs a
# small HTML document with update_html() and then checks the return value
# twice: once in list context and once in scalar context.
subtest scrape_text_by_id => sub {
    plan tests => 8;    # isa_ok + get_ok + six nested subtests

    my $mech = Test::WWW::Mechanize->new( autolint => 0 );
    isa_ok( $mech, 'Test::WWW::Mechanize' );

    # A page is fetched first; the nested subtests then replace its HTML.
    my $uri = URI::file->new_abs( 't/goodlinks.html' )->as_string;
    $mech->get_ok( $uri, 'Get a dummy page just to have one' );

    # No element carries the ID: expect an empty list / undef.
    subtest 'nothing to find' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], [], 'empty list returned in list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), undef, 'undef returned in scalar context' );
    };

    # Exactly one element carries the ID.
    subtest 'find one' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">contents</p></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], ['contents'], 'list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), 'contents', 'scalar context' );
    };

    # Two elements share the ID: list context yields both texts in document
    # order, scalar context yields only the first.
    subtest 'find multiple' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">contents</p><p id="asdf">further</p></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], ['contents', 'further'], 'all matches returned in list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), 'contents', 'first string returned in scalar context' );
    };

    # The element exists but has no text: expect '' rather than "not found".
    subtest 'present but empty' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf"></p></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], [''], 'list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), '', 'scalar context' );
    };

    # Same expectation when the element is written as a self-closing tag.
    subtest 'present but emptier' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf" /></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], [''], 'list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), '', 'scalar context' );
    };

    # Text inside a nested tag is part of the scraped string.
    subtest 'nested tag' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">Bob and <b>Bongo!</b></p></body></html>' );
        is_deeply( [$mech->scrape_text_by_id( 'asdf' )], ['Bob and Bongo!'], 'list context' );
        is( $mech->scrape_text_by_id( 'asdf' ), 'Bob and Bongo!', 'scalar context' );
    };
};
# Exercise the scraped_id_is() and scraped_id_like() assertion methods,
# including the diagnostics they emit when the requested ID is absent.
subtest 'scraped_id_is and scraped_id_like' => sub {
    plan tests => 5;    # isa_ok + get_ok + three nested subtests

    my $mech = Test::WWW::Mechanize->new( autolint => 0 );
    isa_ok( $mech, 'Test::WWW::Mechanize' );

    # A page is fetched first; the nested subtests then replace its HTML.
    my $uri = URI::file->new_abs( 't/goodlinks.html' )->as_string;
    $mech->get_ok( $uri, 'Get a dummy page just to have one' );

    subtest 'find one' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">contents</p></body></html>' );
        $mech->scraped_id_is( 'asdf', 'contents', 'Works in scalar context' );
        $mech->scraped_id_like( 'asdf', qr/con.+s/, 'Works on regexes' );
    };

    subtest 'nested tag' => sub {
        plan tests => 2;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">Bob and <b>Bongo!</b></p></body></html>' );
        $mech->scraped_id_is( 'asdf', 'Bob and Bongo!' );
        $mech->scraped_id_like( 'asdf', qr/Bob.+Bongo/ );
    };

    subtest 'failures' => sub {
        # Only six results are planned: the two assertions wrapped in
        # test_out()/test_test() are captured by Test::Builder::Tester and
        # reported through the single test_test() result instead.
        plan tests => 6;

        $mech->update_html( '<html><head><title></title></head><body><p id="asdf">Bob and <b>Bongo!</b></p><p id="empty"></p></body></html>' );

        # Test standard successes.
        $mech->scraped_id_is( 'asdf', 'Bob and Bongo!' );
        $mech->scraped_id_like( 'asdf', qr/Bob.+Bongo/ );

        # Test failures.
        # NOTE: test_fail( +2 ) names the line two below it, so nothing may
        # be inserted between test_fail() and the assertion under test.
        test_out( 'not ok 1 - Trying to match nonexistent ID to a string' );
        test_fail( +2 );
        test_diag( q{Can't find ID "nonexistent" to compare to "foo"} );
        $mech->scraped_id_is( 'nonexistent', 'foo', 'Trying to match nonexistent ID to a string' );
        test_test( 'Fails when trying to find nonexistent ID' );

        my $regex = qr/Dave/ism;
        test_out( 'not ok 1 - Trying to match nonexistent ID to a regex' );
        test_fail( +2 );
        test_diag( qq{Can't find ID "nonexistent" to match against $regex} );
        $mech->scraped_id_like( 'nonexistent', $regex, 'Trying to match nonexistent ID to a regex' );
        test_test( 'Fails when trying to match nonexistent ID against a regex' );

        # Make sure that empty tags don't get seen as non-existent.
        $mech->scraped_id_is( 'empty', '' );
        $mech->scraped_id_like( 'empty', qr/^$/ );
    };
};
# Earlier releases failed to find id="foo" unless the markup spelled it
# exactly that way.  Every entry below writes the same attribute in a
# different form; each one is handed to _find_the_chips() in turn.
subtest 'scrape_text_by_id optimization' => sub {
    plan tests => 6;

    # Each entry is [ label, HTML ].  Heredoc bodies stay flush left because
    # their whitespace is part of the markup under test.
    my @variants = (
        [ 'Double-quoted ID', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p id="fish">chips</p>also not chips</body></html>
HTML
        [ 'Single-quoted ID', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p id='fish'>chips</p>also not chips</body></html>
HTML
        [ 'Unquoted ID', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p id=fish>chips</p>also not chips</body></html>
HTML
        [ 'Abnormal spacing', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p id = fish >chips</p>also not chips</body></html>
HTML
        [ 'Unquoted broken across lines', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p id
=
fish >chips</p>also not chips</body></html>
HTML
        [ 'Quoted broken across lines', <<'HTML' ],
<html>
<head><title>Bongo</title></head>
<body>not chips<p
id
=
"fish"
>
chips
</p>
also not chips</body></html>
HTML
    );

    for my $variant ( @variants ) {
        my ( $label, $markup ) = @{$variant};
        _find_the_chips( $markup, $label );
    }
};
# Run one subtest asserting that, in the supplied HTML, the element with
# ID "fish" scrapes to the text "chips".
#
#   $html - markup to install on a fresh mech via update_html()
#   $msg  - label for the subtest; the helper dies if it is false
#
# Returns whatever subtest() returns.
sub _find_the_chips {
    my ( $html, $msg ) = @_;

    # Bump the level by one while this helper is on the call stack.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    $msg or die;

    my $checks = sub {
        plan tests => 2;

        my $mech = Test::WWW::Mechanize->new( autolint => 0 );
        isa_ok( $mech, 'Test::WWW::Mechanize' );

        $mech->update_html( $html );
        $mech->scraped_id_is( 'fish', 'chips' );
    };

    return subtest( "_find_the_chips( $msg )", $checks );
}
# The three top-level subtests planned by "use Test::More tests => 3" have
# all been issued by this point; mark the run as finished and leave explicitly.
done_testing();
exit 0;
|