File: find_link.t

package info (click to toggle)
libwww-mechanize-perl 2.03-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 988 kB
  • sloc: perl: 4,088; makefile: 6
file content (171 lines) | stat: -rw-r--r-- 6,425 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!perl -T

use warnings;
use strict;

use Test::More;
use URI::file;

BEGIN {
    delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)};  # Placates taint-unsafe Cwd.pm in 5.6.1
    use_ok( 'WWW::Mechanize' );
}

my $mech = WWW::Mechanize->new( cookie_jar => undef );
isa_ok( $mech, 'WWW::Mechanize' );

my $uri = URI::file->new_abs( 't/find_link.html' )->as_string;

$mech->get( $uri );
ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page};

my $x;
$x = $mech->find_link();
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://www.drphil.com/', 'First link on the page' );
is( $x->url, 'http://www.drphil.com/', 'First link on the page' );

$x = $mech->find_link( n => 3 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'styles.css', 'Third link should be the CSS' );
is( $x->url, 'styles.css', 'Third link should be the CSS' );

$x = $mech->find_link( url_regex => qr/upcase/i );
isa_ok( $x, 'WWW::Mechanize::Link' );
like( $x->url, qr/\Qupcase.com/i, 'found link in uppercase meta tag' );

$x = $mech->find_link( text => 'CPAN A' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://a.cpan.org/', 'First CPAN link' );
is( $x->url, 'http://a.cpan.org/', 'First CPAN link' );

$x = $mech->find_link( url => 'CPAN' );
ok( !defined $x, 'No url matching CPAN' );

$x = $mech->find_link( text_regex => qr/CPAN/, n=>3 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://c.cpan.org/', '3rd CPAN text' );
is( $x->url, 'http://c.cpan.org/', '3rd CPAN text' );

$x = $mech->find_link( text => 'CPAN', n=>34 );
ok( !defined $x, 'No 34th CPAN text' );

$x = $mech->find_link( text_regex => qr/(?i:cpan)/ );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' );
is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' );

$x = $mech->find_link( text_regex => qr/cpan/i );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' );
is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' );

$x = $mech->find_link( text_regex => qr/cpan/i, n=>153 );
ok( !defined $x, 'No 153rd cpan link' );

$x = $mech->find_link( url => 'http://b.cpan.org/' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://b.cpan.org/', 'Got b.cpan.org' );
is( $x->url, 'http://b.cpan.org/', 'Got b.cpan.org' );

$x = $mech->find_link( url => 'http://b.cpan.org', n=>2 );
ok( !defined $x, 'Not a second b.cpan.org' );

$x = $mech->find_link( url_regex => qr/[b-d]\.cpan\.org/, n=>2 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://c.cpan.org/', 'Got c.cpan.org' );
is( $x->url, 'http://c.cpan.org/', 'Got c.cpan.org' );

my @wanted_links= (
   [ 'http://a.cpan.org/', 'CPAN A', undef, 'a' ],
   [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ],
   [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ],
   [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ],
);
my @links = $mech->find_all_links( text_regex => qr/CPAN/ );
@{$_} = @{$_}[0..3] for @links;
is_deeply( \@links, \@wanted_links, 'Correct links came back' );

my $linkref = $mech->find_all_links( text_regex => qr/CPAN/ );
is_deeply( $linkref, \@wanted_links, 'Correct links came back' );

# Check combinations of links
$x = $mech->find_link( text => 'News' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://www.msnbc.com/', 'First News is MSNBC' );
is( $x->url, 'http://www.msnbc.com/', 'First News is MSNBC' );

$x = $mech->find_link( text => 'News', url_regex => qr/bbc/ );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://www.bbc.co.uk/', 'First BBC news link' );
is( $x->url, 'http://www.bbc.co.uk/', 'First BBC news link' );
is( $x->[1], 'News', 'First BBC news text' );
is( $x->text, 'News', 'First BBC news text' );

$x = $mech->find_link( text => 'News', url_regex => qr/cnn/ );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://www.cnn.com/', 'First CNN news link' );
is( $x->url, 'http://www.cnn.com/', 'First CNN news link' );
is( $x->[1], 'News', 'First CNN news text' );
is( $x->text, 'News', 'First CNN news text' );

AREA_CHECKS: {
    my @wanted_links = (
        [ 'http://www.cnn.com/', 'CNN', undef, 'a' ],
        [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ],
        # Can someone confirm that I just fixed a bug here, and
        # area tags /should/ have names? -mls
        [ 'http://www.cnn.com/area', undef, 'Marty', 'area' ],
    );
    my @links = $mech->find_all_links( url_regex => qr/cnn\.com/ );
    @{$_} = @{$_}[0..3] for @links;
    is_deeply( \@links, \@wanted_links, 'Correct links came back' );
}

$x = $mech->find_link( name => 'bongo' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is_deeply( $x, [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ], 'Got the CPAN C link' );

$x = $mech->find_link( name_regex => qr/^[A-Z]/, n => 2 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is_deeply( $x, [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ], 'Got 2nd link that begins with a capital' );

$x = $mech->find_link( tag => 'a', n => 3 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is_deeply( $x, [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ], 'Got 3rd <A> tag' );

$x = $mech->find_link( tag_regex => qr/^(a|frame)$/, n => 7 );
isa_ok( $x, 'WWW::Mechanize::Link' );
is_deeply( $x, [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ], 'Got 7th <A> or <FRAME> tag' );

$x = $mech->find_link( text => 'Rebuild Index' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is_deeply( [@{$x}[0..3]], [ '/cgi-bin/MT/mt.cgi', 'Rebuild Index', undef, 'a' ], 'Got the JavaScript link' );

$x = $mech->find_link( url => 'blongo.html' );
isa_ok( $x, 'WWW::Mechanize::Link' );

$x = $mech->find_link( url_abs => 'blongo.html' );
ok( !defined $x, 'No match' );

$x = $mech->find_link( url_abs_regex => qr[t/blongo\.html$] );
isa_ok( $x, 'WWW::Mechanize::Link' );

$x = $mech->find_link( text_regex => qr/click/i);
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'http://www.yahoo.com/', 'Got js url link' );
is( $x->url, 'http://www.yahoo.com/', 'Got js url link' );

$x = $mech->find_link( rel => 'icon' );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'foo.png', 'Got icon url link' );

$x = $mech->find_link( rel_regex => qr/sheet/i );
isa_ok( $x, 'WWW::Mechanize::Link' );
is( $x->[0], 'styles.css', 'Got stylesheet url link' );

$mech->get( URI::file->new_abs('t/refresh.html') );
my $link = $mech->find_link( tag => 'meta' );
is( $link->url, 'http://www.mysite.com/', 'got link from meta tag via tag search' );

done_testing();