1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
#!/usr/bin/perl -w
use strict;
use Test::More 'no_plan';
use_ok 'URI::Find';
use_ok 'URI::Find::Schemeless';
my $No_joined = @ARGV && $ARGV[0] eq '--no-joined' ? shift : 0;
# %Run contains one entry for each type of finder. Keys are mnemonics,
# required to be a single letter. The values are hashes, keys are names
# (used only for output) and values are the subs which actually run the
# tests. Each is invoked with a reference to the text to scan and a
# code reference, and runs the finder on that text with that callback,
# returning the number of matches.
my %Run;
BEGIN {
%Run = (
# plain
P => {
old_interface => sub { run_function(\&find_uris, @_) },
regular => sub { run_object('URI::Find', @_) },
},
# schemeless
S => {
schemeless =>
sub { run_object('URI::Find::Schemeless', @_) },
},
);
die if grep { length != 1 } keys %Run;
}
# A spec is a reference to a 2-element list. The first is a string
# which contains the %Run keys which will find the URL, the second is
# the URL itself. Eg:
#
# [PS => 'http://www.foo.com/'] # found by both P and S
# [S => 'http://asdf.foo.com/'] # only found by S
#
# %Tests maps from input text to a list of specs which describe the URLs
# which will be found. If the value is a reference to an empty list, no
# URLs will be found in the key.
#
# As a special case, a %Tests value can be initialized as a string.
# This will be replaced with a spec which indicates that all finders
# will locate that as the only URL in the key.
my %Tests;
BEGIN {
my $all = join '', keys %Run;
# ARGH! URI::URL is inconsistant in how it normalizes URLs!
# HTTP URLs get a trailing slash, FTP and gopher do not.
%Tests = (
'Something something something.travel and stuff'
=> [[ S => 'http://something.travel/' ]],
'<URL:http://www.perl.com>' => 'http://www.perl.com/',
'<ftp://ftp.site.org>' => 'ftp://ftp.site.org',
'<ftp.site.org>' => [[ S => 'ftp://ftp.site.org' ]],
'Make sure "http://www.foo.com" is caught' =>
'http://www.foo.com/',
'http://www.foo.com' => 'http://www.foo.com/',
'www.foo.com' => [[ S => 'http://www.foo.com/' ]],
'ftp.foo.com' => [[ S => 'ftp://ftp.foo.com' ]],
'gopher://moo.foo.com' => 'gopher://moo.foo.com',
'I saw this site, http://www.foo.com, and its really neat!'
=> 'http://www.foo.com/',
'Foo Industries (at http://www.foo.com)'
=> 'http://www.foo.com/',
'Oh, dear. Another message from Dejanews. http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25 How fun.'
=> 'http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25',
'Hmmm, Storyserver from news.com. http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-200-1537811 How nice.'
=> [[S => 'http://news.com/'],
[$all => 'http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-200-1537811']],
'$html = get("http://www.perl.com/");' => 'http://www.perl.com/',
q|my $url = url('http://www.perl.com/cgi-bin/cpan_mod');|
=> 'http://www.perl.com/cgi-bin/cpan_mod',
'http://www.perl.org/support/online_support.html#mail'
=> 'http://www.perl.org/support/online_support.html#mail',
'irc.lightning.net irc.mcs.net'
=> [[S => 'http://irc.lightning.net/'],
[S => 'http://irc.mcs.net/']],
'foo.bar.xx/~baz/',
=> [[S => 'http://foo.bar.xx/~baz/']],
'foo.bar.xx/~baz/ abcd.efgh.mil, none.such/asdf/ hi.there.org'
=> [[S => 'http://foo.bar.xx/~baz/'],
[S => 'http://abcd.efgh.mil/'],
[S => 'http://hi.there.org/']],
'foo:<1.2.3.4>'
=> [[S => 'http://1.2.3.4/']],
'mail.eserv.com.au? failed before ? designated end'
=> [[S => 'http://mail.eserv.com.au/']],
'foo.info/himom ftp.bar.biz'
=> [[S => 'http://foo.info/himom'],
[S => 'ftp://ftp.bar.biz']],
'(http://round.com)' => 'http://round.com/',
'[http://square.com]' => 'http://square.com/',
'{http://brace.com}' => 'http://brace.com/',
'<http://angle.com>' => 'http://angle.com/',
'(round.com)' => [[S => 'http://round.com/' ]],
'[square.com]' => [[S => 'http://square.com/' ]],
'{brace.com}' => [[S => 'http://brace.com/' ]],
'<angle.com>' => [[S => 'http://angle.com/' ]],
'<x>intag.com</x>' => [[S => 'http://intag.com/' ]],
'[mailto:somebody@company.ext]' => 'mailto:somebody@company.ext',
'HTtp://MIXED-Case.Com' => 'http://mixed-case.com/',
"The technology of magnetic energy has become so powerful an entire ".
"house can...http://bit.ly/8yEdeb"
=> "http://bit.ly/8yEdeb",
# False tests
'HTTP::Request::Common' => [],
'comp.infosystems.www.authoring.cgi' => [],
'MIME/Lite.pm' => [],
'foo@bar.baz.com' => [],
'Foo.pm' => [],
'Foo.pl' => [],
'hi Foo.pm Foo.pl mom' => [],
'x comp.ai.nat-lang libdb.so.3 x' => [],
'x comp.ai.nat-lang libdb.so.3 x' => [],
'www.marselisl www.info@skive-hallerne.dk' => [],
# XXX broken
# q{$url = 'http://'.rand(1000000).'@anonymizer.com/'.$url;}
# => [],
);
# Convert plain string values to a list of 1 spec which indicates
# that all finders will find that as the only URL.
for (@Tests{keys %Tests}) {
$_ = [[$all, $_]] if !ref;
}
# Run everything together as one big test.
$Tests{join "\n", keys %Tests} = [map { @$_ } values %Tests]
unless $No_joined;
# Each test yields 3 tests for each finder (return value matches
# number returned, matches equal expected matches, text was not
# modified).
my $finders = 0;
$finders += keys %{ $Run{$_} } for keys %Run;
}
# Given a run type and a list of specs, return the URLs which that type
# should find.
sub specs_to_urls {
my ($this_type, @spec) = @_;
my @out;
for (@spec) {
my ($found_by_types, $url) = @$_;
push @out, $url if index($found_by_types, $this_type) >= 0;
}
return @out;
}
sub run_function {
my ($rfunc, $rtext, $callback) = @_;
return $rfunc->($rtext, $callback);
}
sub run_object {
my ($class, $rtext, $callback) = @_;
my $finder = $class->new($callback);
return $finder->find($rtext);
}
sub run {
my ($orig_text, @spec) = @_;
note "# testing [$orig_text]\n";
for my $run_type (keys %Run) {
note "# run type $run_type\n";
while( my($run_name, $run_sub) = each %{ $Run{$run_type} } ) {
note "# running $run_name\n";
my @want = specs_to_urls $run_type, @spec;
my $text = $orig_text;
my @out;
my $n = $run_sub->(\$text, sub { push @out, $_[0]; $_[1] });
is $n, @out, "return value length";
is_deeply \@out, \@want, "output" or diag("Original text: $text");
is $text, $orig_text, "text unmodified";
}
}
}
while( my($text, $rspec_list) = each %Tests ) {
run $text, @$rspec_list;
}
# We used to turn URI::URL strict on and leave it on.
for my $val (0, 1) {
URI::URL::strict($val);
my $f = URI::Find->new(sub { });
my $t = "foo";
$f->find(\$t);
is $val, URI::URL::strict(), "URI::URL::strict $val";
}
|