File: Find.t

package info (click to toggle)
liburi-find-perl 20100505-2
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 148 kB
  • ctags: 24
  • sloc: perl: 655; makefile: 38
file content (208 lines) | stat: -rw-r--r-- 8,401 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/perl -w

use strict;

use Test::More 'no_plan';

use_ok 'URI::Find';
use_ok 'URI::Find::Schemeless';

my $No_joined = @ARGV && $ARGV[0] eq '--no-joined' ? shift : 0;


# %Run contains one entry for each type of finder.  Keys are mnemonics,
# required to be a single letter.  The values are hashes, keys are names
# (used only for output) and values are the subs which actually run the
# tests.  Each is invoked with a reference to the text to scan and a
# code reference, and runs the finder on that text with that callback,
# returning the number of matches.

my %Run;
BEGIN {
    %Run = (
            # plain
            P => {
                  old_interface => sub { run_function(\&find_uris, @_) },
                  regular       => sub { run_object('URI::Find', @_) },
                 },
            # schemeless
            S => {
                  schemeless    =>
                      sub { run_object('URI::Find::Schemeless', @_) },
                 },
       );

    die if grep { length != 1 } keys %Run;
}

# A spec is a reference to a 2-element list.  The first is a string
# which contains the %Run keys which will find the URL, the second is
# the URL itself.  Eg:
#
#    [PS => 'http://www.foo.com/']      # found by both P and S
#    [S  => 'http://asdf.foo.com/']     # only found by S
#
# %Tests maps from input text to a list of specs which describe the URLs
# which will be found.  If the value is a reference to an empty list, no
# URLs will be found in the key.
#
# As a special case, a %Tests value can be initialized as a string.
# This will be replaced with a spec which indicates that all finders
# will locate that as the only URL in the key.

my %Tests;
BEGIN {
    my $all = join '', keys %Run;

    # ARGH!  URI::URL is inconsistant in how it normalizes URLs!
    # HTTP URLs get a trailing slash, FTP and gopher do not.
    %Tests = (
          'Something something something.travel and stuff'
              => [[ S => 'http://something.travel/' ]],
          '<URL:http://www.perl.com>' => 'http://www.perl.com/',
          '<ftp://ftp.site.org>'      => 'ftp://ftp.site.org',
          '<ftp.site.org>'            => [[ S => 'ftp://ftp.site.org' ]],
          'Make sure "http://www.foo.com" is caught' =>
                'http://www.foo.com/',
          'http://www.foo.com'  => 'http://www.foo.com/',
          'www.foo.com'         => [[ S => 'http://www.foo.com/' ]],
          'ftp.foo.com'         => [[ S => 'ftp://ftp.foo.com' ]],
          'gopher://moo.foo.com'        => 'gopher://moo.foo.com',
          'I saw this site, http://www.foo.com, and its really neat!'
              => 'http://www.foo.com/',
          'Foo Industries (at http://www.foo.com)'
              => 'http://www.foo.com/',
          'Oh, dear.  Another message from Dejanews.  http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25  How fun.'
              => 'http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25',
          'Hmmm, Storyserver from news.com.  http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-200-1537811  How nice.'
             => [[S => 'http://news.com/'],
                 [$all => 'http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-200-1537811']],
          '$html = get("http://www.perl.com/");' => 'http://www.perl.com/',
          q|my $url = url('http://www.perl.com/cgi-bin/cpan_mod');|
              => 'http://www.perl.com/cgi-bin/cpan_mod',
          'http://www.perl.org/support/online_support.html#mail'
              => 'http://www.perl.org/support/online_support.html#mail',
          'irc.lightning.net irc.mcs.net'
              => [[S => 'http://irc.lightning.net/'],
                  [S => 'http://irc.mcs.net/']],
          'foo.bar.xx/~baz/',
              => [[S => 'http://foo.bar.xx/~baz/']],
          'foo.bar.xx/~baz/ abcd.efgh.mil, none.such/asdf/ hi.there.org'
              => [[S => 'http://foo.bar.xx/~baz/'],
                  [S => 'http://abcd.efgh.mil/'],
                  [S => 'http://hi.there.org/']],
          'foo:<1.2.3.4>'
              => [[S => 'http://1.2.3.4/']],
          'mail.eserv.com.au?  failed before ? designated end'
              => [[S => 'http://mail.eserv.com.au/']],
          'foo.info/himom ftp.bar.biz'
              => [[S => 'http://foo.info/himom'],
                  [S => 'ftp://ftp.bar.biz']],
          '(http://round.com)'   => 'http://round.com/',
          '[http://square.com]'  => 'http://square.com/',
          '{http://brace.com}'   => 'http://brace.com/',
          '<http://angle.com>'   => 'http://angle.com/',
          '(round.com)'          => [[S => 'http://round.com/'  ]],
          '[square.com]'         => [[S => 'http://square.com/' ]],
          '{brace.com}'          => [[S => 'http://brace.com/'  ]],
          '<angle.com>'          => [[S => 'http://angle.com/'  ]],
          '<x>intag.com</x>'     => [[S => 'http://intag.com/'  ]],
          '[mailto:somebody@company.ext]' => 'mailto:somebody@company.ext',
          'HTtp://MIXED-Case.Com' => 'http://mixed-case.com/',
          "The technology of magnetic energy has become so powerful an entire ".
          "house can...http://bit.ly/8yEdeb"
            => "http://bit.ly/8yEdeb",

          # False tests
          'HTTP::Request::Common'                       => [],
          'comp.infosystems.www.authoring.cgi'          => [],
          'MIME/Lite.pm'                                => [],
          'foo@bar.baz.com'                             => [],
          'Foo.pm'                                      => [],
          'Foo.pl'                                      => [],
          'hi Foo.pm Foo.pl mom'                        => [],
          'x comp.ai.nat-lang libdb.so.3 x'             => [],
          'x comp.ai.nat-lang libdb.so.3 x'             => [],
          'www.marselisl www.info@skive-hallerne.dk'    => [],
# XXX broken
#         q{$url = 'http://'.rand(1000000).'@anonymizer.com/'.$url;}
#                                                       => [],
    );

    # Convert plain string values to a list of 1 spec which indicates
    # that all finders will find that as the only URL.
    for (@Tests{keys %Tests}) {
        $_ = [[$all, $_]] if !ref;
    }

    # Run everything together as one big test.
    $Tests{join "\n", keys %Tests} = [map { @$_ } values %Tests]
        unless $No_joined;

    # Each test yields 3 tests for each finder (return value matches
    # number returned, matches equal expected matches, text was not
    # modified).
    my $finders = 0;
    $finders += keys %{ $Run{$_} } for keys %Run;
}

# Given a run type and a list of specs, return the URLs which that type
# should find.

sub specs_to_urls {
    my ($this_type, @spec) = @_;
    my @out;

    for (@spec) {
        my ($found_by_types, $url) = @$_;
        push @out, $url if index($found_by_types, $this_type) >= 0;
    }

    return @out;
}

sub run_function {
    my ($rfunc, $rtext, $callback) = @_;

    return $rfunc->($rtext, $callback);
}

sub run_object {
    my ($class, $rtext, $callback) = @_;

    my $finder = $class->new($callback);
    return $finder->find($rtext);
}

sub run {
    my ($orig_text, @spec) = @_;

    note "# testing [$orig_text]\n";
    for my $run_type (keys %Run) {
        note "# run type $run_type\n";
        while( my($run_name, $run_sub) = each %{ $Run{$run_type} } ) {
            note "# running $run_name\n";
            my @want = specs_to_urls $run_type, @spec;
            my $text = $orig_text;
            my @out;
            my $n = $run_sub->(\$text, sub { push @out, $_[0]; $_[1] });
            is $n, @out, "return value length";
            is_deeply \@out, \@want, "output" or diag("Original text: $text");
            is $text, $orig_text, "text unmodified";
        }
    }
}

while( my($text, $rspec_list) = each %Tests ) {
    run $text, @$rspec_list;
}

# We used to turn URI::URL strict on and leave it on.

for my $val (0, 1) {
    URI::URL::strict($val);
    my $f = URI::Find->new(sub { });
    my $t = "foo";
    $f->find(\$t);
    is $val, URI::URL::strict(), "URI::URL::strict $val";
}