File: benchmark.pl

package info (click to toggle)
libhtml-treebuilder-libxml-perl 0.28-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 264 kB
  • sloc: perl: 533; makefile: 2; sh: 1
file content (42 lines) | stat: -rw-r--r-- 956 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/perl
use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/../lib/";
use Web::Scraper;
use URI;
use Benchmark 'cmpthese';
use LWP::UserAgent;
use HTML::TreeBuilder::LibXML;

# Download the sample page a single time so that both contenders are fed
# exactly the same input.
my $html = get_content();

# Scraper shared by both contenders: for every element matched by the
# 'a > img' selector, append its src attribute to the 'image' list.
my $scraper = scraper {
    process 'a > img', 'image[]' => '@src';
};

# Report the versions of the modules taking part, so that results from
# different runs can be told apart.
my @module_versions = (
    [ 'Web::Scraper',              $Web::Scraper::VERSION ],
    [ 'HTML::TreeBuilder::XPath',  $HTML::TreeBuilder::XPath::VERSION ],
    [ 'HTML::TreeBuilder::LibXML', $HTML::TreeBuilder::LibXML::VERSION ],
);
for my $entry (@module_versions) {
    my ($module, $version) = @{$entry};
    print "$module: $version\n";
}

# Run every contender 50 times and let Benchmark print the comparison.
my %contenders = (
    no_libxml  => \&no_libxml,
    use_libxml => \&use_libxml,
);
cmpthese(50, \%contenders);

# Benchmark contender: run the shared scraper over the downloaded page with
# whatever tree builder Web::Scraper picks on its own.
# Returns the value produced by $scraper->scrape (called in scalar context).
sub no_libxml {
    my $scraped = $scraper->scrape($html);
    return $scraped;
}

# Benchmark contender: run the shared scraper over the downloaded page while
# HTML::TreeBuilder::XPath->new is redirected to HTML::TreeBuilder::LibXML.
# Returns the value produced by $scraper->scrape (called in scalar context).
sub use_libxml {
    # Replacement constructor: ignores its arguments and hands back a fresh
    # LibXML-backed builder.
    my $libxml_ctor = sub {
        return HTML::TreeBuilder::LibXML->new();
    };

    # 'local' confines the override to the dynamic scope of this call, so the
    # original constructor is back in place once the sub returns.
    local *HTML::TreeBuilder::XPath::new = $libxml_ctor;

    my $scraped = $scraper->scrape($html);
    return $scraped;
}

# Fetch the HTML document that serves as benchmark input.
#
# Arguments:
#   $url - optional; the page to download. Defaults to
#          'http://www.nicovideo.jp/' when omitted or undef.
#
# Returns whatever $res->content yields for the response (this sub does not
# call decoded_content, so no extra decoding step is requested here).
#
# Dies when the request is not successful. The message names the URL and
# carries the response's status line so the cause of the failure is visible.
sub get_content {
    my ($url) = @_;
    $url = 'http://www.nicovideo.jp/' unless defined $url;

    my $ua  = LWP::UserAgent->new;
    my $res = $ua->get($url);

    # No trailing newline on purpose: perl appends the file and line number.
    $res->is_success
        or die "cannot get html from $url: " . $res->status_line;

    return $res->content;
}