File: test-range

package info (click to toggle)
libhttp-cache-transparent-perl 1.4-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 144 kB
  • sloc: perl: 342; makefile: 4
file content (63 lines) | stat: -rwxr-xr-x 2,213 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/perl -w
#  Copyright (C) 2002 Nigel Horne <njh@bandsman.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA.

use strict;
use WWW::RobotRules::AnyDBM_File;
use LWP::RobotUA;	# for spidering and cache lookup
use HTTP::Cache::Transparent;

HTTP::Cache::Transparent::init( { BasePath => '/tmp/cache',
                                Verbose => 1 } );

my $url = "http://www.bandsman.co.uk";
my $rules = WWW::RobotRules::AnyDBM_File->new('www.bandsman.co.uk/Spider', '/tmp/robots.cache');

my $robot = LWP::RobotUA->new('www.bandsman.co.uk/Spider', 'njh@despammed.com', $rules);
$robot->timeout(20);
# $robot->delay(1/60);	# wait 1 second between accesses to same site
$robot->delay(0);
$robot->protocols_allowed(['http']);	# disabling all others

my $request = new HTTP::Request 'GET' => $url;
$request->header('Accept' => 'text/html');
$robot->max_size(2048);
$request->header('Accept-Encoding' => 'gzip; deflate',
	'Referer' => 'http://www.bandsman.co.uk');

my $webdoc = $robot->simple_request($request);

if(!$webdoc->is_success) {
	die $webdoc->status_line . "\n";
	exit 1;
}
my $content = $webdoc->content;

if(my $encoding = $webdoc->content_encoding) {
	if($encoding =~ /gzip/i) {
		$content = Compress::Zlib::memGunzip($content);
		if(!defined $content) {
			die "$url can't be gunziped\n";
		}
	} elsif($encoding  =~ /deflate/i) {
		$content = Compress::Zlib::uncompress($content);
		if(!defined $content) {
			die "$url can't be uncompressed\n";
		}
	}
}
print "$url: " . length($content) . " bytes\n";
print $content . "\n";