1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
#===========================================================================
package Sitescooper::LWPHTTPClient;
require Exporter;
use Carp;
use Sitescooper::HTTPClient;
@ISA = qw(Sitescooper::HTTPClient);
@EXPORT= qw();
$VERSION = "0.1";
sub Version { $VERSION; }
use strict;
sub new {
my $class = shift; $class = ref($class) || $class;
my $scoop = shift;
croak "scoop not defd" unless defined ($scoop);
# the state object is static; it doesn't store anything
# at the moment. We create it here to avoid freeing and
# reallocing it continually.
my $self = {
'scoop' => $scoop,
'state' => new Sitescooper::HTTPRequestState(),
};
bless ($self, $class);
$self;
}
sub init {
my ($self) = @_;
# nothing needed here at this point
}
sub get_max_active_requests {
my ($self) = @_;
1;
}
sub can_preload {
my ($self) = @_;
0;
}
# ===========================================================================
sub start_get {
my ($self, $referrer, $url, $lastmod, $is_dynamic) = @_;
my $req = $self->make_http_request ($referrer, $url, $lastmod, $is_dynamic);
my $timeout = 10;
my $resp = $self->invoke_http_request ($req, $timeout);
$self->{state}->{resp} = $resp;
$self->{state};
}
sub get_waiting_fh {
my ($self, $state) = @_;
undef;
}
sub ready_to_finish {
my ($self, $state) = @_;
1;
}
sub finish_get {
my ($self, $state) = @_;
croak "state != self->state" unless ($self->{state} == $state);
my $resp = $state->{resp};
delete $state->{resp};
$resp;
}
# ===========================================================================
sub make_http_request { # static, called by Preloaders
my ($self, $referrer, $url, $lastmod, $is_dynamic) = @_;
# REVISIT - support POST.
my $req = new HTTP::Request ('GET', $url);
$self->{scoop}->{useragent}->add_proxy_auth_to_request ($req);
# TODO -- support other languages via config file directive
$req->header ("Accept-Language" => "en",
"Accept-Charset" => "iso-8859-1,*,utf-8");
if (defined $lastmod) {
$req->header ("If-Modified-Since" => HTTP::Date::time2str ($lastmod));
}
if (defined $referrer) {
$req->referer ($referrer);
}
if (($is_dynamic && !defined $lastmod) || ($self->{scoop}->{cf}->{badcache})) {
$req->header ("Cache-Control" => "no-cache");
$req->header ("Pragma" => "no-cache");
}
# cookie_jar will assume that it's a HTTP request. Reasonable enough
# I suppose, but we need to make sure ourselves.
if ($url =~ /^http:/i) { $self->{scoop}->{cookie_jar}->add_cookie_header($req); }
$req;
}
# ---------------------------------------------------------------------------
sub invoke_http_request { # static, called by Preloaders
my ($self, $req, $timeout) = @_;
my $resp = undef;
# use the version of request which can handle redirect-replies which
# set cookies at the same time.
my $cmd = '
$resp = $self->{scoop}->{useragent}->request_handle_cookie_redirects
($self->{scoop}->{cookie_jar}, $req);
';
$self->{scoop}->{useragent}->clear_last_auth_realm();
# REVISIT -- implement timeout for Win32/Mac perl
if ($self->{scoop}->MyOS() eq 'UNIX') {
eval '
local $SIG{"ALRM"} = sub { die "alarm\n" };
alarm $timeout*60; { ' . $cmd. ' } alarm 0;
';
} else {
eval $cmd;
}
die if $@ && $@ ne "alarm\n";
$resp;
}
# ---------------------------------------------------------------------------
1;
|