File: LWPHTTPClient.pm

package info (click to toggle)
sitescooper 3.1.2-1
  • links: PTS
  • area: main
  • in suites: sarge, woody
  • size: 3,000 kB
  • ctags: 662
  • sloc: perl: 8,677; makefile: 105
file content (140 lines) | stat: -rw-r--r-- 3,479 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#===========================================================================

package Sitescooper::LWPHTTPClient;

require Exporter;
use Carp;
use Sitescooper::HTTPClient;

@ISA = qw(Sitescooper::HTTPClient);
@EXPORT= qw();
$VERSION = "0.1";
sub Version { $VERSION; }

use strict;

sub new {
  my $class = shift; $class = ref($class) || $class;
  my $scoop = shift;
  croak "scoop not defd" unless defined ($scoop);

  # the state object is static; it doesn't store anything
  # at the moment. We create it here to avoid freeing and
  # reallocing it continually.
  my $self = {
    'scoop'		=> $scoop,
    'state'		=> new Sitescooper::HTTPRequestState(),
  };

  bless ($self, $class);
  $self;
}

sub init {
  my ($self) = @_;
  # nothing needed here at this point
}

sub get_max_active_requests {
  my ($self) = @_;
  1;
}


sub can_preload {
  my ($self) = @_;
  0;
}   

# ===========================================================================

sub start_get {
  my ($self, $referrer, $url, $lastmod, $is_dynamic) = @_;

  my $req = $self->make_http_request ($referrer, $url, $lastmod, $is_dynamic);
  my $timeout = 10;
  my $resp = $self->invoke_http_request ($req, $timeout);
  $self->{state}->{resp} = $resp;
  $self->{state};
}

sub get_waiting_fh {
  my ($self, $state) = @_;
  undef;
}

sub ready_to_finish {
  my ($self, $state) = @_;
  1;
}

sub finish_get {
  my ($self, $state) = @_;
  croak "state != self->state" unless ($self->{state} == $state);
  my $resp = $state->{resp};
  delete $state->{resp};
  $resp;
}

# ===========================================================================

sub make_http_request {                 # static, called by Preloaders
  my ($self, $referrer, $url, $lastmod, $is_dynamic) = @_;

  # REVISIT - support POST.
  my $req = new HTTP::Request ('GET', $url);
  $self->{scoop}->{useragent}->add_proxy_auth_to_request ($req);

  # TODO -- support other languages via config file directive
  $req->header ("Accept-Language" => "en",
        "Accept-Charset" => "iso-8859-1,*,utf-8");

  if (defined $lastmod) {
    $req->header ("If-Modified-Since" => HTTP::Date::time2str ($lastmod));
  }
  if (defined $referrer) {
    $req->referer ($referrer);
  }
  if (($is_dynamic && !defined $lastmod) || ($self->{scoop}->{cf}->{badcache})) {
    $req->header ("Cache-Control" => "no-cache");
    $req->header ("Pragma" => "no-cache");
  }

  # cookie_jar will assume that it's a HTTP request. Reasonable enough
  # I suppose, but we need to make sure ourselves.
  if ($url =~ /^http:/i) { $self->{scoop}->{cookie_jar}->add_cookie_header($req); }
  $req;
}

# ---------------------------------------------------------------------------

sub invoke_http_request {               # static, called by Preloaders
  my ($self, $req, $timeout) = @_;
  my $resp = undef;

  # use the version of request which can handle redirect-replies which
  # set cookies at the same time.
  my $cmd = '
    $resp = $self->{scoop}->{useragent}->request_handle_cookie_redirects
					($self->{scoop}->{cookie_jar}, $req);
  ';

  $self->{scoop}->{useragent}->clear_last_auth_realm();

  # REVISIT -- implement timeout for Win32/Mac perl
  if ($self->{scoop}->MyOS() eq 'UNIX') {
    eval '
      local $SIG{"ALRM"} = sub { die "alarm\n" };
      alarm $timeout*60; { ' . $cmd. ' } alarm 0;
    ';
  } else {
    eval $cmd;
  }

  die if $@ && $@ ne "alarm\n";
  $resp;
}

# ---------------------------------------------------------------------------

1;