File: PreloadURLProcessor.pm

package info (click to toggle)
sitescooper 3.1.2-1
  • links: PTS
  • area: main
  • in suites: sarge, woody
  • size: 3,000 kB
  • ctags: 662
  • sloc: perl: 8,677; makefile: 105
file content (99 lines) | stat: -rw-r--r-- 2,523 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#===========================================================================

# Sitescooper::PreloadURLProcessor -- a URL processor that "preloads" a page:
# start_get() kicks off an HTTP GET through the scoop's httpclient, and
# finish_get() stores the response content in the scoop's
# {preloaded_responses} table, keyed by URL.
package Sitescooper::PreloadURLProcessor;

use Sitescooper::URLProcessor;
use Carp;

use strict;

use vars qw(
		@ISA
	);

# Subclass of the generic URL processor.  Methods called on $self in this
# file but not defined here (get_state, set_state, apply_url_preproc, dbg,
# sitewarn) are presumably supplied by the base class -- TODO confirm.
@ISA = qw(Sitescooper::URLProcessor);

# ---------------------------------------------------------------------------

# Constructor.  May be invoked on either the class name or an existing
# instance (in which case the instance's class is reused).  Exactly five
# arguments -- scoop, robot, scf, referrer, url -- are forwarded to the
# parent constructor; any extras are dropped.
sub new {
  my ($invocant, @rest) = @_;
  my $class = ref($invocant) || $invocant;

  my ($scoop, $robot, $scf, $ref, $url) = @rest;

  # No re-bless here: the object returned by the parent constructor is
  # handed back as-is (presumably already blessed into $class -- TODO confirm).
  my $obj = $class->SUPER::new($scoop, $robot, $scf, $ref, $url);
  return $obj;
}

# ---------------------------------------------------------------------------

# Begin preloading $self->{url}.
#
# Must be called while the processor is in STATE_PRE_GET; croaks otherwise.
# The anchor is stripped from the URL, unsupported schemes are skipped, and
# the URL is run through apply_url_preproc(), which may rewrite it or veto
# it by returning undef.  If the URL survives and the use_only_cache option
# is off, a GET is started on the scoop's httpclient, the handle is kept in
# $self->{http_state}, and the state advances to STATE_NET_WAIT.
#
# Returns 1 when a GET was started; returns nothing (empty list / undef)
# when the URL was skipped.  Croaks if the httpclient hands back an
# undefined state.
sub start_get {
  my $self = shift;
  my $url = $self->{url};

  if ($self->get_state() != $Sitescooper::URLProcessor::STATE_PRE_GET) {
    croak ("state != STATE_PRE_GET");
  }

  # Keep the original (anchor included) for log messages only.
  my $fullurl = $url;
  $url = Sitescooper::Util::URLWithoutAnchor ($url);

  # Skip schemes that are not fetched here.  "mailto:" URLs carry no "//",
  # so they need their own alternative; schemes are case-insensitive.
  if ($url =~ m{^(?:(?:ftp|https|gopher|pnm)://|mailto:)}i) {
    $self->{scoop}->dbg ("Non-story URL ignored (bad protocol): $fullurl");
    return;
  }

  my $newurl = $self->apply_url_preproc($url);

  if (!defined $newurl) {
    $self->{scoop}->dbg ("URLProcess says URL should be ignored: $fullurl");
    return;
  }
  if ($newurl ne $url) {
    # The preprocessor rewrote the URL: fetch, and remember, the new one.
    $url = Sitescooper::Util::URLWithoutAnchor ($newurl);
    $self->{url} = $url;
  }

  if ($self->{scoop}->{cf}->{use_only_cache}) {
    # NOTE(review): this message goes through $self->dbg while the others
    # above use $self->{scoop}->dbg -- confirm that is intentional.
    $self->dbg("-fromcache switch is on, not preloading");
    return;
  }

  $self->{scoop}->verbose ("Preloading: $url");

  $self->{http_state} =
      $self->{scoop}->{httpclient}->start_get ($self->{referrer}, $url, undef);
  if (!defined $self->{http_state}) {
    croak "http_state is unset after start_get";
  }

  $self->set_state ($Sitescooper::URLProcessor::STATE_NET_WAIT);
  return 1;
}

# ---------------------------------------------------------------------------

# Complete the GET begun by start_get().
#
# Moves the processor to STATE_POST_GET, then (unless the global interrupt
# flag is set) collects the response from the scoop's httpclient and clears
# $self->{http_state}.  On success the response content is stored in
# $self->{scoop}->{preloaded_responses} under the URL.
#
# Returns 1 on success.  Returns nothing (empty list / undef) if interrupted
# or if the response is not a success; the latter is also reported through
# sitewarn().  Croaks if no GET is pending.
sub finish_get {
  my $self = shift;

  $self->set_state ($Sitescooper::URLProcessor::STATE_POST_GET);
  if ($Sitescooper::Main::got_intr_flag) { return; }

  my $url = $self->{url};

  if (!defined $self->{http_state}) {
    croak "http_state is unset in finish_get";
  }
  my $resp = $self->{scoop}->{httpclient}->finish_get ($self->{http_state});
  $self->{http_state} = undef;          # the handle is single-use

  if (!$resp->is_success) {
    $self->sitewarn  ("Preload GET failed: ".$resp->status_line." ($url)");
    # Bare return: false in list context too, matching the interrupt path.
    return;
  }

  $self->{scoop}->{preloaded_responses}->{$url} = $resp->content;
  return 1;
}

# ---------------------------------------------------------------------------

1;