#===========================================================================

# Sitescooper::PreloadURLProcessor
#
# A URLProcessor subclass that fetches a URL through the scooper's HTTP
# client (start_get) and, once the reply is in, stores the response body in
# the scooper's {preloaded_responses} table keyed by URL (finish_get).
package Sitescooper::PreloadURLProcessor;

use Sitescooper::URLProcessor;
use Carp;

use strict;

use vars qw(
		@ISA
	);

# Inherit construction and state handling from the generic URL processor.
@ISA = qw(Sitescooper::URLProcessor);

# ---------------------------------------------------------------------------

# Constructor.  Accepts either a class name or an existing object as the
# invocant and hands the five constructor arguments straight through to the
# superclass constructor; the object it returns is returned unchanged (the
# superclass is expected to have blessed it already).
#
#   $scoop, $robot, $scf, $ref, $url - forwarded verbatim to SUPER::new
sub new {
  my ($invocant, $scoop, $robot, $scf, $ref, $url) = @_;
  my $class = ref($invocant) || $invocant;

  my $self = $class->SUPER::new($scoop, $robot, $scf, $ref, $url);
  return $self;
}

# ---------------------------------------------------------------------------

# Kick off the preload request for this processor's URL.
#
# The URL is passed through Sitescooper::Util::URLWithoutAnchor, filtered
# against protocols that cannot be preloaded, and run through
# apply_url_preproc (which may rewrite it or veto it by returning undef).
# If the request is started, the HTTP client's handle is kept in
# $self->{http_state} and the state moves to STATE_NET_WAIT.
#
# Returns 1 when a request was started; returns nothing (empty list / undef)
# when the URL was skipped.  Croaks if the processor is not in STATE_PRE_GET
# or if the HTTP client returns no state handle.
sub start_get {
  my $self = shift;
  my $url = $self->{url};

  if ($self->get_state() != $Sitescooper::URLProcessor::STATE_PRE_GET) {
    croak ("state != STATE_PRE_GET");
  }

  # $fullurl is only used for log messages; $url is what gets fetched.
  my $fullurl = $url;
  $url = Sitescooper::Util::URLWithoutAnchor ($url);

  # "mailto:" URLs carry no "//" after the scheme, so they need their own
  # alternative; the other schemes are matched with "://".  URI schemes are
  # case-insensitive, hence /i.
  if ($url =~ m{^(?:(?:ftp|https|gopher|pnm)://|mailto:)}i) {
    $self->{scoop}->dbg ("Non-story URL ignored (bad protocol): $fullurl");
    return;
  }

  my $newurl = $self->apply_url_preproc($url);

  if (!defined $newurl) {
    $self->{scoop}->dbg ("URLProcess says URL should be ignored: $fullurl");
    return;
  } elsif ($newurl ne $url) {
    # The preprocessor rewrote the URL: adopt the rewritten form from here on.
    $fullurl = $newurl;
    $url = Sitescooper::Util::URLWithoutAnchor ($newurl);
    $self->{url} = $url;
  }

  if ($self->{scoop}->{cf}->{use_only_cache}) {
    # NOTE(review): this logs via $self->dbg while the rest of this sub uses
    # $self->{scoop}->dbg -- presumably the superclass provides dbg; confirm.
    $self->dbg("-fromcache switch is on, not preloading");
    return;
  }

  $self->{scoop}->verbose ("Preloading: $url");

  $self->{http_state} =
      $self->{scoop}->{httpclient}->start_get ($self->{referrer}, $url, undef);
  if (!defined $self->{http_state}) {
    croak "http_state is unset after start_get";
  }

  $self->set_state ($Sitescooper::URLProcessor::STATE_NET_WAIT);
  1;
}

# ---------------------------------------------------------------------------

# Complete a preload request previously started by start_get.
#
# Moves the processor to STATE_POST_GET, collects the reply from the HTTP
# client using the handle saved in $self->{http_state} (which is then
# cleared), and on success stores the response content in
# $self->{scoop}->{preloaded_responses} under this processor's URL.
#
# Returns 1 on success.  Returns nothing (empty list / undef) if the global
# interrupt flag is set or if the GET failed; a failed GET is also reported
# through sitewarn.  Croaks if no request handle is pending.
sub finish_get {
  my $self = shift;

  # The state advances even when we bail out early on an interrupt.
  $self->set_state ($Sitescooper::URLProcessor::STATE_POST_GET);
  if ($Sitescooper::Main::got_intr_flag) { return; }

  my $url = $self->{url};

  if (!defined $self->{http_state}) {
    croak "http_state is unset in finish_get";
  }
  my $resp = $self->{scoop}->{httpclient}->finish_get ($self->{http_state});
  $self->{http_state} = undef;

  if (!$resp->is_success) {
    $self->sitewarn  ("Preload GET failed: ".$resp->status_line." ($url)");
    # Bare return, matching the interrupt path above: false in both scalar
    # and list context.
    return;
  }

  $self->{scoop}->{preloaded_responses}->{$url} = $resp->content;
  1;
}

# ---------------------------------------------------------------------------

1;
