1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
#===========================================================================
#
package Sitescooper::CacheObject;
require Exporter;
use Carp;
use Sitescooper::Main;
use Sitescooper::PerSiteCache;
@ISA = qw();
use vars qw{ @ISA $SLASH };
use strict;
# ---------------------------------------------------------------------------
# Constructor.  May be invoked on the class name or on an existing
# instance (in which case the new object is blessed into that
# instance's class).
#
# Arguments, in order: main, cache, pagehtml, lastmod.  Each is stored
# unmodified under the hash key of the same name; missing arguments are
# stored as undef.
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  # Populate all four fields in one go from the remaining arguments.
  my %fields;
  @fields{qw(main cache pagehtml lastmod)} = @_;

  return bless (\%fields, $class);
}
# ---------------------------------------------------------------------------
# Accessor: returns the value stored under 'pagehtml' at construction.
sub get_page {
  my $self = shift;
  return $self->{pagehtml};
}
# Accessor: returns the value stored under 'lastmod' at construction.
sub get_lastmod {
  my $self = shift;
  return $self->{lastmod};
}
# ---------------------------------------------------------------------------
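# Check whether the supplied cleaned-up text matches the cleaned-up
# cached HTML; returns 1 if they match, 0 otherwise.  Callers can use
# this to ensure that a page has really changed: sometimes the ad
# banners will be the only things that have changed between retrieves,
# and html_to_text will have stripped those out.
#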
sub text_matches_cached_text {
  my ($self, $url, $urlproc, $text) = @_;

  # Pass the cached page through $urlproc's strip_story, then convert
  # the result with html_to_text using the configured output style.
  my $cached_text =
      $urlproc->strip_story ($url, $self->{pagehtml}, " (cached)");
  $cached_text = $urlproc->html_to_text
      ($url, $cached_text, $self->{main}->{cf}->{output_style});

  # Normalise whatever text_equals returns to a plain 1 or 0.
  return $urlproc->text_equals ($text, $cached_text) ? 1 : 0;
}
# ---------------------------------------------------------------------------
1;
|