File: CacheObject.pm

package info (click to toggle)
sitescooper 3.1.2-1
  • links: PTS
  • area: main
  • in suites: sarge, woody
  • size: 3,000 kB
  • ctags: 662
  • sloc: perl: 8,677; makefile: 105
file content (69 lines) | stat: -rw-r--r-- 1,516 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#===========================================================================
# 

package Sitescooper::CacheObject;

require Exporter;
use Carp;

use Sitescooper::Main;
use Sitescooper::PerSiteCache;

# Empty inheritance list: this package names no parent classes.
@ISA = qw();
# Predeclare the package globals so they may be used unqualified once
# "use strict" is in effect.
# NOTE(review): $SLASH is not referenced in the code visible in this
# file -- confirm whether anything still needs it.
use vars qw{ @ISA $SLASH };
use strict;

# ---------------------------------------------------------------------------

# Constructor. Can be invoked on a class name or on an existing
# instance; in the latter case the new object is blessed into that
# instance's class.
#
# Positional arguments, in order:
#   $main     - stored under the 'main' key
#   $cache    - stored under the 'cache' key
#   $pagehtml - stored under the 'pagehtml' key
#   $lastmod  - stored under the 'lastmod' key
#
# Arguments that are not supplied are stored as undef; anything past
# the fourth argument is ignored. Returns the blessed hash reference.
sub new {
  my ($proto, @args) = @_;
  my $class = ref($proto) || $proto;

  # All four keys are always created, even when fewer arguments arrive.
  my %fields;
  @fields{qw(main cache pagehtml lastmod)} = @args[0 .. 3];

  return bless (\%fields, $class);
}

# ---------------------------------------------------------------------------

# Accessor: returns whatever is stored under this object's 'pagehtml'
# key (the third argument given to new()).
sub get_page {
  my $self = shift;
  return $self->{pagehtml};
}

# Accessor: returns whatever is stored under this object's 'lastmod'
# key (the fourth argument given to new()).
sub get_lastmod {
  my $self = shift;
  return $self->{lastmod};
}

# ---------------------------------------------------------------------------

# Report whether already-cleaned text is the same as the cleaned-up
# form of the cached page. Sometimes the ad banners will be the only
# things that have changed between retrieves, and html_to_text will
# have stripped those out, so the comparison is done on the cleaned
# text rather than on the raw HTML.
#
# Arguments:
#   $url     - passed through to the $urlproc calls
#   $urlproc - object providing strip_story(), html_to_text() and
#              text_equals()
#   $text    - the cleaned-up text to compare against
#
# The cached 'pagehtml' is run through strip_story() and then
# html_to_text() (using the configured output_style) before being
# handed to text_equals(). Returns 1 if text_equals() reports true,
# 0 otherwise.
#
sub text_matches_cached_text {
  my ($self, $url, $urlproc, $text) = @_;

  my $cached_story = $urlproc->strip_story
		($url, $self->{pagehtml}, " (cached)");
  my $cached_text = $urlproc->html_to_text
		($url, $cached_story, $self->{main}->{cf}->{output_style});

  return $urlproc->text_equals ($text, $cached_text) ? 1 : 0;
}

# ---------------------------------------------------------------------------

1;