File: Reuse.pm

package info (click to toggle)
spamassassin 4.0.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 22,988 kB
  • sloc: perl: 88,863; ansic: 5,193; sh: 3,737; javascript: 339; sql: 295; makefile: 209; python: 49
file content (295 lines) | stat: -rw-r--r-- 8,491 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
=head1 NAME

Mail::SpamAssassin::Plugin::Reuse - For reusing old rule hits during a mass-check

=head1 SYNOPSIS

  loadplugin    Mail::SpamAssassin::Plugin::Reuse

  ifplugin      Mail::SpamAssassin::Plugin::Reuse

  reuse NETWORK_RULE [ NETWORK_RULE_OLD_NAME ]

  run_reuse_tests_only 0/1

  endif

=head1 DESCRIPTION

The purpose of this plugin is to work in conjunction with B<mass-check
--reuse> to map rules hit in input messages to rule hits in the
mass-check output.

run_reuse_tests_only 1 is a special option for spamassassin/spamd use.
Only reuse flagged tests will be run. It will also _enable_ network/DNS
lookups. This is mainly intended for fast mass processing of corpus
messages, so they can be properly reused later. For example:

  spamd --pre="loadplugin Mail::SpamAssassin::Plugin::Reuse" \
    --pre="run_reuse_tests_only 1" ...

Such a dedicated spamd could be scripted to add the X-Spam-Status header
to messages efficiently.

=cut

package Mail::SpamAssassin::Plugin::Reuse;

# use bytes;
use strict;
use warnings;
use re 'taint';

use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Constants qw(:sa);

# Inherit the plugin interface from the SpamAssassin plugin base class.
our @ISA = qw(Mail::SpamAssassin::Plugin);

# Keep the rule-name pattern in a lexical so that it can be interpolated
# into the 'reuse' line regex in set_config.  (RULENAME_RE is presumably
# exported by Mail::SpamAssassin::Constants via the :sa tag -- confirm.)
my $RULENAME_RE = RULENAME_RE;

# constructor
# Constructor.
#
# Arguments:
#   $invocant - class name, or an existing object whose class is reused
#   $samain   - main SpamAssassin object; its {conf} receives our settings
#
# Returns the new plugin object, blessed into the resolved class.
sub new {
  my ($invocant, $samain) = @_;

  # allow both Class->new(...) and $object->new(...)
  my $class = ref($invocant) ? ref($invocant) : $invocant;
  my $self = bless($class->SUPER::new($samain), $class);

  # register the 'reuse' and 'run_reuse_tests_only' settings
  $self->set_config($samain->{conf});

  # Priority 100 is meant to make our finish_parsing_start hook run last
  # (or close to last), since every other rule must already be defined
  # by the time we look at them.
  $self->register_method_priority("finish_parsing_start", 100);

  return $self;
}

# Register this plugin's configuration settings with the config parser.
#
#   reuse CURRENT_NAME [ OLD_NAME ... ]
#     e.g.  reuse NET_TEST_V1 NET_TEST_V0
#     Hits recorded in the input under CURRENT_NAME or any OLD_NAME are
#     mapped onto CURRENT_NAME.
#
#   run_reuse_tests_only 0/1   (boolean, default 0)
#
# Arguments:
#   $conf - configuration object; $conf->{parser} receives the commands
sub set_config {
  my ($self, $conf) = @_;

  my %reuse_cmd = (
    setting => 'reuse',
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
    code    => sub {
      my ($cf, $key, $value, $line) = @_;

      # one mandatory rule name, then any number of whitespace-separated
      # older names
      return $Mail::SpamAssassin::Conf::INVALID_VALUE
        unless $value =~ /^\s*(${RULENAME_RE})(?:\s+(${RULENAME_RE}(?:\s+${RULENAME_RE})*))?\s*$/;
      my ($new_name, $aliases) = ($1, $2);

      # the current name always counts as one of its own "old" names
      my @old_names = ($new_name,
                       defined $aliases ? split(/\s+/, $aliases) : ());

      dbg("reuse: read rule, old: %s new: %s", join(' ', @old_names), $new_name);

      push @{$cf->{reuse_tests}->{$new_name}}, $_ for @old_names;
    },
  );

  my %reuse_only_cmd = (
    setting => 'run_reuse_tests_only',
    default => 0,
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
  );

  $conf->{parser}->register_commands([ \%reuse_cmd, \%reuse_only_cmd ]);
}

# Plugin hook: post-process the parsed configuration.
#
#  * every rule whose tflags contain "net" is implicitly made reusable
#    under its own name, unless a 'reuse' line already covers it
#  * with run_reuse_tests_only set, all tests that are not reusable are
#    removed from the configuration and nothing else is done
#  * otherwise each reusable rule gets an empty test (if it has none),
#    a default score in all four score sets (if it has no score), and a
#    [ priority, stage ] entry in {reuse_tests_order}
#
# Arguments:
#   $opts->{conf} - the configuration object being finished
sub finish_parsing_start {
  my ($self, $opts) = @_;

  my $conf = $opts->{conf};
  my $tflags = $conf->{tflags};

  while (my ($name, $flags) = each %{$tflags}) {
    next unless $flags =~ /\bnet\b/;
    next if exists $conf->{reuse_tests}->{$name};
    dbg("reuse: forcing reuse of net rule $name");
    push @{$conf->{reuse_tests}->{$name}}, $name;
  }

  return 0 unless exists $conf->{reuse_tests};

  if ($conf->{run_reuse_tests_only}) {
    # drop every test that is not flagged for reuse
    my @not_reused = grep { !defined $conf->{reuse_tests}->{$_} }
                     keys %{$conf->{tests}};
    delete @{$conf->{tests}}{@not_reused};
    return 0;
  }

  for my $rule_name (keys %{$conf->{reuse_tests}}) {

    # unknown rule: create an EMPTY placeholder test for it
    unless (exists $conf->{tests}->{$rule_name}) {
      dbg("reuse: $rule_name does not exist, adding empty test");
      $conf->{parser}->add_test($rule_name, undef, $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS);
    }

    # unscored rule: 0.01 for T_ rules, 1.0 otherwise; negative if "nice"
    unless (exists $conf->{scores}->{$rule_name}) {
      my $magnitude = ($rule_name =~ /^T_/) ? 0.01 : 1.0;
      my $is_nice = ($tflags->{$rule_name} || '') =~ /\bnice\b/;
      my $set_score = $is_nice ? -$magnitude : $magnitude;
      $conf->{scoreset}->[$_]->{$rule_name} = $set_score for 0..3;
    }

    # remember when (priority and rule stage) a reused hit must be added
    my $priority = $conf->{priority}->{$rule_name} || 0;
    my $stage = $self->_get_stage_from_rule($conf, $rule_name);
    $conf->{reuse_tests_order}->{$rule_name} = [ $priority, $stage ];

  }
}

# Plugin hook, called at the start of a message check.
#
# If the message carries old rule hits ({metadata}->{reuse_tests_hit}),
# every reusable rule is disabled by zeroing its score in all four score
# sets; the previous scores are saved in $pms->{reuse_old_scores} (they
# are put back by check_end).  A rule that hit in the old results, under
# its current name or one of its old names, is queued in
# $pms->{reuse_hits_to_add} under the key "<priority> <stage>"; a rule
# with no old hit at all is marked ready for meta rules.
#
# Arguments:
#   $opts->{permsgstatus} - per-message status object
#
# Returns 0 when there is nothing to do (run_reuse_tests_only is set, or
# no old hits were passed in).
sub check_start {
  my ($self, $opts) = @_;

  my $pms = $opts->{permsgstatus};
  my $conf = $pms->{conf};
  my $scoreset = $conf->{scoreset};

  return 0 if $conf->{run_reuse_tests_only};

  # Can we reuse?
  my $msg = $pms->get_message();

  unless (exists $msg->{metadata}->{reuse_tests_hit}) {
    dbg("reuse: no old test hits passed in");
    return 0;
  }
  my $old_hash = $msg->{metadata}->{reuse_tests_hit};

  # now go through the rules and priorities and figure out which ones
  # need to be disabled
  foreach my $rule (keys %{$conf->{reuse_tests}}) {

    my ($priority, $stage) = @{$conf->{reuse_tests_order}->{$rule}};

    # score set could change after check_start but before we add hits,
    # so we need to disable the rule in all sets
    my @dis;
    foreach my $ss (0..3) {
      if (exists $scoreset->[$ss]->{$rule}) {
        $pms->{reuse_old_scores}->{$rule}->[$ss] =
          $scoreset->[$ss]->{$rule};
        $scoreset->[$ss]->{$rule} = 0;
        push @dis, $ss;
      }
    }
    dbg("reuse: disabling rule $rule in score sets %s",
      join(',', @dis)) if @dis;

    # now, check for hits: the rule counts as hit if ANY of its names
    # (current or old) appears in the old results
    my $was_hit = 0;
    foreach my $old_test (@{$conf->{reuse_tests}->{$rule}}) {
      if ($old_hash->{$old_test}) {
        $was_hit = 1;
        last;
      }
    }

    if ($was_hit) {
      push @{$pms->{reuse_hits_to_add}->{"$priority $stage"}}, $rule;
      dbg("reuse: rule $rule hit, will add at priority $priority, stage " .
         "$stage");
    } else {
      # Make sure rule is marked ready for meta rules.  This must only be
      # done once we know that none of the names hit: deciding per name
      # would mark a rule ready (as not hit) even though a later old name
      # still schedules a hit for it.
      $pms->rule_ready($rule);
    }
  }
}

# Plugin hook, called at the end of a message check.
#
# Puts back the scores saved in $pms->{reuse_old_scores}: for each saved
# rule, every score set (0..3) that still has an entry for the rule gets
# its saved value again.  The saved-score table is removed afterwards.
#
# Arguments:
#   $opts->{permsgstatus} - per-message status object
#
# Returns 0 immediately when run_reuse_tests_only is set.
sub check_end {
  my ($self, $opts) = @_;

  my $pms = $opts->{permsgstatus};
  my $conf = $pms->{conf};
  my $sets = $conf->{scoreset};

  if ($conf->{run_reuse_tests_only}) {
    return 0;
  }

  for my $name (keys %{$pms->{reuse_old_scores}}) {
    for my $idx (0..3) {
      # only touch score sets that know about this rule
      $sets->[$idx]->{$name} = $pms->{reuse_old_scores}->{$name}->[$idx]
        if exists $sets->[$idx]->{$name};
    }
  }

  delete $pms->{reuse_old_scores};
}

# Plugin hook, called with a priority and rule type before rules run.
#
# Hands over to _add_hits so that reused hits queued for this
# priority/rule type get registered.
#
# Arguments:
#   $opts->{permsgstatus} - per-message status object
#   $opts->{priority}     - priority about to run
#   $opts->{ruletype}     - rule type (stage) about to run
#
# Returns 0 when run_reuse_tests_only is set, otherwise whatever
# _add_hits returns.
sub start_rules {
  my ($self, $opts) = @_;

  my $pms = $opts->{permsgstatus};

  if ($pms->{conf}->{run_reuse_tests_only}) {
    return 0;
  }

  my ($priority, $ruletype) = @{$opts}{qw(priority ruletype)};
  return $self->_add_hits($pms, $priority, $ruletype);
}

# Register the reused hits queued for one "<priority> <stage>" slot.
#
# Does nothing if no hits are queued for the slot or if the slot was
# already processed.  For each queued rule the score in the current score
# set is temporarily set to the saved old score (0.001 if that is missing
# or zero), the hit is registered through got_hit, and the score is set
# back to 0.  The slot is then marked done in $pms->{reuse_hits_done}.
#
# Arguments:
#   $pms      - per-message status object
#   $priority - priority of the rules about to run
#   $stage    - rule type (stage) about to run
sub _add_hits {
  my ($self, $pms, $priority, $stage) = @_;

  my $slot = "$priority $stage";

  return unless exists $pms->{reuse_hits_to_add}->{$slot};
  return if exists $pms->{reuse_hits_done}->{$slot};

  my $conf = $pms->{conf};
  for my $name (@{$pms->{reuse_hits_to_add}->{$slot}}) {
    # Add hit even if rule was originally disabled
    my $ss = $conf->get_score_set();
    my $saved = $pms->{reuse_old_scores}->{$name}->[$ss];
    $conf->{scores}->{$name} = $saved ? $saved : 0.001;

    dbg("reuse: registering hit for $name: score: " .
       $conf->{scores}->{$name});
    $pms->got_hit($name);

    # disable the rule again now that the hit has been recorded
    $conf->{scores}->{$name} = 0;
  }

  $pms->{reuse_hits_done}->{$slot} = 1;
}

# Maps a rule type constant to the name of the stage at which a reused
# hit for a rule of that type is added (the stage is compared with the
# rule type handed to start_rules).
my %type_to_stage = (
  # all eval-style rule types share the single "eval" stage
  (map { ($_ => "eval") } (
    $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
    $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
    $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS,
    $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS,
    $Mail::SpamAssassin::Conf::TYPE_URI_EVALS,
    $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS,
  )),

  # plain tests each have a stage of their own
  $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS    => "head",
  $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS    => "body",
  $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS    => "full",
  $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS => "rawbody",
  $Mail::SpamAssassin::Conf::TYPE_URI_TESTS     => "uri",
  $Mail::SpamAssassin::Conf::TYPE_META_TESTS    => "meta",
);

# Work out the stage at which a reused hit for $rule should be added.
#
# Arguments:
#   $conf - configuration object ({test_types} and {tests} are read)
#   $rule - rule name
#
# Returns the stage name from %type_to_stage for the rule's type, or
# "meta" when the type is unknown or has no entry in that table.
sub _get_stage_from_rule {
  my ($self, $conf, $rule) = @_;

  my $type = $conf->{test_types}->{$rule};

  if ($type && $type == $Mail::SpamAssassin::Conf::TYPE_EMPTY_TESTS) {
    # a "fake" rule: its "text"/"definition" may be the name of the
    # "parent" rule, in which case the parent's type is used instead
    my $parent = $conf->{tests}->{$rule};
    $type = $conf->{test_types}->{$parent} if $parent;
  }

  # Fall back to "meta": hits added there run before the meta rules, so
  # the meta rules can use these hits as inputs.
  return "meta" unless $type && exists $type_to_stage{$type};

  return $type_to_stage{$type};
}

1;