File: rule_names.t

package info (click to toggle)
spamassassin 3.1.7-2
  • links: PTS
  • area: main
  • in suites: etch-m68k
  • size: 5,376 kB
  • ctags: 2,123
  • sloc: perl: 39,706; ansic: 3,133; sh: 1,344; sql: 170; makefile: 168
file content (124 lines) | stat: -rwxr-xr-x 3,270 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl -w

BEGIN {
  if (-e 't/test_dir') { # if we are running "t/rule_names.t", kluge around ...
    chdir 't';
  }

  if (-e 'test_dir') {            # running from test directory, not ..
    unshift(@INC, '../blib/lib');
  }
}

my $prefix = '.';
if (-e 'test_dir') {            # running from test directory, not ..
  $prefix = '..';
}

use strict;
use SATest; sa_t_init("rule_names");
use Test;
use Mail::SpamAssassin;
use Digest::SHA1;
use vars qw(%patterns %anti_patterns);

# initialize SpamAssassin
my $sa = create_saobj({'dont_copy_prefs' => 1});

$sa->init(0); # parse rules

# get rule names
my @tests;
while (my ($test, $type) = each %{ $sa->{conf}->{test_types} }) {
  push @tests, $test;
}

# run tests
my $mail = 'log/rule_names.eml';
write_mail();
%patterns = ();
my $i = 1;
for my $test (@tests) {
  # look for test with spaces on either side, should match report
  # lines in spam report, only exempt rules that are really unavoidable
  # and are clearly not hitting due to rules being named poorly
  next if $test =~ /^UPPERCASE_\d/;
  next if $test eq "UNIQUE_WORDS";
  # exempt the auto-generated nightly mass-check rules
  next if $test =~ /^T_MC_/;

  $anti_patterns{"$test,"} = "P_" . $i++;
}

# settings
plan tests => (scalar(keys %anti_patterns) + scalar(keys %patterns)),
onfail => sub {
    warn "\n\n   Note: rule_name failures may be only cosmetic" .
    "\n        but must be fixed before release\n\n";
};

tstprefs ("
	# set super low threshold, so always marked as spam
	required_score -10000.0
	# add two fake lexically high tests so every other hit will always be
	# followed by a comma in the X-Spam-Status header
	body ZZZZZZZZ /./
	body zzzzzzzz /./
");
sarun ("-L < $mail", \&patterns_run_cb);
ok_all_patterns();

# function to write test email with varied (not random) ordering tests in body
sub write_mail {
  if (open(MAIL, ">$mail")) {
    print MAIL <<'EOF';
Received: from internal.example.com [127.0.0.1] by localhost
    for recipient@example.com; Fri, 07 Oct 2002 09:02:00 +0000
Received: from external.example.org [150.51.53.1] by internal.example.com
    for recipient@example.com; Fri, 07 Oct 2002 09:01:00 +0000
Message-ID: <clean.1010101@example.com>
Date: Mon, 07 Oct 2002 09:00:00 +0000
From: Sender <sender@example.com>
MIME-Version: 1.0
To: Recipient <recipient@example.com>
Subject: this trivial message should have no hits
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

EOF

    # we are looking for random failures, but we do a deterministic
    # test to prevent too much frustration with "make test".

    # start off sorted
    @tests = sort @tests;

    print MAIL join("\n", @tests) . "\n\n";

    # 25 iterations gets most hits most of the time, but 10 is large enough
    for (1..10) {
      print MAIL join("\n", sha1_shuffle($_, @tests)) . "\n\n";
    }
    close(MAIL);
  }
  else {
    die "can't open output file: $!";
  }
}

# Fisher-Yates shuffle
sub fy_shuffle {
  for (my $i = $#_; $i > 0; $i--) {
    @_[$_,$i] = @_[$i,$_] for rand $i+1;
  }
  return @_;
}

# SHA1 shuffle
sub sha1_shuffle {
  my $i = shift;
  return map { $_->[0] }
         sort { $a->[1] cmp $b->[1] }
         map { [$_, Digest::SHA1::sha1($_ . $i)] }
         @_;
}