File: header_utf8.t

package info (click to toggle)
spamassassin 4.0.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,724 kB
  • sloc: perl: 89,143; ansic: 5,193; sh: 3,737; javascript: 339; sql: 295; makefile: 209; python: 49
file content (232 lines) | stat: -rwxr-xr-x 7,753 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/perl -T

###
### UTF-8 CONTENT, edit with UTF-8 locale/editor
###

use lib '.'; use lib 't';
use SATest; sa_t_init("header_utf8.t");

use constant HAS_EMAIL_ADDRESS_XS => eval { require Email::Address::XS; };
use constant HAS_LIBIDN => eval { require Net::LibIDN; };
use constant HAS_LIBIDN2 => eval { require Net::LibIDN2; };

if (!HAS_EMAIL_ADDRESS_XS) {
  warn "Email::Address::XS is not installed, tests will be lacking\n";
}
if (!HAS_LIBIDN && !HAS_LIBIDN2) {
  warn "Net::LibIDN or Net::LibIDN2 is not installed, tests will be lacking\n";
}

use Test::More;
plan skip_all => "Test requires Perl 5.8" unless $] > 5.008; # TODO: SA already doesn't support anything below 5.8.1

my $tests = 156;
$tests = 305 if (HAS_EMAIL_ADDRESS_XS || (!HAS_EMAIL_ADDRESS_XS && HAS_LIBIDN && HAS_LIBIDN2));
plan tests => $tests;

# ---------------------------------------------------------------------------

%mypatterns = (
  ' 1.0 LT_RPATH '     => '',
  ' 1.0 LT_ENVFROM '   => '',
  ' 1.0 LT_FROM '      => '',
  ' 1.0 LT_FROM_ADDR ' => '',
  ' 1.0 LT_FROM_NAME ' => '',
  ' 1.0 LT_FROM_RAW '  => '',
  ' 1.0 LT_TO_ADDR '   => '',
  ' 1.0 LT_TO_NAME '   => '',
  ' 1.0 LT_CC_ADDR '   => '',
  ' 1.0 LT_SUBJ '      => '',
  ' 1.0 LT_SUBJ_RAW '  => '',
  ' 1.0 LT_MESSAGEID ' => '',
  ' 1.0 LT_MSGID '     => '',
  ' 1.0 LT_CT '        => '',
  ' 1.0 LT_CT_RAW '    => '',
  ' 1.0 LT_AUTH_DOM '  => '',
  ' 1.0 LT_NOTE '      => '',
  ' 1.0 LT_UTF8SMTP_ANY '    => '',
  ' 1.0 LT_SPLIT_UTF8_SUBJ ' => '',
  ' 100 USER_IN_BLOCKLIST '  => '',
);

%mypatterns_utf8 = (  # as it appears in a report body
  ' 1.0 LT_ANY_CHARS En-tête contient caractères' => 'LT_ANY_CHARS utf8',
);

%mypatterns_mime_qp = (  # as it appears in a mail header section
  ' 1.0 LT_ANY_CHARS =?UTF-8?Q?En-t=C3=AAte_contient_caract=C3=A8res?=' => 'LT_ANY_CHARS mime encoded',
);

%mypatterns_mime_b64 = (  # as it appears in a mail header section
  ' 1.0 LT_ANY_CHARS =?UTF-8?B?5a2X56ym6KKr5YyF5ZCr5Zyo5raI5oGv5oql5aS06YOo5YiG?=' => 'LT_ANY_CHARS mime encoded',
);

%mypatterns_mime_b64_bug7307 = (
  ' 1.0 LT_SUBJ2 '      => '',
  ' 1.0 LT_SUBJ2_RAW '  => '',
);

%anti_patterns = (
  ' 1.0 NO_RELAYS '  => 'NO_RELAYS',
# ' 1.0 INVALID_MSGID '  => 'INVALID_MSGID',
);

my $myrules = <<'END';
  header USER_IN_BLOCKLIST  eval:check_from_in_blocklist()
  tflags USER_IN_BLOCKLIST  userconf nice noautolearn
  score USER_IN_BLOCKLIST 100
  add_header all  AuthorDomain _AUTHORDOMAIN_
  blocklist_from  Marilù.Gioffré@esempio-università.it
  header LT_UTF8SMTP_ANY  Received =~ /\bwith\s*UTF8SMTPS?A?\b/mi
  header LT_RPATH   Return-Path:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
  header LT_ENVFROM EnvelopeFrom =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
  header LT_FROM      From =~ /^Marilù Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
  header LT_FROM_ADDR From:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/
  header LT_FROM_NAME From:name =~ /^Marilù Gioffré ♥\z/
  header LT_FROM_RAW  From:raw  =~ /^\s*=\?ISO-8859-1\?Q\?Maril=F9\?= Gioffré ♥ <Marilù\.Gioffré\@esempio-università\.it>$/m
  header LT_AUTH_DOM  X-AuthorDomain =~ /^xn--esempio-universit-4ob\.it\z/
  header LT_TO_ADDR   To:addr =~ /^Dörte\@Sörensen\.example\.com\z/
  header LT_TO_NAME   To:name =~ /^Dörte Å\. Sörensen, Jr\./
  header LT_CC_ADDR   Cc:addr =~ /^θσερ\@εχαμπλε\.ψομ\z/
  header LT_SUBJ      Subject =~ /^Domače omrežje$/m
  header LT_SUBJ_RAW  Subject:raw  =~ /^\s*=\?iso-8859-2\*sl\?Q\?Doma=e8e\?=\s+=\?utf-8\*sl\?Q\?_omre=C5\?=/m
  header LT_SUBJ2     Subject =~ /^【重要訊息】台電105年3月電費,委託金融機構扣繳成功電子繳費憑證\(電號07487616730\)$/m
  header LT_SUBJ2_RAW Subject:raw  =~ /^\s*=\?UTF-8\?B\?44CQ6YeN6KaB6KiK5oGv44CR5Y\+w6Zu7MTA15bm0\?=\s*=\?UTF-8\?B\?M\+aciOmbu\+iyu\+\+8jOWnlOiol\+mHkeiejeapn\+ani\+aJow==\?=\s*=\?UTF-8\?B\?57mz5oiQ5Yqf6Zu75a2Q57mz6LK75oaR6K2JKOmbu\+iZnw==\?=\s*=\?UTF-8\?B\?MDc0ODc2MTY3MzAp\?=$/m
  header LT_MSGID     Message-ID =~ /^<b497e6c2\@example\.срб>$/m
  header LT_MESSAGEID MESSAGEID  =~ /^<b497e6c2\@example\.срб>$/m
  header LT_CT        Content-Type =~ /документы для отдела кадров\.pdf/
  header LT_CT_RAW    Content-Type:raw =~ /=\?utf-8\?B\?tdC70LAg0LrQsNC00YDQvtCyLnBkZg==\?="/
  header LT_SPLIT_UTF8_SUBJ Subject:raw =~ m{(=\?UTF-8) (?: \* [^?=<>, \t]* )? (\?Q\?) [^ ?]* =[89A-F][0-9A-F] \?= \s* \1 (?: \* [^ ?=]* )? \2 =[89AB][0-9A-F]}xsmi
  header LT_NOTE      X-Note =~ /^The above.*char =C5 =BE is invalid, .*wild$/m
  header LT_ANY_CHARS From =~ /./
  describe         LT_ANY_CHARS  Header contains characters
  lang fr describe LT_ANY_CHARS  En-tête contient caractères
  # sorry, Google translate:
  lang zh describe LT_ANY_CHARS  字符被包含在消息报头部分
END

if (!HAS_LIBIDN && !HAS_LIBIDN2) {
  # temporary fudge to prevent a test failing
  # until the Net::LibIDN becomes a mandatory module
  $myrules =~ s{^(\s*header LT_AUTH_DOM\s+X-AuthorDomain =~)\s*(/.*/)$}
               {$1 /esempio-università\.it/}m
}



## Test 1 with internal parser, any libidn
$ENV{'SA_HEADER_ADDRESS_PARSER'} = 1;
if (HAS_LIBIDN) {
  $ENV{'SA_LIBIDN'} = 1;
} elsif (HAS_LIBIDN2) {
  $ENV{'SA_LIBIDN'} = 2;
  $libidn2_done++;
}
run_tests();
## Test 2 with Email::Address::XS
if (HAS_EMAIL_ADDRESS_XS) {
  $ENV{'SA_HEADER_ADDRESS_PARSER'} = 2;
  if (HAS_LIBIDN2 && !defined $libidn2_done) {
    $ENV{'SA_LIBIDN'} = 2;
    $libidn2_done++;
  }
  run_tests();
} else {
  ## .. or Test 2 with internal parser, libidn2
  if (HAS_LIBIDN2 && !defined $libidn2_done) {
    $ENV{'SA_LIBIDN'} = 2;
    run_tests();
  }
}


sub run_tests {

$ENV{PERL_BADLANG} = 0;  # suppresses Perl warning about failed locale setting
# see Mail::SpamAssassin::Conf::Parser::parse(), also Bug 6992
$ENV{LANGUAGE} = $ENV{LANG} = 'fr_CH.UTF-8';

#--- normalize_charset 1

tstprefs ($myrules . '
  report_safe 0
  normalize_charset 1
');

%patterns = (%mypatterns, %mypatterns_mime_qp);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

tstprefs ($myrules . '
  report_safe 1
  normalize_charset 1
');
%patterns = (%mypatterns, %mypatterns_utf8);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

tstprefs ($myrules . '
  report_safe 2
  normalize_charset 1
');
%patterns = (%mypatterns, %mypatterns_utf8);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

#--- normalize_charset 0

tstprefs ($myrules . '
  report_safe 0
  normalize_charset 0
');
%patterns = (%mypatterns, %mypatterns_mime_qp);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

tstprefs ($myrules . '
  report_safe 1
  normalize_charset 0
');
%patterns = (%mypatterns, %mypatterns_utf8);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

tstprefs ($myrules . '
  report_safe 2
  normalize_charset 0
');
%patterns = (%mypatterns, %mypatterns_utf8);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

#--- base64 encoded-words

$ENV{PERL_BADLANG} = 0;  # suppresses Perl warning about failed locale setting
# see Mail::SpamAssassin::Conf::Parser::parse(), also Bug 6992
$ENV{LANGUAGE} = $ENV{LANG} = 'zh_CN.UTF-8';

tstprefs ($myrules . '
  report_safe 0
  normalize_charset 1
');
%patterns = (%mypatterns, %mypatterns_mime_b64);
sarun ("-L < data/nice/unicode1", \&patterns_run_cb);
ok_all_patterns();

#--- base64 encoded-words - Bug 7307

$ENV{LANGUAGE} = $ENV{LANG} = 'en_US.UTF-8';

tstprefs ($myrules . '
  report_safe 0
  normalize_charset 1
');
%patterns = (%mypatterns_mime_b64_bug7307);
%anti_patterns = ();
sarun ("-L < data/nice/unicode2", \&patterns_run_cb);
ok_all_patterns();


} ## run_tests