1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
(***********************************************************************)
(* *)
(* SpamOracle -- a Bayesian spam filter *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 2002 Institut National de Recherche en Informatique et *)
(* en Automatique. This file is distributed under the terms of the *)
(* GNU Public License version 2, http://www.gnu.org/licenses/gpl.txt *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Summarize the attachments of a message as one line that can be
put in the header of the message. Allows procmail to filter
suspicious attachments without looking at the message body. *)
open Printf
open Mail
(* Matches the media type ("type/subtype") at the start of a content-type
   header value; group 1 is the media type.  Besides letters, digits, '/'
   and '-', the class accepts '.', '+' and '_', which occur in common
   subtypes such as "application/vnd.ms-excel" or "image/svg+xml".
   Without them the captured type would stop at the first such character
   and the reported type would be truncated. *)
let re_content_type =
  Str.regexp "\\([/a-zA-Z0-9.+_-]+\\)"
(* Case-insensitive alternation of the media types that are not worth
   reporting.  "multipart/" has no subtype, so when the pattern is applied
   at position 0 of a media type it accepts every multipart container. *)
let re_innocuous_content_types =
  let harmless =
    [ "text/plain";
      "text/html";
      "text/x-vcard";
      "multipart/";
      "message/rfc822";
      "message/delivery-status" ] in
  Str.regexp_case_fold (String.concat "\\|" harmless)
(* Case-insensitive pattern for a "charset=" parameter.  Group 1 spans the
   whole value (including the double quotes when the value is quoted);
   group 2 is set only for a quoted value and holds the text between the
   quotes. *)
let re_charset =
  let quoted = "\"\\([^\"]+\\)\"" in
  let bare = "[^ \t;]+" in
  Str.regexp_case_fold ("charset=\\(" ^ quoted ^ "\\|" ^ bare ^ "\\)")
(* Case-insensitive alternation of the character sets that are not worth
   reporting.  The "$" after the ISO-8859-1 alternative keeps it from
   accepting longer names that merely begin with it, such as
   "iso-8859-11"; ISO-8859-15 is listed separately. *)
let re_innocuous_charsets =
  let harmless =
    [ "us-ascii";
      "iso[-_]8859[-_]1$";
      "iso[-_]8859[-_]15";
      "windows-1252" ] in
  Str.regexp_case_fold (String.concat "\\|" harmless)
(* Case-insensitive pattern for a "name=" parameter.  Because it is meant
   to be searched for anywhere in a header value, it also hits the tail of
   "filename=".  Group 1 spans the whole value (including the double
   quotes when the value is quoted); group 2 is set only for a quoted
   value and holds the text between the quotes. *)
let re_name =
  let quoted = "\"\\([^\"]+\\)\"" in
  let bare = "[^ \t;]+" in
  Str.regexp_case_fold ("name=\\(" ^ quoted ^ "\\|" ^ bare ^ "\\)")
(* [match_anchored pattern text] is true when [pattern] matches a prefix of
   [text], i.e. the match must begin at index 0.  The match state is left
   in place for a following [Str.matched_group]. *)
let match_anchored pattern text =
  let start = 0 in
  Str.string_match pattern text start
(* [match_unanchored pattern text] is true when [pattern] matches somewhere
   in [text], searching forward from index 0.  The position of the match is
   discarded; the match state is left in place for a following
   [Str.matched_group]. *)
let match_unanchored pattern text =
  match Str.search_forward pattern text 0 with
  | (_ : int) -> true
  | exception Not_found -> false
(* [summarize msg] builds a one-line summary of the parts nested inside
   [msg].  Every part reachable through the [parts] field is visited
   depth-first ([msg] itself is not examined, only its descendants) and
   contributes, in this order:
     type="T"  when its content-type header starts with a media type that
               does not match [re_innocuous_content_types];
     cset="C"  when that header has a charset parameter that does not
               match [re_innocuous_charsets];
     name="N"  for a name parameter found in the content-type header, and
               again for one found in the content-disposition header.
   Each item is followed by a single space.  Returns the accumulated
   text, which is empty when nothing was emitted. *)
let summarize msg =
  let out = Buffer.create 200 in
  (* Value of the parameter located by [re] anywhere in [hdr], if any.
     Group 2 is the text between double quotes; when the unquoted
     alternative matched, group 2 is unset and group 1 holds the value. *)
  let param_value re hdr =
    if match_unanchored re hdr then
      Some (try Str.matched_group 2 hdr
            with Not_found -> Str.matched_group 1 hdr)
    else
      None in
  (* Append a name="..." item when [hdr] carries a name parameter. *)
  let emit_name hdr =
    match param_value re_name hdr with
    | Some file -> bprintf out "name=\"%s\" " file
    | None -> () in
  let rec visit part =
    let ctype = header "content-type:" part in
    if match_anchored re_content_type ctype then begin
      (* Read the group now: the next Str call replaces the match state. *)
      let media = Str.matched_group 1 ctype in
      if not (match_anchored re_innocuous_content_types media) then
        bprintf out "type=\"%s\" " media
    end;
    begin match param_value re_charset ctype with
    | Some cs ->
        if not (match_anchored re_innocuous_charsets cs) then
          bprintf out "cset=\"%s\" " cs
    | None -> ()
    end;
    emit_name ctype;
    emit_name (header "content-disposition:" part);
    List.iter visit part.parts in
  List.iter visit msg.parts;
  Buffer.contents out
|