File: mbox.ml

package info (click to toggle)
spamoracle 1.4-8
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 248 kB
  • ctags: 243
  • sloc: ml: 1,198; makefile: 138; sh: 74
file content (94 lines) | stat: -rw-r--r-- 2,764 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
(***********************************************************************)
(*                                                                     *)
(*                 SpamOracle -- a Bayesian spam filter                *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 2002 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  This file is distributed under the terms of the   *)
(*  GNU Public License version 2, http://www.gnu.org/licenses/gpl.txt  *)
(*                                                                     *)
(***********************************************************************)

(* $Id: mbox.ml,v 1.4 2002/08/26 09:35:25 xleroy Exp $ *)

(* Reading of a mailbox file and splitting into individual messages *)

type t =
  { ic: in_channel;
    zipped: bool;
    mutable start: string;
    buf: Buffer.t }

let open_mbox_file filename =
  if Filename.check_suffix filename ".gz" then
    { ic = Unix.open_process_in ("gunzip -c " ^filename);
      zipped = true;
      start = "";
      buf = Buffer.create 50000 }
  else
    { ic = open_in filename;
      zipped = false;
      start = "";
      buf = Buffer.create 50000 }

let open_mbox_channel ic =
    { ic = ic;
      zipped = false;
      start = "";
      buf = Buffer.create 50000 }

let read_msg t =
  Buffer.clear t.buf;
  Buffer.add_string t.buf t.start;
  let rec read () =
    let line = input_line t.ic in
    if String.length line >= 5
    && String.sub line 0 5 = "From "
    && Buffer.length t.buf > 0 then begin
      t.start <- (line ^ "\n");
      Buffer.contents t.buf
    end else begin
      Buffer.add_string t.buf line;
      Buffer.add_char t.buf '\n';
      read ()
    end in
  try
    read()
  with End_of_file ->
    if Buffer.length t.buf > 0 then begin
      t.start <- "";
      Buffer.contents t.buf
    end else
      raise End_of_file

let close_mbox t =
  if t.zipped
  then ignore(Unix.close_process_in t.ic)
  else close_in t.ic

let mbox_file_iter filename fn =
  let ic = open_mbox_file filename in
  try
    while true do fn(read_msg ic) done
  with End_of_file ->
    close_mbox ic

let mbox_channel_iter inchan fn =
  let ic = open_mbox_channel inchan in
  try
    while true do fn(read_msg ic) done
  with End_of_file ->
    close_mbox ic

let read_single_msg inchan =
  let res = Buffer.create 10000 in
  let buf = String.create 1024 in
  let rec read () =
    let n = input inchan buf 0 (String.length buf) in
    if n > 0 then begin
      Buffer.add_substring res buf 0 n;
      read ()
    end in
  read ();
  Buffer.contents res