File: mbox2omega

package info (click to toggle)
xapian-omega 1.0.7-3+lenny2
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 2,424 kB
  • ctags: 744
  • sloc: sh: 9,112; cpp: 7,954; makefile: 245; perl: 119
file content (73 lines) | stat: -rwxr-xr-x 2,073 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/perl -w
# Copyright (C) 2004,2005,2007 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

use strict;

# If --help appears anywhere on the command line, show the usage message and
# stop without reading any mailbox.  The user asked for this text, so it is
# written to stdout and the script exits successfully, rather than being
# reported as an error via die (which would go to stderr with a failure exit
# status).
if (grep {$_ eq '--help'} @ARGV) {
    print <<EOT;
Syntax: $0 [MBOX...]

Run this script with one or more mailbox filenames on the command line (or
pipe a mailbox in on stdin).  It produces output suitable for feeding to
scriptindex using the mbox2omega.script index script.  For example:

  $0 *.mbox | scriptindex /path/to/database mbox2omega.script

The index script tells scriptindex how to process the dump file, so you can
customise that to change how the indexing is done.

Note that this script is mainly intended as a simple example of how you might
generate scriptindex dump files from a data source, and its handling of mail
messages is quite primitive - e.g. it doesn't handle MIME or character sets.
EOT
    exit 0;
}

# Convert the mailbox(es) read via <> into dump records on stdout.
#
# For each message this emits:
#   id=...     from the Message-ID: header (surrounding <> removed)
#   title=...  from the Subject: header
#   body=      followed by one "=text" line per non-blank body line
# and a blank line is written before each subsequent message (i.e. whenever a
# "From " line is seen in a message body) to separate the records.
#
# $hdr is true while we are reading a message's headers.  It starts true, so
# the first line of the input (normally the "From " envelope line) is parsed
# as a header and ignored since it is neither Message-ID: nor Subject:.
my $hdr = 1;
LINE: while (defined(my $text = <>)) {
    # Always strip the line ending and add it back on output, so that a final
    # line lacking a newline can't run into whatever is printed next.
    chomp $text;
    if ($hdr) {
	while (1) {
	    # An empty line ends the headers; the body follows.
	    if ($text eq '') {
		print "body=\n";
		$hdr = 0;
		next LINE;
	    }
	    # Handle continuation lines: append every following line which
	    # starts with a space or tab.  The first line which doesn't is
	    # left in $next as the next header line to examine.
	    my $header = $text;
	    my $next;
	    while (defined($next = <>)) {
		chomp $next;
		last unless $next =~ /^[ \t]/;
		$header .= $next;
	    }
	    if ($header =~ s/^Message-ID:\s*<?(.*?)>?\s*$/$1/i) {
		print "id=$header\n" if length $header;
	    } elsif ($header =~ s/^Subject:\s*(.*?)\s*$/$1/i) {
		print "title=$header\n" if length $header;
	    }
	    # Input ended in the middle of the headers.  Stop here: <> only
	    # reports end-of-file once, and calling it again would start
	    # reading from STDIN.
	    last LINE unless defined $next;
	    $text = $next;
	}
    }

    # A "From " line in the body starts the next message.
    if ($text =~ /^From /) {
	print "\n";
	$hdr = 1;
	next;
    }
    # Skip body lines which are empty or contain only whitespace.
    if ($text =~ /\S/) {
	print "=$text\n";
    }
}