File: sb_filter.py

package info (click to toggle)
spambayes 1.0.3-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 2,764 kB
  • ctags: 3,166
  • sloc: python: 29,036; ansic: 195; sh: 110; lisp: 83; makefile: 76
file content (257 lines) | stat: -rwxr-xr-x 7,649 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env python

## A hammie front-end to make the simple stuff simple.
##
##
## The intent is to call this from procmail and its ilk like so:
##
##   :0 fw
##   | sb_filter.py
##
## Then, you can set up your MUA to pipe ham and spam to it, one at a
## time, by calling it with either the -g or -s options, respectively.
##
## Author: Neale Pickett <neale@woozle.org>
##

"""Usage: %(program)s [options] [filenames]

Options can one or more of:
    -h
        show usage and exit
    -x
        show some usage examples and exit
    -d DBFILE
        use database in DBFILE
    -p PICKLEFILE
        use pickle (instead of database) in PICKLEFILE
    -n
        create a new database
*   -f
        filter (default if no processing options are given)
*   -g
        [EXPERIMENTAL] (re)train as a good (ham) message
*   -s
        [EXPERIMENTAL] (re)train as a bad (spam) message
*   -t
        [EXPERIMENTAL] filter and train based on the result -- you must
        make sure to untrain all mistakes later.  Not recommended.
*   -G
        [EXPERIMENTAL] untrain ham (only use if you've already trained
        this message)
*   -S
        [EXPERIMENTAL] untrain spam (only use if you've already trained
        this message)

    -o section:option:value
        set [section, option] in the options database to value

All options marked with '*' operate on stdin, and write the resultant
message to stdout.

If no filenames are given on the command line, standard input will be
processed as a single message.  If one or more filenames are given on the
command line, each will be processed according to the following rules:

    * If the filename is '-', standard input will be processed as a single
      message (may only be usefully given once).

    * If the filename starts with '+' it will be processed as an MH folder.

    * If the filename is a directory and it contains a subdirectory named
      'cur', it will be processed as a Maildir.

    * If the filename is a directory and it contains a subdirectory named
      'Mail', it will be processed as an MH Mailbox.

    * If the filename is a directory and not a Maildir nor an MH Mailbox, it
      will be processed as a Mailbox directory consisting of just .txt and
      .lorien files.

    * Otherwise, the filename is treated as a Unix-style mailbox (messages
      begin on a line starting with 'From ').

Output is always to standard output as a Unix-style mailbox.
"""

import os
import sys
import getopt
from spambayes import hammie, Options, mboxutils, storage
from spambayes.Version import get_version_string

try:
    True, False
except NameError:
    # Maintain compatibility with Python 2.2
    True, False = 1, 0

# See Options.py for explanations of these properties
program = sys.argv[0]

example_doc = """_Examples_

filter a message on disk:
    %(program)s < message

(re)train a message as ham:
    %(program)s -g < message

(re)train a message as spam:
    %(program)s -s < message


procmail recipe to filter and train in one step:
    :0 fw
    | %(program)s -t


mutt configuration:  This binds the 'H' key to retrain the message as
ham, and prompt for a folder to move it to.  The 'S' key retrains as
spam, and moves to a 'spam' folder.  See contrib/muttrc in the spambayes
distribution for other neat mutt tricks.

  macro index S "|sb_filter.py -s | procmail\n"
  macro pager S "|sb_filter.py -s | procmail\n"
  macro index H "|sb_filter.py -g | procmail\n"
  macro pager H "|sb_filter.py -g | procmail\n"
  color index red black "~h 'X-Spambayes-Disposition: spam' ~F"


"""

def examples():
    print example_doc % globals()
    sys.exit(0)

def usage(code, msg=''):
    """Print usage message and sys.exit(code)."""
    # Include version info in usage
    print >> sys.stderr, get_version_string("sb_filter")
    print >> sys.stderr, "    with engine %s" % get_version_string()
    print >> sys.stderr
    
    if msg:
        print >> sys.stderr, msg
        print >> sys.stderr
    print >> sys.stderr, __doc__ % globals()
    sys.exit(code)

class HammieFilter(object):
    def __init__(self):
        options = Options.options
        # This is a bit of a hack to counter the default for
        # persistent_storage_file changing from ~/.hammiedb to hammie.db
        # This will work unless a user:
        #   * had hammie.db as their value for persistent_storage_file, and
        #   * their config file was loaded by Options.py.
        if options["Storage", "persistent_storage_file"] == \
           options.default("Storage", "persistent_storage_file"):
            options["Storage", "persistent_storage_file"] = \
                                    "~/.hammiedb"
        options.merge_files(['/etc/hammierc',
                            os.path.expanduser('~/.hammierc')])
        self.dbname, self.usedb = storage.database_type([])
        self.h = None

    def open(self, mode):
        if self.h is None or self.mode != mode:
            if self.h is not None:
                if self.mode != 'r':
                    self.h.store()
            self.mode = mode
            self.h = hammie.open(self.dbname, self.usedb, self.mode)

    def close(self):
        if self.h is not None:
            if self.mode != 'r':
                self.h.store()
        self.h = None

    __del__ = close

    def newdb(self):
        self.open('n')
        self.close()
        print >> sys.stderr, "Created new database in", self.dbname

    def filter(self, msg):
        self.open('r')
        return self.h.filter(msg)

    def filter_train(self, msg):
        self.open('c')
        return self.h.filter(msg, train=True)

    def train_ham(self, msg):
        self.open('c')
        self.h.train_ham(msg, True)
        self.h.store()

    def train_spam(self, msg):
        self.open('c')
        self.h.train_spam(msg, True)
        self.h.store()

    def untrain_ham(self, msg):
        self.open('c')
        self.h.untrain_ham(msg)
        self.h.store()

    def untrain_spam(self, msg):
        self.open('c')
        self.h.untrain_spam(msg)
        self.h.store()

def main():
    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                               ['help', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '-f':
            actions.append(h.filter)
        elif opt == '-g':
            actions.append(h.train_ham)
        elif opt == '-s':
            actions.append(h.train_spam)
        elif opt == '-t':
            actions.append(h.filter_train)
        elif opt == '-G':
            actions.append(h.untrain_ham)
        elif opt == '-S':
            actions.append(h.untrain_spam)
        elif opt == "-n":
            create_newdb = True
    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    if actions == []:
        actions = [h.filter]

    if not args:
        args = ["-"]
    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
                if args == ["-"]:
                    unixfrom = msg.get_unixfrom() is not None
                else:
                    unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)

if __name__ == "__main__":
    main()