File: sb_imapfilter.py

package info (click to toggle)
spambayes 1.1a6-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 4,712 kB
  • sloc: python: 48,776; ansic: 535; sh: 87; lisp: 83; makefile: 46
file content (1324 lines) | stat: -rw-r--r-- 57,527 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
#!/usr/bin/env python

"""An IMAP filter.  An IMAP message box is scanned and all non-scored
messages are scored and (where necessary) filtered.

Usage:
    sb_imapfilter [options]

        note: option values with spaces in them must be enclosed
              in double quotes

        options:
            -p  dbname  : pickled training database filename
            -d  dbname  : dbm training database filename
            -t          : train contents of spam folder and ham folder
            -c          : classify inbox
            -h          : display this message
            -v          : verbose mode
            -P          : security option to prompt for imap password,
                          rather than look in options["imap", "password"]
            -e y/n      : expunge/purge messages on exit (y) or not (n)
            -i debuglvl : a somewhat mysterious imaplib debugging level
                          (4 is a good level, and suitable for bug reports)
            -l minutes  : period of time between filtering operations
            -b          : Launch a web browser showing the user interface.
            -o section:option:value :
                          set [section, option] in the options database
                          to value

Examples:

    Classify inbox, with dbm database
        sb_imapfilter -c -d bayes.db

    Train Spam and Ham, then classify inbox, with dbm database
        sb_imapfilter -t -c -d bayes.db

    Train Spam and Ham only, with pickled database
        sb_imapfilter -t -p bayes.db

Warnings:
    o We never delete mail, unless you use the -e/purge option, but we do
      mark a lot as deleted, and your mail client might remove that for
      you.  We try to only mark as deleted once the moved/altered message
      is correctly saved, but things might go wrong.  We *strongly*
      recommend that you try this script out on mail that you can recover
      from somewhere else, at least at first.
"""

from __future__ import generators

todo = """
    o IMAP supports authentication via other methods than the plain-text
      password method that we are using at the moment.  Neither of the
      servers I have access to offer any alternative method, however.  If
      someone's does, then it would be nice to offer this.
      Thanks to #1169939 we now support CRAM_MD5 if available.  It'd still
      be good to support others, though.
    o Usernames should be able to be literals as well as quoted strings.
      This might help if the username/password has special characters like
      accented characters.
    o Suggestions?
"""

# This module is part of the SpamBayes project, which is Copyright 2002-2007
# The Python Software Foundation and is covered by the Python Software
# Foundation license.

__author__ = "Tony Meyer <ta-meyer@ihug.co.nz>, Tim Stone"
__credits__ = "All the SpamBayes folk. The original filter design owed " \
              "much to isbg by Roger Binns (http://www.rogerbinns.com/isbg)."

# If we are running as a frozen application, then chances are that
# output is just lost.  We'd rather log this, like sb_server and Outlook
# log, so that the user can pull up the output if possible.  We could just
# rely on the user piping the output appropriately, but would rather have
# more control.  The sb_server tray application only does this if not
# running in a console window, but we do it whenever we are frozen.
import os
import sys
if hasattr(sys, "frozen"):
    # When running as a frozen application, send stdout and stderr to a
    # log file, keeping a few older logs around by rotating them.
    #
    # We want to move to logging module later, so for now, we
    # hack together a simple logging strategy.
    #
    # First work out which directory the log goes in (temp_dir) and a
    # human readable description of that choice (status).
    try:
        import win32api
    except ImportError:
        if sys.platform == "win32":
            # Fall back to CWD, but warn user.
            status = "Warning: your log is stored in the current " \
                     "working directory.  We recommend installing " \
                     "the pywin32 extensions, so that the log is " \
                     "stored in the Windows temp directory."
            temp_dir = os.getcwd()
        else:
            # Try for a /tmp directory.
            if os.path.isdir("/tmp"):
                temp_dir = "/tmp"
                status = "Log file opened in /tmp"
            else:
                status = "Warning: your log is stored in the current " \
                         "working directory.  If this does not suit you " \
                         "please let the spambayes@python.org crowd know " \
                         "so that an alternative can be arranged."
                # Actually use the current working directory, as the
                # message says; without this temp_dir would be undefined
                # below.
                temp_dir = os.getcwd()
    else:
        temp_dir = win32api.GetTempPath()
        status = "Log file opened in " + temp_dir

    # Rotate the existing logs: 3 -> 4, 2 -> 3, 1 -> 2.  Any log that is
    # already in the way is removed first, and missing files are ignored.
    for i in range(3, 0, -1):
        try:
            os.unlink(os.path.join(temp_dir, "SpamBayesIMAP%d.log" % (i+1)))
        except os.error:
            pass
        try:
            os.rename(
                os.path.join(temp_dir, "SpamBayesIMAP%d.log" % i),
                os.path.join(temp_dir, "SpamBayesIMAP%d.log" % (i+1))
                )
        except os.error:
            pass
    # Open this log, as unbuffered, so crashes still get written.
    sys.stdout = open(os.path.join(temp_dir,"SpamBayesIMAP1.log"), "wt", 0)
    sys.stderr = sys.stdout

import socket
import re
import time
import getopt
import types
import thread
import email
import email.Parser
from getpass import getpass
from email.Utils import parsedate

from spambayes import Stats
from spambayes import message
from spambayes.Options import options, optionsPathname
from spambayes import storage, Dibbler
from spambayes.UserInterface import UserInterfaceServer
from spambayes.ImapUI import IMAPUserInterface, LoginFailure

from spambayes.Version import get_current_version

from imaplib import IMAP4
from imaplib import Time2Internaldate
# Choose, once at import time, the imaplib class that IMAPSession extends:
# the SSL variant when the "use_ssl" option is set, the plain one otherwise.
# If the import raises ImportError (presumably because this Python's imaplib
# has no IMAP4_SSL -- TODO confirm), fall back to the plain IMAP4 class.
try:
    if options["imap", "use_ssl"]:
        from imaplib import IMAP4_SSL as BaseIMAP
    else:
        from imaplib import IMAP4 as BaseIMAP
except ImportError:
    from imaplib import IMAP4 as BaseIMAP


class BadIMAPResponseError(Exception):
    """Raised when an IMAP command's response status is not "OK".

    The command that failed and the response it produced are kept on the
    instance as `command` and `response`.
    """

    def __init__(self, command, response):
        self.response = response
        self.command = command

    def __str__(self):
        details = (self.command, self.response)
        return "The command '%s' failed to give an OK response.\n%s" % details


class IMAPSession(BaseIMAP):
    '''A class extending the IMAP4 class, with a few optimizations'''

    # Longest time readline_timeout() keeps waiting for a complete line
    # before it gives up.
    timeout = 60 # seconds
    def __init__(self, server, debug=0, do_expunge = options["imap", "expunge"] ):
        """Connect to `server` and set up the session's bookkeeping.

        `server` is either "host" or "host:port".  Without an explicit
        port, 993 is used when the "use_ssl" option is set and 143
        otherwise.  A connection failure does not raise; the outcome is
        recorded in `self.connected`.  `debug` and `do_expunge` are simply
        stored on the instance.
        """
        if ":" not in server:
            # No port given, so use the default one for the protocol.
            if options["imap", "use_ssl"]:
                port = 993
            else:
                port = 143
        else:
            server, port_text = server.split(':', 1)
            port = int(port_text)

        # If use_ssl is False but the server at the other end expects IMAP
        # over SSL, the welcome message never arrives and we would wait
        # for it forever.  So, when we are not using SSL, a readline that
        # can time out is swapped in for the duration of the connection
        # attempt, and the regular one is put back afterwards.
        if not hasattr(self, "ssl"):
            normal_readline = self.readline
            self.readline = self.readline_timeout
        try:
            BaseIMAP.__init__(self, server, port)
        except (BaseIMAP.error, socket.gaierror, socket.error):
            if options["globals", "verbose"]:
                print >> sys.stderr, "Cannot connect to server", server, "on port", port
                if not hasattr(self, "ssl"):
                    print >> sys.stderr, ("If you are connecting to an SSL server,"
                                          "please ensure that you\n"
                                          "have the 'Use SSL' option enabled.")
            self.connected = False
        else:
            self.connected = True
        if not hasattr(self, "ssl"):
            self.readline = normal_readline

        self.server = server
        self.port = port
        self.debug = debug
        self.do_expunge = do_expunge
        self.logged_in = False

        # The currently selected folder is remembered so that a select
        # command is only sent to the server when the folder actually
        # *changes*.  Both IMAPMessage and IMAPFolder rely on this.
        self.current_folder = None

        # Route reads through safe_read(), which only asks for a limited
        # amount of data at a time.  This is an attempt to work around
        # OS X / Python trouble with getting large amounts of memory in
        # one go (untested, as noted by the original author).
        self._read = self.read
        self.read = self.safe_read

    def readline_timeout(self):
        """Read one line from the server, giving up after `self.timeout`.

        The socket is put into non-blocking mode while the line is
        collected one byte at a time, and is always put back into blocking
        mode before this method returns or raises.

        Returns the line including its terminating newline, like the
        regular readline does.  If the timeout expires or the server
        closes the connection first, whatever was received so far is
        returned (possibly an incomplete line, or the empty string).

        Raises socket.error for any socket failure other than "no data
        available yet".
        """
        # Local import, so that this method is self-contained.
        import errno

        # Error codes that mean "nothing to read yet" on a non-blocking
        # socket.  10035 is WSAEWOULDBLOCK, which is what Windows reports;
        # other platforms report EAGAIN/EWOULDBLOCK.
        no_data_yet = (errno.EAGAIN, errno.EWOULDBLOCK, 10035)

        st_time = time.time()
        buffer = []
        self.sock.setblocking(False)
        try:
            while True:
                if (time.time() - st_time) > self.timeout:
                    if options["globals", "verbose"]:
                        print >> sys.stderr, "IMAP Timing out"
                    break
                try:
                    data = self.sock.recv(1)
                except socket.error:
                    # sys.exc_info() rather than "except ..., e" so that
                    # the syntax does not depend on the Python version.
                    error = sys.exc_info()[1]
                    if error.args and error.args[0] in no_data_yet:
                        # Nothing to receive, keep going.
                        continue
                    raise
                if not data:
                    # The server closed the connection.
                    break
                # Keep the newline: the caller expects a terminated line.
                buffer.append(data)
                if data == '\n':
                    break
        finally:
            # Never leave the socket non-blocking, even on error.
            self.sock.setblocking(True)
        return "".join(buffer)

    def login(self, username, pwd):
        """Authenticate with the server, using CRAM-MD5 when it is offered.

        If 'AUTH=CRAM-MD5' is among the server's capabilities,
        login_cram_md5() is used; otherwise the superclass login is used.
        An IMAP error raised while logging in is turned into a
        LoginFailure whose message names the user and the method used.
        On success `self.logged_in` is set.
        """
        assert self.connected, "Must be connected before logging in."
        use_cram_md5 = 'AUTH=CRAM-MD5' in self.capabilities
        if use_cram_md5:
            description = "MD5"
        else:
            description = "plain-text"
        try:
            if use_cram_md5:
                self.login_cram_md5(username, pwd)
            else:
                # Explicitly the superclass login, not this override.
                BaseIMAP.login(self, username, pwd)
        except BaseIMAP.error:
            msg = "The username (%s) and/or password (sent in %s) may " \
                  "be incorrect." % (username, description)
            raise LoginFailure(msg)
        self.logged_in = True

    def logout(self):
        """End the session, expunging the configured folders first if asked.

        When the session is connected, logged in and `do_expunge` is set,
        the configured spam, unsure and ham folders and then every ham and
        spam training folder are selected and expunged.  The superclass
        logout is called in every case.

        Most, if not all, of the expunging has probably been done already
        by SelectFolder, which is faster.
        """
        def purge(folder_name):
            self.select(folder_name)
            self.expunge()

        # We may never have logged in, in which case there is nothing to
        # expunge.
        if self.connected and self.logged_in and self.do_expunge:
            # Options that each name a single folder (possibly unset).
            for option_name in ("spam_folder", "unsure_folder", "ham_folder"):
                configured = options["imap", option_name]
                if configured:
                    purge(configured)
            # Options that each hold a collection of training folders.
            for option_name in ("ham_train_folders", "spam_train_folders"):
                for training_folder in options["imap", option_name]:
                    purge(training_folder)
        BaseIMAP.logout(self)  # superclass logout

    def check_response(self, command, IMAP_response):
        """Return the data part of an IMAP response, insisting on "OK".

        `IMAP_response` is a (status, data) pair.  If the status is
        anything other than "OK", BadIMAPResponseError is raised with
        `command` and the whole response.
        """
        status, payload = IMAP_response
        if status == "OK":
            return payload
        raise BadIMAPResponseError(command, IMAP_response)

    def SelectFolder(self, folder):
        """Point ensuing IMAP operations at `folder`.

        This is essentially a wrapper around the IMAP select command that
        does nothing when `folder` is already the selected folder.

        Returns the data part of the select response, or None when no
        command was sent because the folder was already selected.

        Raises BadIMAPResponseError if `folder` is the empty string or if
        the server does not give an OK response.
        """
        if self.current_folder == folder:
            return None

        if self.current_folder is not None and self.do_expunge:
            # It is faster to do close() than a single
            # expunge when we log out (because expunge returns
            # a list of all the deleted messages which we don't do
            # anything with).
            self.close()
            self.current_folder = None

        if folder == "":
            # This is Python bug #845560 - if the empty string is
            # passed, we get a traceback, not just an 'invalid folder'
            # error, so raise our own error.
            raise BadIMAPResponseError("select",
                                       "Cannot have empty string as "
                                       "folder name in select")

        # A select that fails leaves *no* folder selected on the server,
        # so forget the old folder before trying.  Otherwise, after a
        # failure, a later request for the old folder would be skipped as
        # "already selected".
        self.current_folder = None

        # We *always* use SELECT and not EXAMINE, because this
        # speeds things up considerably.
        response = self.select(folder, None)
        data = self.check_response("select %s" % (folder,), response)
        self.current_folder = folder
        return data

    # Matches an IMAP literal size marker such as "{12}"; folder_list()
    # uses it to find where a folder name given as a literal begins.
    number_re = re.compile(r"{\d+}")
    # Matches a parenthesised attribute list made only of word characters,
    # backslashes and spaces, plus the space that follows it; folder_list()
    # uses the end of this match to locate the delimiter and folder name.
    folder_re = re.compile(r"\(([\w\\ ]*)\) ")
    def folder_list(self):
        """Return a sorted list of all folders available on the server.

        Sends a list command and pulls the folder name out of each entry
        of the reply.  Entries that cannot be understood are skipped.  If
        the server does not give an OK response, an empty list is returned
        (and, in verbose mode, a warning is printed to stderr).

        As a side effect `self.folder_delimiter` is set from each parsed
        entry, so it ends up holding the delimiter of the last one; a
        warning is printed to stderr if that delimiter is a comma.
        """
        response = self.list()
        try:
            all_folders = self.check_response("list", response)
        except BadIMAPResponseError:
            # We want to keep going, so just print out a warning, and
            # return an empty list.
            if options["globals", "verbose"]:
                print >> sys.stderr, "Could not retrieve folder list."
            return []
        folders = []
        for fol in all_folders:
            if fol is None:
                # imaplib hands back [None] when the server sent no data
                # for the command; there is nothing to parse.
                continue
            # Sigh.  Some servers may give us back the folder name as a
            # literal, so we need to crunch this out.
            if isinstance(fol, tuple):
                m = self.number_re.search(fol[0])
                if not m:
                    # Something is wrong here!  Skip this folder.
                    continue
                # Rebuild the entry as if the name had been a quoted
                # string in place of the "{n}" size marker.
                fol = '%s"%s"' % (fol[0][:m.start()], fol[1])
            m = self.folder_re.search(fol)
            if not m:
                # Something is not good with this folder, so skip it.
                continue

            # IMAP is a truly odd protocol.  The delimiter is
            # only the delimiter for this particular folder - each
            # folder *may* have a different delimiter
            # (The character just after the match is the opening quote,
            # so the delimiter itself is one further on.)
            self.folder_delimiter = fol[m.end()+1:m.end()+2]

            # A bit of a hack, but we really need to know if this is
            # the case.
            if self.folder_delimiter == ',':
                print >> sys.stderr, ("WARNING: Your imap server uses a comma as the "
                                      "folder delimiter.  This may cause unpredictable "
                                      "errors.")
            # Skip the quoted delimiter and the space after it; what is
            # left is the folder name, possibly in quotes.
            folders.append(fol[m.end()+4:].strip('"'))
        folders.sort()
        return folders

    # Patterns used by _extract_fetch_data() to pick items out of a FETCH
    # response.  In each item pattern group 1 is the item name and group 2
    # its value (or, for literals, the "{n}" size marker).
    #
    # A flag can have any character in the ascii range 32-126 except for
    # (){ %*"\
    FLAG_CHARS = ""
    for i in range(32, 127):
        if not chr(i) in ['(', ')', '{', ' ', '%', '*', '"', '\\']:
            FLAG_CHARS += chr(i)
    # One flag: an optional leading backslash, then one or more flag
    # characters.
    FLAG = r"\\?[" + re.escape(FLAG_CHARS) + r"]+"
    # A parenthesised, space separated list of at least one flag.  The
    # empty flag set "()" doesn't match, so no "FLAGS" entry is recorded
    # for it by _extract_fetch_data().
    FLAGS_RE = re.compile(r"(FLAGS) (\((" + FLAG + r" )*(" + FLAG + r")\))")
    # A quoted date-time, for example "17-Jul-1996 02:44:25 -0700".
    INTERNALDATE_RE = re.compile(r"(INTERNALDATE) (\"\d{1,2}\-[A-Za-z]{3,3}\-" +
                                 r"\d{2,4} \d{2,2}\:\d{2,2}\:\d{2,2} " +
                                 r"[\+\-]\d{4,4}\")")
    # Items whose value arrives as a literal: the name is followed by a
    # "{n}" size marker and the data itself comes in the next part.
    RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})")
    BODY_PEEK_RE = re.compile(r"(BODY\[\]) (\{[\d]+\})")
    RFC822_HEADER_RE = re.compile(r"(RFC822.HEADER) (\{[\d]+\})")
    # "UID n" anywhere in the item list.
    UID_RE = re.compile(r"(UID) ([\d]+)")
    # "UID n)" with optional leading spaces, matched from the start of a
    # part; used to recognise a trailing UID part of a response.
    UID_RE2 = re.compile(r" *(UID) ([\d]+)\)")
    # The start of a response part: the message number (group 1), an
    # opening parenthesis, then the item text (group 2) and an optional
    # closing parenthesis.
    FETCH_RESPONSE_RE = re.compile(r"([0-9]+) \(([" + \
                                   re.escape(FLAG_CHARS) + r"\"\{\}\(\)\\ ]*)\)?")
    # A string that is nothing but a "{n}" literal size marker.
    LITERAL_RE = re.compile(r"^\{[\d]+\}$")
    def _extract_fetch_data(self, response):
        """This does the real work of extracting the data, for each message
        number.

        `response` is either a single string or a sequence of parts; a
        single string is treated as a one-part sequence.

        Returns a dictionary holding "message_number" plus one entry per
        recognised item, keyed by the item name as it appears in the
        response ("FLAGS", "INTERNALDATE", "RFC822", "UID",
        "RFC822.HEADER" or "BODY[]").  Values are the raw text from the
        response.  The dictionary is empty if no part was parsed.

        Raises BadIMAPResponseError if a part that should start with a
        message number does not.
        """
        # We support the following FETCH items:
        #  FLAGS
        #  INTERNALDATE
        #  RFC822
        #  UID
        #  RFC822.HEADER
        #  BODY.PEEK
        # All others are ignored.

        if isinstance(response, types.StringTypes):
            response = (response,)

        data = {}
        # Set to (item name, announced size) while the next part is
        # expected to be the literal data for that item.
        expected_literal = None
        # A last part of the form " UID n)" is dropped rather than parsed.
        if self.UID_RE2.match(response[-1]):
            response = response[:-1]

        for part in response:
            # We ignore parentheses by themselves, for convenience.
            if part == ')':
                continue
            if expected_literal:
                # This should be a literal of a certain size.
                # (The size check below is disabled, so the part is
                # accepted whatever its length.)
                key, expected_size = expected_literal
##                if len(part) != expected_size:
##                    raise BadIMAPResponseError(\
##                        "FETCH response (wrong size literal %d != %d)" % \
##                        (len(part), expected_size), response)
                data[key] = part
                expected_literal = None
                continue
            # The first item will always be the message number.
            mo = self.FETCH_RESPONSE_RE.match(part)
            if mo:
                data["message_number"] = mo.group(1)
                rest = mo.group(2)
            else:
                raise BadIMAPResponseError("FETCH response", response)

            # Try every supported item against the rest of the part.
            for r in [self.FLAGS_RE, self.INTERNALDATE_RE, self.RFC822_RE,
                      self.UID_RE, self.RFC822_HEADER_RE, self.BODY_PEEK_RE]:
                mo = r.search(rest)
                if mo is not None:
                    if self.LITERAL_RE.match(mo.group(2)):
                        # The next element will be a literal.
                        expected_literal = (mo.group(1),
                                            int(mo.group(2)[1:-1]))
                    else:
                        data[mo.group(1)] = mo.group(2)
        return data

    def extract_fetch_data(self, response):
        """Turn the response to an IMAP FETCH command into a dictionary.

        The result maps each message number found in the response to the
        dictionary of fetch items that _extract_fetch_data() produced for
        it.  `response` may be a single string or a sequence of elements.
        """
        if isinstance(response, types.StringTypes):
            response = (response,)

        by_message = {}
        # Each element is parsed on its own, as a response can cover more
        # than one message number.
        for element in response:
            parsed = self._extract_fetch_data(element)
            if not parsed:
                continue
            number = parsed["message_number"]
            known = by_message.get(number)
            if known is None:
                by_message[number] = parsed
            else:
                # A second element about the same message: merge it in.
                known.update(parsed)
        return by_message

    # Maximum amount of data that will be read at any one time: the most
    # that safe_read() asks the underlying read for in a single call.
    MAXIMUM_SAFE_READ = 4096
    def safe_read(self, size):
        """Read `size` worth of data from the server in bounded pieces.

        The underlying read is never asked for more than
        MAXIMUM_SAFE_READ at a time; the pieces are joined and returned
        as one string.
        """
        pieces = []
        remaining = size
        while remaining > 0:
            pieces.append(self._read(min(remaining, self.MAXIMUM_SAFE_READ)))
            remaining -= self.MAXIMUM_SAFE_READ
        return "".join(pieces)


class IMAPMessage(message.SBHeaderMessage):
    """A SpamBayes message that lives in a folder on an IMAP server.

    A freshly constructed instance has no content ("substance");
    get_full_message() fetches it from the server.  MoveTo() records a
    pending move and Save() writes the message back, which over IMAP
    means appending a new copy and flagging the old one as deleted.
    """

    def __init__(self):
        message.SBHeaderMessage.__init__(self)
        # Folder the message is stored in (or will be, after Save()).
        self.folder = None
        # Folder the message was in before MoveTo(); None if not moved.
        self.previous_folder = None
        # FETCH item requested to get the whole message, and the key
        # under which the fetched data is looked up in the response.
        self.rfc822_command = "(BODY.PEEK[])"
        self.rfc822_key = "BODY[]"
        # True once the content of the message has been retrieved.
        self.got_substance = False
        # True if the retrieved content could not be parsed as email.
        self.invalid = False
        # True if the content could not be fetched at all.
        self.could_not_retrieve = False
        self.imap_server = None

    def extractTime(self):
        """Return an IMAP internal-date string for this message.

        When we create a new copy of a message, we need to specify
        a timestamp for the message, if we can't get the information
        from the IMAP server itself.  If the message has a valid date
        header we use that.  Otherwise, we use the current time."""
        message_date = self["Date"]
        if message_date is not None:
            parsed_date = parsedate(message_date)
            if parsed_date is not None:
                try:
                    return Time2Internaldate(time.mktime(parsed_date))
                except ValueError:
                    # Invalid dates can cause mktime() to raise a
                    # ValueError, for example:
                    #   >>> time.mktime(parsedate("Mon, 06 May 0102 10:51:16 -0100"))
                    #   Traceback (most recent call last):
                    #     File "<interactive input>", line 1, in ?
                    #   ValueError: year out of range
                    # (Why this person is getting mail from almost two
                    # thousand years ago is another question <wink>).
                    # In any case, we just pass and use the current date.
                    pass
                except OverflowError:
                    pass
        return Time2Internaldate(time.time())

    def get_full_message(self):
        """Retrieve the RFC822 message from the IMAP server and return a
        new IMAPMessage object that has the same details as this message,
        but also has the substance.

        `self` is returned instead of a new object when the substance has
        already been fetched, when the folder cannot be selected or the
        fetch runs out of memory (could_not_retrieve is then set), or
        when the fetched text cannot be parsed (invalid is then set and
        the raw text is kept in invalid_content).

        Raises BadIMAPResponseError if the FETCH response does not
        contain the requested data."""
        if self.got_substance:
            return self

        assert self.id, "Cannot get substance of message without an id"
        assert self.uid, "Cannot get substance of message without an UID"
        assert self.imap_server, "Cannot do anything without IMAP connection"

        # First, try to select the folder that the message is in.
        try:
            self.imap_server.SelectFolder(self.folder.name)
        except BadIMAPResponseError:
            # Can't select the folder, so getting the substance will not
            # work.
            self.could_not_retrieve = True
            print >> sys.stderr, "Could not select folder %s for message " \
                  "%s (uid %s)" % (self.folder.name, self.id, self.uid)
            return self

        # Now try to fetch the substance of the message.
        try:
            response = self.imap_server.uid("FETCH", self.uid,
                                            self.rfc822_command)
        except MemoryError:
            # Really big messages can trigger a MemoryError here.
            # The problem seems to be line 311 (Python 2.3) of socket.py,
            # which has "return "".join(buffers)".  This has also caused
            # problems with Mac OS X 10.3, which apparently is very stingy
            # with memory (the malloc calls fail!).  The problem then is
            # line 301 of socket.py which does
            # "data = self._sock.recv(recv_size)".
            # We want to handle this gracefully, although we can't really
            # do what we do later, and rewrite the message, since we can't
            # load it in the first place.  Maybe an elegant solution would
            # be to get the message in parts, or just use the first X
            # characters for classification.  For now, we just carry on,
            # warning the user and ignoring the message.
            self.could_not_retrieve = True
            print >> sys.stderr, "MemoryError with message %s (uid %s)" % \
                  (self.id, self.uid)
            return self

        command = "uid fetch %s" % (self.uid,)
        response_data = self.imap_server.check_response(command, response)
        data = self.imap_server.extract_fetch_data(response_data)
        # The data will be a dictionary - hopefully with only one element,
        # but maybe more than one.  The key is the message number, which we
        # do not have (we use the UID instead).  So we look through the
        # message and use the first data of the right type we find.
        rfc822_data = None
        for msg_data in data.itervalues():
            if self.rfc822_key in msg_data:
                rfc822_data = msg_data[self.rfc822_key]
                break
        if rfc822_data is None:
            raise BadIMAPResponseError("FETCH response", response_data)

        try:
            new_msg = email.message_from_string(rfc822_data, IMAPMessage)
        # We catch very broadly because the email package doesn't
        # always return email.Errors (it can return a TypeError, for
        # example) if the email is invalid.  In any case, we want
        # to keep going, and not crash, because we might leave the
        # user's mailbox in a bad state if we do.  Better to soldier on.
        # We stop short of a bare 'except', though, so that the user can
        # still interrupt the run (KeyboardInterrupt, SystemExit).
        except Exception:
            # Yikes!  Barry set this to return at this point, which
            # would work ok for training (IIRC, that's all he's
            # using it for), but for filtering, what happens is that
            # the message ends up blank, but ok, so the original is
            # flagged to be deleted, and a new (almost certainly
            # unsure) message, *with only the spambayes headers* is
            # created.  The nice solution is still to do what sb_server
            # does and have a X-Spambayes-Exception header with the
            # exception data and then the original message.
            self.invalid = True
            text, details = message.insert_exception_header(
                rfc822_data, self.id)
            self.invalid_content = text
            self.got_substance = True

            # Print the exception and a traceback.
            print >> sys.stderr, details

            return self

        new_msg.folder = self.folder
        new_msg.previous_folder = self.previous_folder
        new_msg.rfc822_command = self.rfc822_command
        new_msg.rfc822_key = self.rfc822_key
        new_msg.imap_server = self.imap_server
        new_msg.uid = self.uid
        new_msg.setId(self.id)
        new_msg.got_substance = True

        if not new_msg.has_key(options["Headers", "mailid_header_name"]):
            new_msg[options["Headers", "mailid_header_name"]] = self.id

        if options["globals", "verbose"]:
            sys.stdout.write(chr(8) + "*")
        return new_msg

    def MoveTo(self, dest):
        '''Note that message should move to another folder.  No move is
        carried out until Save() is called, for efficiency.'''
        # Only the folder the message started in is remembered, however
        # many times it is moved before being saved.
        if self.previous_folder is None:
            self.previous_folder = self.folder
        self.folder = dest

    def as_string(self, unixfrom=False):
        """Return the message as a string, ready to be sent to the server."""
        # Basically the same as the parent class's except that we handle
        # the case where the data was unparsable, so we haven't done any
        # filtering, and we are not actually a proper email.Message object.
        # We also don't mangle the from line; the server must take care of
        # this.
        if self.invalid:
            return self._force_CRLF(self.invalid_content)
        else:
            return message.SBHeaderMessage.as_string(self, unixfrom,
                                                     mangle_from_=False)

    # Matches the \Recent flag together with one adjacent space.
    recent_re = re.compile(r"\\Recent ?| ?\\Recent")
    def Save(self):
        """Save message to IMAP server.

        We can't actually update the message with IMAP, so what we do is
        create a new message and delete the old one.  Afterwards self.uid
        is updated to the UID of the new copy.

        Raises BadIMAPResponseError if none of the attempts to append the
        new copy succeeds, or if one of the other IMAP commands gives a
        bad response."""

        assert self.folder is not None, \
               "Can't save a message that doesn't have a folder."
        assert self.id, "Can't save a message that doesn't have an id."
        assert self.imap_server, "Can't do anything without IMAP connection."

        response = self.imap_server.uid("FETCH", self.uid,
                                        "(FLAGS INTERNALDATE)")
        command = "fetch %s (flags internaldate)" % (self.uid,)
        response_data = self.imap_server.check_response(command, response)
        data = self.imap_server.extract_fetch_data(response_data)
        # The data will be a dictionary - hopefully with only one element,
        # but maybe more than one.  The key is the message number, which we
        # do not have (we use the UID instead).  So we look through the
        # message and use the last data of the right type we find.
        msg_time = self.extractTime()
        flags = None
        for msg_data in data.itervalues():
            if "INTERNALDATE" in msg_data:
                msg_time = msg_data["INTERNALDATE"]
            if "FLAGS" in msg_data:
                flags = msg_data["FLAGS"]
                # The \Recent flag can be fetched, but cannot be stored
                # We must remove it from the list if it is there.
                flags = self.recent_re.sub("", flags)

        # We try to save with flags and time, then with just the
        # time, then with the flags and the current time, then with just
        # the current time.  The first should work, but the first three
        # sometimes (due to the quirky IMAP server) fail.
        last_response = None
        for flgs, tme in [(flags, msg_time),
                          (None, msg_time),
                          (flags, Time2Internaldate(time.time())),
                          (None, Time2Internaldate(time.time()))]:
            try:
                response = self.imap_server.append(self.folder.name, flgs, tme,
                                                   self.as_string())
            except BaseIMAP.error:
                continue
            try:
                self.imap_server.check_response("", response)
            except BadIMAPResponseError:
                # Remember what the server said, for the error report.
                last_response = response
            else:
                break
        else:
            # Every attempt failed.  Describe the last command tried; the
            # message itself is summarised by its size rather than being
            # dumped into the error text.  The response is that of the
            # last append the server rejected (None if every attempt
            # raised an IMAP error before a response was received).
            command = "append %s %s %s <%d byte message>" % \
                      (self.folder.name, flgs, tme, len(self.as_string()))
            raise BadIMAPResponseError(command, last_response)

        # Flag the original copy as deleted, in the folder it came from.
        if self.previous_folder is None:
            self.imap_server.SelectFolder(self.folder.name)
        else:
            self.imap_server.SelectFolder(self.previous_folder.name)
            self.previous_folder = None
        response = self.imap_server.uid("STORE", self.uid, "+FLAGS.SILENT",
                                        "(\\Deleted \\Seen)")
        command = "set %s to be deleted and seen" % (self.uid,)
        self.imap_server.check_response(command, response)

        # Not all IMAP servers immediately offer the new message, but
        # we need to find it to get the new UID.  We need to wait until
        # the server offers up an EXISTS command, so we no-op until that
        # is the case.
        # See [ 941596 ] sb_imapfilter.py not adding headers / moving messages
        # We use the recent() function, which no-ops if necessary.  We try
        # 100 times, and then give up.  If a message arrives independently,
        # and we are told about it before our message, then this could
        # cause trouble, but that would be one weird server.
        for i in xrange(100):
            response = self.imap_server.recent()
            data = self.imap_server.check_response("recent", response)
            if data[0] is not None:
                if options["globals", "verbose"]:
                    print >> sys.stderr, "[imapfilter] found saved message", self.uid,
                    print >> sys.stderr, "in iteration", i
                break
        else:
            # Giving up is not treated as an error: the searches below
            # still try to locate the new copy.
            if options["globals", "verbose"]:
                print >> sys.stderr, ("[imapfilter] can't find saved message after "
                                      "100 iterations:"), self.uid

        # We need to update the UID, as it will have changed.
        # Although we don't use the UID to keep track of messages, we do
        # have to use it for IMAP operations.
        self.imap_server.SelectFolder(self.folder.name)
        search_string = "(UNDELETED HEADER %s \"%s\")" % \
                        (options["Headers", "mailid_header_name"],
                         self.id.replace('\\',r'\\').replace('"',r'\"'))
        response = self.imap_server.uid("SEARCH", search_string)
        data = self.imap_server.check_response("search " + search_string,
                                               response)
        new_id = data[0]

        # See [ 870799 ] imap trying to fetch invalid message UID
        # It seems that although the save gave a "NO" response to the
        # first save, the message was still saved (without the flags,
        # probably).  This really isn't good behaviour on the server's
        # part, but, as usual, we try and deal with it.  So, if we get
        # more than one undeleted message with the same SpamBayes id,
        # delete all of them apart from the last one, and use that.
        multiple_ids = new_id.split()
        for id_to_remove in multiple_ids[:-1]:
            response = self.imap_server.uid("STORE", id_to_remove,
                                            "+FLAGS.SILENT",
                                            "(\\Deleted \\Seen)")
            command = "silently delete and make seen %s" % (id_to_remove,)
            self.imap_server.check_response(command, response)

        if multiple_ids:
            new_id = multiple_ids[-1]
        else:
            # Let's hope it doesn't, but, just in case, if the search
            # turns up empty, we make the assumption that the new message
            # is the last one with a recent flag.
            response = self.imap_server.uid("SEARCH", "RECENT")
            data = self.imap_server.check_response("search recent",
                                                   response)
            new_id = data[0].split(' ')[-1]

            # Ok, now we're in trouble if we still haven't found it.
            # We make a huge assumption that the new message is the one
            # with the highest UID (they are sequential, so this will be
            # ok as long as another message hasn't also arrived).
            if new_id == "":
                response = self.imap_server.uid("SEARCH", "ALL")
                data = self.imap_server.check_response("search all",
                                                       response)
                new_id = data[0].split(' ')[-1]
        self.uid = new_id


class IMAPFolder(object):
    """A folder on an IMAP server, seen as a collection of IMAPMessages.

    Iterating over the folder, or indexing it with a UID, yields
    IMAPMessage objects that have headers-derived ids but no substance.
    Train() and Filter() run the classifier over the folder's messages.
    """

    def __init__(self, folder_name, imap_server, stats):
        self.name = folder_name
        self.imap_server = imap_server
        self.stats = stats

        # Unique names for cached messages - see _generate_id below.
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def __cmp__(self, obj):
        """Two folders are equal if their names are equal."""
        if obj is None:
            # A folder is never equal to None.  (Returning False here
            # would be the same as returning 0, which means "equal".)
            return 1
        return cmp(self.name, obj.name)

    def __iter__(self):
        """Iterate through the messages in this IMAP folder."""
        for key in self.keys():
            yield self[key]

    def keys(self):
        '''Returns *uids* for all the messages in the folder not
        marked as deleted.'''
        self.imap_server.SelectFolder(self.name)
        response = self.imap_server.uid("SEARCH", "UNDELETED")
        data = self.imap_server.check_response("search undeleted", response)
        if data[0]:
            return data[0].split(' ')
        else:
            return []

    # Matches our own id header.  The id is a run of digits, optionally
    # followed by a hyphen and a uniquifier (see _generate_id).  The
    # uniquifier can run to more than one digit, and all of it has to be
    # captured, or different messages would end up sharing an id.
    custom_header_id_re = re.compile(
        re.escape(options["Headers", "mailid_header_name"]) +
        r"\:\s*(\d+(?:\-\d+)?)", re.IGNORECASE)
    # Matches a Message-ID header, capturing the text between < and >.
    message_id_re = re.compile(r"Message-ID\: ?\<([^\n\>]+)\>",
                               re.IGNORECASE)
    def __getitem__(self, key):
        """Return message matching the given *uid*.

        The messages returned have no substance (so this should be
        reasonably quick, even with large messages).  You need to call
        get_full_message() on the returned message to get the substance of
        the message from the server.

        Raises BadIMAPResponseError if the server's response does not
        include the message headers."""
        self.imap_server.SelectFolder(self.name)

        # Using RFC822.HEADER.LINES would be better here, but it seems
        # that not all servers accept it, even though it is in the RFC
        response = self.imap_server.uid("FETCH", key, "RFC822.HEADER")
        response_data = self.imap_server.check_response(\
            "fetch %s rfc822.header" % (key,), response)
        data = self.imap_server.extract_fetch_data(response_data)
        # The data will be a dictionary - hopefully with only one element,
        # but maybe more than one.  The key is the message number, which we
        # do not have (we use the UID instead).  So we look through the
        # message and use the first data of the right type we find.
        headers = None
        for msg_data in data.itervalues():
            if "RFC822.HEADER" in msg_data:
                headers = msg_data["RFC822.HEADER"]
                break
        if headers is None:
            raise BadIMAPResponseError("FETCH response", response_data)

        # Create a new IMAPMessage object, which will be the return value.
        msg = IMAPMessage()
        msg.folder = self
        msg.uid = key
        msg.imap_server = self.imap_server

        # We use the MessageID header as the ID for the message, as long
        # as it is available, and if not, we add our own.
        # Search for our custom id first, for backwards compatibility.
        for id_header_re in [self.custom_header_id_re, self.message_id_re]:
            mo = id_header_re.search(headers)
            if mo:
                msg.setId(mo.group(1))
                break
        else:
            newid = self._generate_id()
            if options["globals", "verbose"]:
                print >> sys.stderr, "[imapfilter] saving", msg.uid, "with new id:", newid
            msg.setId(newid)
            # The generated id only reaches the IMAP server if the
            # message is later saved; it is not re-saved here.  The vast
            # majority of messages have Message-ID headers, from what I
            # can tell, so this branch should only rarely be taken.

        if options["globals", "verbose"]:
            sys.stdout.write(".")
        return msg

    # Lifted straight from sb_server.py (under the name getNewMessageName)
    def _generate_id(self):
        """Return a new message id based on the current time."""
        # The message id is the time it arrived, with a uniquifier
        # appended if two arrive within one clock tick of each other.
        messageName = "%10.10d" % long(time.time())
        if messageName == self.lastBaseMessageName:
            messageName = "%s-%d" % (messageName, self.uniquifier)
            self.uniquifier += 1
        else:
            self.lastBaseMessageName = messageName
            self.uniquifier = 2
        return messageName

    def Train(self, classifier, isSpam):
        """Train folder as spam/ham.

        Messages previously trained as the opposite class are untrained
        first.  Every message that is then not recorded as trained is
        learned as `isSpam` and, if the matching move option is set,
        moved to the configured folder.  Returns the number of messages
        trained."""
        num_trained = 0
        for msg in self:
            if msg.GetTrained() == (not isSpam):
                msg = msg.get_full_message()
                if msg.could_not_retrieve:
                    # Something went wrong, and we couldn't even get
                    # an invalid message, so just skip this one.
                    # Annoyingly, we'll try to do it every time the
                    # script runs, but hopefully the user will notice
                    # the errors and move it soon enough.
                    continue
                msg.delSBHeaders()
                classifier.unlearn(msg.tokenize(), not isSpam)
                if isSpam:
                    old_class = options["Headers", "header_ham_string"]
                else:
                    old_class = options["Headers", "header_spam_string"]

                # Once the message has been untrained, its training memory
                # should reflect that on the off chance that for some
                # reason the training breaks.
                msg.RememberTrained(None)
            else:
                old_class = None

            if msg.GetTrained() is None:
                msg = msg.get_full_message()
                if msg.could_not_retrieve:
                    continue
                saved_headers = msg.currentSBHeaders()
                msg.delSBHeaders()
                classifier.learn(msg.tokenize(), isSpam)
                num_trained += 1
                msg.RememberTrained(isSpam)
                self.stats.RecordTraining(not isSpam, old_class=old_class)
                if isSpam:
                    move_opt_name = "move_trained_spam_to_folder"
                else:
                    move_opt_name = "move_trained_ham_to_folder"
                if options["imap", move_opt_name] != "":
                    # We need to restore the SpamBayes headers.
                    for header, value in saved_headers.items():
                        msg[header] = value
                    msg.MoveTo(IMAPFolder(options["imap", move_opt_name],
                                           self.imap_server, self.stats))
                    msg.Save()
        return num_trained

    def Filter(self, classifier, spamfolder, unsurefolder, hamfolder):
        """Classify the messages in this folder and move them accordingly.

        A message is (re)classified if it has no classification yet, or
        if a ham folder has been given.  Spam goes to `spamfolder`, unsure
        messages to `unsurefolder`, and ham to `hamfolder` if that is set
        (otherwise ham stays where it is).  Every classified message is
        saved back to the server.

        Returns a dictionary with the number of messages classified as
        "ham", "spam" and "unsure"."""
        count = {}
        count["ham"] = 0
        count["spam"] = 0
        count["unsure"] = 0
        for msg in self:
            cls = msg.GetClassification()
            if cls is None or hamfolder is not None:
                if options["globals", "verbose"]:
                    print >> sys.stderr, "[imapfilter] classified as %s:" % cls, msg.uid

                msg = msg.get_full_message()
                if msg.could_not_retrieve:
                    # Something went wrong, and we couldn't even get
                    # an invalid message, so just skip this one.
                    # Annoyingly, we'll try to do it every time the
                    # script runs, but hopefully the user will notice
                    # the errors and move it soon enough.

                    if options["globals", "verbose"]:
                        print >> sys.stderr, "[imapfilter] could not retrieve:", msg.uid
                    continue

                (prob, clues) = classifier.spamprob(msg.tokenize(),
                                                    evidence=True)
                # Add headers and remember classification.
                msg.delSBHeaders()
                msg.addSBHeaders(prob, clues)
                self.stats.RecordClassification(prob)

                cls = msg.GetClassification()
                if cls == options["Headers", "header_ham_string"]:
                    if hamfolder:
                        if options["globals", "verbose"]:
                            print >> sys.stderr, "[imapfilter] moving to ham folder:",
                            print >> sys.stderr, msg.uid
                        msg.MoveTo(hamfolder)
                    # Otherwise, we leave ham alone.
                    count["ham"] += 1
                elif cls == options["Headers", "header_spam_string"]:
                    if options["globals", "verbose"]:
                        print >> sys.stderr, "[imapfilter] moving to spam folder:",
                        print >> sys.stderr, msg.uid
                    msg.MoveTo(spamfolder)
                    count["spam"] += 1
                else:
                    if options["globals", "verbose"]:
                        print >> sys.stderr, "[imapfilter] moving to unsure folder:", msg.uid
                    msg.MoveTo(unsurefolder)
                    count["unsure"] += 1
                msg.Save()
            else:
                if options["globals", "verbose"]:
                    print >> sys.stderr, "[imapfilter] already classified:", msg.uid

        return count


class IMAPFilter(object):
    """Drives training and classification over the configured folders."""

    def __init__(self, classifier, stats):
        self.classifier = classifier
        self.stats = stats
        self.imap_server = None
        # Destination folders; the ones still unset are created from the
        # options by Filter().
        self.spam_folder = None
        self.unsure_folder = None
        self.ham_folder = None

    def Train(self):
        """Train on every configured ham and spam training folder.

        Folders that cannot be selected are skipped with a message.  The
        classifier is stored if anything at all was trained."""
        assert self.imap_server, "Cannot do anything without IMAP server."

        if options["globals", "verbose"]:
            started = time.time()

        trained_so_far = 0
        passes = ((False, "ham_train_folders"), (True, "spam_train_folders"))
        for is_spam, option_name in passes:
            for folder_name in options["imap", option_name]:
                # Selecting the folder checks that it exists.
                try:
                    self.imap_server.SelectFolder(folder_name)
                except BadIMAPResponseError:
                    print >> sys.stderr, "Skipping", folder_name, "as it cannot be selected."
                    continue

                if options['globals', 'verbose']:
                    kind = ["ham", "spam"][is_spam]
                    print >> sys.stderr, ("   Training %s folder %s" %
                                          (kind, folder_name))
                source = IMAPFolder(folder_name, self.imap_server, self.stats)
                newly_trained = source.Train(self.classifier, is_spam)
                trained_so_far += newly_trained
                if options['globals', 'verbose']:
                    print >> sys.stderr, "\n      ", newly_trained, "trained."

        if trained_so_far:
            self.classifier.store()

        if options["globals", "verbose"]:
            elapsed = time.time() - started
            print >> sys.stderr, ("Training took %.4f seconds, %s messages were trained."
                                  % (elapsed, trained_so_far))

    def Filter(self):
        """Classify the messages in every configured filter folder.

        The spam and unsure folders (and the ham folder, if one is
        configured) must be selectable, otherwise the program exits.
        Filter folders that cannot be selected are skipped."""
        assert self.imap_server, "Cannot do anything without IMAP server."

        if not self.spam_folder:
            name = options["imap", "spam_folder"]
            if options["globals", "verbose"]:
                print >> sys.stderr, "[imapfilter] spam folder:", name
            self.spam_folder = IMAPFolder(name, self.imap_server, self.stats)

        if not self.unsure_folder:
            name = options["imap", "unsure_folder"]
            if options["globals", "verbose"]:
                print >> sys.stderr, "[imapfilter] unsure folder:", name
            self.unsure_folder = IMAPFolder(name, self.imap_server,
                                            self.stats)

        # The ham folder is optional, so its name may well be empty.
        name = options["imap", "ham_folder"]
        if options["globals", "verbose"]:
            print >> sys.stderr, "[imapfilter] ham folder:", name
        if name and not self.ham_folder:
            self.ham_folder = IMAPFolder(name, self.imap_server, self.stats)

        if options["globals", "verbose"]:
            started = time.time()

        totals = dict.fromkeys(("ham", "spam", "unsure"), 0)

        # Select the ham, spam and unsure folders to make sure they exist.
        required = [
            (self.spam_folder,
             "Cannot select spam folder.  Please check configuration."),
            (self.unsure_folder,
             "Cannot select unsure folder.  Please check configuration."),
        ]
        if self.ham_folder:
            required.append(
                (self.ham_folder,
                 "Cannot select ham folder.  Please check configuration."))
        for needed, complaint in required:
            try:
                self.imap_server.SelectFolder(needed.name)
            except BadIMAPResponseError:
                print >> sys.stderr, complaint
                sys.exit(-1)

        for folder_name in options["imap", "filter_folders"]:
            # Select the folder to make sure it exists.
            try:
                self.imap_server.SelectFolder(folder_name)
            except BadIMAPResponseError:
                print >> sys.stderr, "Cannot select", folder_name, "... skipping."
                continue

            source = IMAPFolder(folder_name, self.imap_server, self.stats)
            tally = source.Filter(self.classifier, self.spam_folder,
                                  self.unsure_folder, self.ham_folder)
            for kind in totals:
                totals[kind] += tally.get(kind, 0)

        if options["globals", "verbose"]:
            print >> sys.stderr, ("\nClassified %s ham, %s spam, and %s unsure." %
                                  (totals["ham"], totals["spam"], totals["unsure"]))
            print >> sys.stderr, "Classifying took %.4f seconds." % (time.time() - started,)


def servers(promptForPass = False):
    """Return a list of (server, username, password) tuples, one for
    each IMAP server in the options.

    If promptForPass is True, or there are fewer passwords than usernames
    in the options, the user is prompted for the password of every
    username and the configured passwords are not used.
    """
    hosts = options["imap", "server"]
    users = options["imap", "username"]
    passwords = options["imap", "password"]

    must_prompt = promptForPass or len(passwords) < len(users)
    if must_prompt:
        passwords = [getpass("Enter password for %s:" % (user,))
                     for user in users]

    return zip(hosts, users, passwords)
            
def run(force_UI=False):
    """Command-line entry point for the IMAP filter.

    Parses sys.argv, opens the classifier database and then, depending
    on the options given, serves the web interface and/or trains and
    classifies against each configured IMAP server.

    force_UI -- if true, do not exit when no IMAP server is configured.

    Options handled here:
        -h            print the module docstring to stderr and exit
        -b            launch the browser for the web interface
        -t            train
        -c            classify
        -P            prompt for passwords
        -v/--verbose  turn on the "globals"/"verbose" option
        -e y|n        expunge ('y') or not (anything else)
        -i N          debug value passed to each IMAPSession
        -l N          loop, sleeping N minutes between passes
        -o ...        passed to options.set_from_cmdline()
    The complete option list (including -d and -p) is also handed to
    storage.database_type() to pick the database.

    The web interface is served when -l is given or when neither -t nor
    -c is given; without -l it blocks in Dibbler.run().
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hbPtcvl:e:i:d:p:o:',
                                   ["verbose"])
    except getopt.error, msg:
        print >> sys.stderr, str(msg) + '\n\n' + __doc__
        sys.exit()

    doTrain = False
    doClassify = False
    doExpunge = options["imap", "expunge"]
    imapDebug = 0
    sleepTime = 0
    promptForPass = False
    launchUI = False

    for opt, arg in opts:
        if opt == '-h':
            print >> sys.stderr, __doc__
            sys.exit()
        elif opt == "-b":
            launchUI = True
        elif opt == '-t':
            doTrain = True
        elif opt == '-P':
            promptForPass = True
        elif opt == '-c':
            doClassify = True
        elif opt in ('-v', '--verbose'):
            options["globals", "verbose"] = True
        elif opt == '-e':
            # Anything other than 'y' turns expunging off.
            doExpunge = (arg == 'y')
        elif opt == '-i':
            imapDebug = int(arg)
        elif opt == '-l':
            # Given in minutes; time.sleep() below takes seconds.
            sleepTime = int(arg) * 60
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    # -d and -p are accepted by getopt above but not handled in the loop;
    # the whole option list is passed on here instead.
    bdbname, useDBM = storage.database_type(opts)

    # Let the user know what they are using...
    v = get_current_version()
    print "%s.\n" % (v.get_long_version("SpamBayes IMAP Filter"),)

    if options["globals", "verbose"]:
        # Trailing comma: "Done." below finishes this line.
        print "Loading database %s..." % (bdbname,),

    classifier = storage.open_storage(bdbname, useDBM)
    message_db = message.Message().message_info_db

    if options["globals", "verbose"]:
        print "Done."

    if not (launchUI or force_UI or options["imap", "server"]):
        print "You need to specify both a server and a username."
        sys.exit()

    servers_data = servers(promptForPass)

    # Load stats manager.
    stats = Stats.Stats(options, message_db)

    imap_filter = IMAPFilter(classifier, stats)

    # Web interface.  We have changed the rules about this many times.
    # With 1.0.x, the rule is that the interface is served if we are
    # not classifying or training.  However, this runs into the problem
    # that if we run with -l, we might still want to edit the options,
    # and we don't want to start a separate instance, because then the
    # database is accessed from two processes.
    # With 1.1.x, the rule is that the interface is also served if the
    # -l option is used, which means it is only not served if we are
    # doing a one-off classification/train.  In that case, there would
    # probably not be enough time to get to the interface and interact
    # with it (and we don't want it to die halfway through!), and we
    # don't want to slow classification/training down, either.
    if sleepTime or not (doClassify or doTrain):
        # One session (or None for an empty server name) per account,
        # in the same order as servers_data / pwds below.
        imaps = []
        for server, username, password in servers_data:
            if server == "":
                imaps.append(None)
            else:
                imaps.append(IMAPSession(server, imapDebug, doExpunge))

        def close_db():
            message_db.store()
            message_db.close()
            message.Message().message_info_db.store()
            message.Message().message_info_db.close()
            message.Message.message_info_db = None
            classifier.store()
            classifier.close()

        def change_db():
            # NOTE(review): the assignments to classifier and imap_filter
            # bind names local to change_db, so the objects used by the
            # rest of run() are not replaced.  Also, IMAPFilter is given
            # message_db here, whereas run() constructs it with stats.
            # Left as-is pending confirmation of the intended behaviour.
            classifier = storage.open_storage(*storage.database_type(opts))
            message.Message.message_info_db = message_db
            imap_filter = IMAPFilter(classifier, message_db)

        httpServer = UserInterfaceServer(options["html_ui", "port"])
        pwds = [x[2] for x in servers_data]
        httpServer.register(IMAPUserInterface(classifier, imaps, pwds,
                                              IMAPSession, stats=stats,
                                              close_db=close_db,
                                              change_db=change_db))
        launchBrowser = launchUI or options["html_ui", "launch_browser"]
        if sleepTime:
            # Run in a separate thread, as we have more work to do.
            thread.start_new_thread(Dibbler.run, (),
                                    {"launchBrowser": launchBrowser})
        else:
            Dibbler.run(launchBrowser=launchBrowser)
    if doClassify or doTrain:
        # Keep the IMAPSession constructor arguments rather than a live
        # session, so a fresh session is created on every pass.
        imaps = []
        for server, username, password in servers_data:
            imaps.append(((server, imapDebug, doExpunge),
                          username, password))

        # In order to make working with multiple servers easier, we
        # allow the user to have separate configuration files for each
        # server.  These may specify different folders to watch, different
        # spam/unsure folders, or any other options (e.g. thresholds).
        # For each server we use the default (global) options, and load
        # the specific options on top.  To facilitate this, we use a
        # restore point for the options with just the default (global)
        # options.
        # XXX What about when we are running with -l and change options
        # XXX via the web interface?  We need to handle that, really.
        options.set_restore_point()
        while True:
            for (server, imapDebug, doExpunge), username, password in imaps:
                imap = IMAPSession(server, imapDebug, doExpunge)
                if options["globals", "verbose"]:
                    print "Account: %s:%s" % (imap.server, imap.port)
                if imap.connected:
                    # As above, we load a separate configuration file
                    # for each server, if it exists.  We look for a
                    # file in the optionsPathname directory, with the
                    # name server.name.ini or server_name_rc
                    # XXX While 1.1 is in alpha these names can be
                    # XXX changed if desired.  Please let Tony know!
                    basedir = os.path.dirname(optionsPathname)
                    fn1 = os.path.join(basedir, imap.server + ".ini")
                    fn2 = os.path.join(basedir,
                                       imap.server.replace(".", "_") +
                                       "_rc")
                    for fn in (fn1, fn2):
                        if os.path.exists(fn):
                            options.merge_file(fn)

                    try:
                        imap.login(username, password)
                    except LoginFailure, e:
                        print str(e)
                        # Drop this server's merged options before moving
                        # on; otherwise they would still be in effect
                        # for the next server in the list.
                        options.revert_to_restore_point()
                        continue
                    imap_filter.imap_server = imap

                    if doTrain:
                        if options["globals", "verbose"]:
                            print "Training"
                        imap_filter.Train()
                    if doClassify:
                        if options["globals", "verbose"]:
                            print "Classifying"
                        imap_filter.Filter()

                    imap.logout()
                    options.revert_to_restore_point()
                else:
                    # Failed to connect.  This may be a temporary problem,
                    # so just continue on and try again.  If we are only
                    # running once we will end, otherwise we'll try again
                    # in sleepTime seconds.
                    # XXX Maybe we should log this error message?
                    pass

            if sleepTime:
                time.sleep(sleepTime)
            else:
                break

# Only start the filter when executed as a script, not when imported.
if __name__ == "__main__":
    run()