File: cdb_classifier.py

package info (click to toggle)
spambayes 1.1b1%2Bgit20190201.1335ca8-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 4,300 kB
  • sloc: python: 35,239; ansic: 444; lisp: 83; sh: 69; makefile: 33
file content (26 lines) | stat: -rw-r--r-- 857 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
"""A classifier that uses a CDB database.

A CDB wordinfo database is quite small and fast but is slow to update.
It is appropriate if training is done rarely (e.g. monthly or weekly using
archived ham and spam).  See mailsort.py for an example application that
uses this classifier.
"""

from spambayes import cdb
from spambayes.classifier import Classifier

class CdbClassifier(Classifier):
    """Classifier whose word records are precomputed probabilities.

    Records are written by ``save_wordinfo`` as the string form of the
    probability computed by the base ``Classifier``; ``probability`` then
    only has to convert such a record back to a float.
    """

    def __init__(self, cdbfile=None):
        """Initialise the base classifier, optionally backed by a CDB.

        When *cdbfile* is not None, ``self.wordinfo`` is replaced with
        ``cdb.Cdb(cdbfile)``; otherwise the ``wordinfo`` set up by
        ``Classifier.__init__`` is left untouched.
        """
        Classifier.__init__(self)
        if cdbfile is None:
            return
        # NOTE(review): presumably cdbfile is an open file object for an
        # existing CDB -- confirm against cdb.Cdb.
        self.wordinfo = cdb.Cdb(cdbfile)

    def probability(self, record):
        """Return *record* converted to a float."""
        return float(record)

    def save_wordinfo(self, db_file):
        """Write every word's probability to *db_file* via ``cdb.cdb_make``.

        For each ``(word, record)`` pair in ``self.wordinfo`` the base
        class ``Classifier.probability`` is evaluated (not this class's
        override) and stored as ``str(probability)`` under the word.
        """
        # Explicitly call the base implementation: the override above only
        # converts an already-stored record with float().
        entries = [
            (token, str(Classifier.probability(self, info)))
            for token, info in self.wordinfo.iteritems()
        ]
        cdb.cdb_make(db_file, entries)