File: naivebayes.py

package info (click to toggle)
w3af 1.0-rc3svn3489-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 59,908 kB
  • ctags: 16,916
  • sloc: python: 136,990; xml: 63,472; sh: 153; ruby: 94; makefile: 40; asm: 35; jsp: 32; perl: 18; php: 5
file content (57 lines) | stat: -rw-r--r-- 2,441 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Natural Language Toolkit - NaiveBayes
#  Capable of classifying the test or gold data using the Naive Bayes algorithm
#
# Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com>
#
# URL: <http://www.nltk.org/>
# This software is distributed under GPL, for license information see LICENSE.TXT

from nltk_contrib.classifier import instances as ins, decisionstump as ds, Classifier
from nltk_contrib.classifier.exceptions import invaliddataerror as inv

class NaiveBayes(Classifier):
    """Classifier that labels instances using the Naive Bayes rule.

    For every candidate class value the score is the product of the
    per-attribute probabilities looked up in ``post_probs`` and the class
    frequency taken from ``class_freq_dist``; the class value with the
    highest score is assigned to the instance.
    """

    def __init__(self, training, attributes, klass):
        """Store the training data, attributes and class attribute.

        The arguments are forwarded unchanged to ``Classifier.__init__``.
        The probability tables stay ``None`` until ``train`` is called.
        """
        Classifier.__init__(self, training, attributes, klass)
        self.post_probs = None
        self.class_freq_dist = None

    def train(self):
        """Build the probability tables from the training instances."""
        Classifier.train(self)
        # NOTE: 'posterior_probablities' (sic) is the name exposed by the
        # training instances object; it must not be "corrected" here.
        self.post_probs = self.training.posterior_probablities(self.attributes, self.klass)
        self.class_freq_dist = self.training.class_freq_dist()
        # Laplacian smoothing: count every class value once more so that no
        # class ends up with a zero frequency.
        for klass_value in self.klass:
            self.class_freq_dist.inc(klass_value)

    def classify(self, instances):
        """Set ``classified_klass`` on every instance in ``instances``."""
        for instance in instances:
            instance.classified_klass = self.estimate_klass(instance)

    def estimate_klass(self, instance):
        """Return the class value with the highest score for ``instance``.

        The class values are scanned once, keeping the best score seen so
        far.  When several class values share the highest score, the one
        that comes last in ``self.klass`` is returned.  Returns ``None`` if
        ``self.klass`` yields no class values.
        """
        best_klass_value = None
        best_probability = None
        for klass_value in self.klass:
            probability = self.class_conditional_probability(instance, klass_value)
            # '>=' (rather than '>') lets a later class value replace an
            # earlier one on equal scores.
            if best_probability is None or probability >= best_probability:
                best_klass_value = klass_value
                best_probability = probability
        return best_klass_value

    def prior_probability(self, klass_value):
        """Return the frequency of ``klass_value`` in ``class_freq_dist``."""
        return self.class_freq_dist.freq(klass_value)

    def posterior_probability(self, attribute, attribute_value, klass_value):
        """Look up the value stored in ``post_probs`` for the given
        attribute, attribute value and class value."""
        return self.post_probs.value(attribute, attribute_value, klass_value)

    def class_conditional_probability(self, instance, klass_value):
        """Return the score of ``klass_value`` for ``instance``.

        The score is the product, over all attributes, of
        ``posterior_probability`` for the instance's attribute value,
        multiplied by ``prior_probability`` of the class value.
        """
        class_cond_prob = 1.0
        for attribute in self.attributes:
            attr_value = instance.value(attribute)
            class_cond_prob *= self.posterior_probability(attribute, attr_value, klass_value)
        class_cond_prob *= self.prior_probability(klass_value)
        return class_cond_prob

    @classmethod
    def can_handle_continuous_attributes(cls):
        """Return ``True``: this classifier reports that it accepts
        continuous attributes."""
        return True

    def is_trained(self):
        """Return ``True`` once both probability tables have been set."""
        return self.post_probs is not None and self.class_freq_dist is not None