File: bayesian_integration_test.rb

package info (click to toggle)
ruby-classifier-reborn 2.2.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,424 kB
  • sloc: ruby: 2,021; makefile: 7
file content (63 lines) | stat: -rw-r--r-- 2,054 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'
require_relative '../data/test_data_loader'

# Integration test checking that ClassifierReborn::Bayes produces the same
# classification scores with its default backend and with the Redis backend
# when both are trained (and partially untrained) on the same SMS data.
class BayesianIntegrationTest < Minitest::Test
  # Number of leading records of the data set used for training.
  TRAINING_SIZE = 4000
  # Number of records, taken right after the training slice, used for scoring.
  TESTING_SIZE = 1000

  # Builds one classifier per backend and slices the SMS data into a training
  # set and a testing set.
  #
  # Skips the test when Redis cannot be reached, or when the data set holds
  # fewer than TRAINING_SIZE + TESTING_SIZE records.
  def setup
    begin
      @memory_classifier = ClassifierReborn::Bayes.new 'Ham', 'Spam'
      @redis_backend = ClassifierReborn::BayesRedisBackend.new
      # Issues `CONFIG SET save ""` on the backend's Redis client; per the
      # Redis documentation an empty "save" value removes all snapshot points.
      @redis_backend.instance_variable_get(:@redis).config(:set, "save", "")
      @redis_classifier = ClassifierReborn::Bayes.new 'Ham', 'Spam', backend: @redis_backend
    rescue Redis::CannotConnectError => e
      skip(e.message)
    end

    data = TestDataLoader.sms_data
    required = TRAINING_SIZE + TESTING_SIZE
    if data.length < required
      TestDataLoader.report_insufficient_data(data.length, required)
      # The rescue variable `e` is nil on this path, so build an explicit
      # message instead of skipping without a reason.
      skip("Insufficient SMS data: #{data.length} records available, #{required} required")
    end

    @training_set = data[0, TRAINING_SIZE]
    @testing_set = data[TRAINING_SIZE, TESTING_SIZE]
  end

  # Resets the Redis backend, if one was created, so state does not leak
  # between runs.
  def teardown
    @redis_backend.reset unless @redis_backend.nil?
  end

  # Both backends must yield identical scores after full training and again
  # after untraining the first half of the training set.
  def test_equality_of_backends
    train_model @memory_classifier
    train_model @redis_classifier
    assert_equal classification_scores(@memory_classifier).hash, classification_scores(@redis_classifier).hash
    untrain_model @memory_classifier, TRAINING_SIZE / 2
    untrain_model @redis_classifier, TRAINING_SIZE / 2
    assert_equal classification_scores(@memory_classifier).hash, classification_scores(@redis_classifier).hash
  end

  # Trains +classifier+ on every record of the training set.
  def train_model(classifier)
    @training_set.each do |line|
      category, text = category_and_text(line)
      classifier.train(category, text)
    end
  end

  # Untrains +classifier+ on the first +limit+ records of the training set
  # (on all of them by default).
  def untrain_model(classifier, limit=Float::INFINITY)
    @training_set.each_with_index do |line, i|
      break if i >= limit
      category, text = category_and_text(line)
      classifier.untrain(category, text)
    end
  end

  # Classifies every record of the testing set with +classifier+.
  #
  # Returns an Array of Strings: "result:score" for finite scores and
  # "irrelevant" when the score is infinite.
  def classification_scores(classifier)
    @testing_set.map do |line|
      _category, text = category_and_text(line)
      result, score = classifier.classify_with_score(text)
      score.infinite? ? "irrelevant" : "#{result}:#{score}"
    end
  end

  private

  # Splits a tab-separated record and returns its first and last fields as
  # [category, text].
  def category_and_text(line)
    fields = line.strip.split("\t")
    [fields.first, fields.last]
  end
end