File: hasher_test.rb

package info (click to toggle)
ruby-classifier-reborn 2.2.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,424 kB
  • sloc: ruby: 2,021; makefile: 7
file content (67 lines) | stat: -rw-r--r-- 2,189 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
require_relative '../test_helper'
require 'tempfile'

# Tests for Hasher's word-hash helpers (word_hash / clean_word_hash) and for
# the stopword configuration reachable through Hasher::STOPWORDS and
# Hasher::STOPWORDS_PATH.
#
# Several tests mutate those two shared constants, so setup snapshots the
# search path and teardown restores it and empties the loaded stopword lists.
class HasherTest < Minitest::Test
  # Remember the stopword search path so teardown can put it back.
  def setup
    @original_stopwords_path = Hasher::STOPWORDS_PATH.dup
  end

  # Undo any stopword changes a test made. STOPWORDS_PATH is restored in place
  # (clear + concat) so the constant keeps referring to the same object.
  def teardown
    Hasher::STOPWORDS.clear
    Hasher::STOPWORDS_PATH.clear.concat @original_stopwords_path
  end

  # word_hash is expected to keep punctuation tokens alongside the words.
  def test_word_hash
    hash = { good: 1, :'!' => 1, hope: 1, :"'" => 1, :'.' => 1, love: 1, word: 1, them: 1, test: 1 }
    assert_equal hash, Hasher.word_hash("here are some good words of test's. I hope you love them!")
  end

  # clean_word_hash is expected to drop punctuation tokens; with the default
  # arguments "words" is counted as :word.
  def test_clean_word_hash
    hash = { good: 1, word: 1, hope: 1, love: 1, them: 1, test: 1 }
    assert_equal hash, Hasher.clean_word_hash("here are some good words of test's. I hope you love them!")
  end

  # With the third argument set to false the words are expected unstemmed
  # ("words" stays :words, "test's" becomes :tests).
  def test_clean_word_hash_without_stemming
    hash = { good: 1, words: 1, hope: 1, love: 1, them: 1, tests: 1 }
    assert_equal hash, Hasher.clean_word_hash("here are some good words of test's. I hope you love them!", 'en', false)
  end

  # Known languages yield a non-empty list; an unknown one yields an empty one.
  def test_default_stopwords
    refute_empty Hasher::STOPWORDS['en']
    refute_empty Hasher::STOPWORDS['fr']
    assert_empty Hasher::STOPWORDS['gibberish']
  end

  # After the 'en' entry is removed and a custom directory is put first on the
  # search path, looking 'en' up again must give a different list.
  # NOTE(review): presumably STOPWORDS reloads a missing entry from
  # STOPWORDS_PATH on lookup -- confirm against Hasher.
  def test_loads_custom_stopwords
    default_english_stopwords = Hasher::STOPWORDS['en']

    # Remove the english stopwords
    Hasher::STOPWORDS.delete('en')

    # Add a custom stopwords path
    Hasher::STOPWORDS_PATH.unshift File.expand_path('../data/stopwords', File.dirname(__FILE__))

    custom_english_stopwords = Hasher::STOPWORDS['en']

    refute_equal default_english_stopwords, custom_english_stopwords
  end

  # Registers the directory of a throwaway stopword file through
  # Hasher.add_custom_stopword_path and uses the file's generated basename as
  # the language argument of clean_word_hash.
  def test_add_custom_stopword_path
    # Create the stopword file next to this test file.
    temp_stopwords = Tempfile.new('xy', File.dirname(__FILE__))

    begin
      # Add some stopwords and flush them to disk.
      temp_stopwords << 'this words fun'
      temp_stopwords.close

      # Directory and basename both come from the tempfile's path.
      temp_stopwords_path = File.dirname(temp_stopwords.path)
      temp_stopwords_name = File.basename(temp_stopwords.path)

      Hasher.add_custom_stopword_path(temp_stopwords_path)
      hash = { list: 1, cool: 1 }
      assert_equal hash, Hasher.clean_word_hash('this is a list of cool words!', temp_stopwords_name)
    ensure
      # Always delete the file, even when the assertion fails, so nothing is
      # left behind in the test directory.
      temp_stopwords.close!
    end
  end
end