File: tests.rb

package info (click to toggle)
ruby-jaro-winkler 1.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 216 kB
  • sloc: ansic: 379; ruby: 367; sh: 8; makefile: 7
file content (139 lines) | stat: -rw-r--r-- 5,651 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# encoding: utf-8
# Shared JaroWinkler test cases, written as a mixin.
#
# The including class must provide +assert_in_delta+ and +assert_raises+
# (presumably a Minitest / Test::Unit test case -- confirm against the files
# that include this module) and must have +JaroWinkler+ loaded.
#
# Expected scores are written to at most four decimal places and are compared
# through +assert_in_delta+ with whatever default tolerance the including
# class supplies.
module Tests
  # Jaro-Winkler scores with default options, including identical strings,
  # empty strings, and argument order swapped.
  def test_similarity
    assert_similarity 0.9667, 'henka',       'henkan'
    assert_similarity 1.0,    'al',          'al'
    assert_similarity 0.9611, 'martha',      'marhta'
    assert_similarity 0.8324, 'jones',       'johnson'
    assert_similarity 0.9583, 'abcvwxyz',    'cabvwxyz'
    assert_similarity 0.84,   'dwayne',      'duane'
    assert_similarity 0.8133, 'dixon',       'dicksonx'
    assert_similarity 0.0,    'fvie',        'ten'
    assert_similarity 1.0,    'tony',        'tony'
    assert_similarity 1.0,    'tonytonyjan', 'tonytonyjan'
    assert_similarity 1.0,    'x',           'x'
    assert_similarity 0.0,    '',            ''
    assert_similarity 0.0,    'tony',        ''
    assert_similarity 0.0,    '',            'tony'
    assert_similarity 0.8727, 'tonytonyjan', 'tony'
    assert_similarity 0.8727, 'tony',        'tonytonyjan'
    assert_similarity 0.9407, 'necessary',   'nessecary'
    assert_similarity 0.9067, 'does_exist',  'doesnt_exist'
    assert_similarity 0.975,  '12345678',    '12345687'
    assert_similarity 0.975,  '12345678',    '12345867'
    assert_similarity 0.95,   '12345678',    '12348567'
  end

  # Plain Jaro scores for the same pairs as #test_similarity, plus one extra
  # pair ('tonytonyjan' / 'janjantony').
  def test_jaro_similarity
    assert_jaro_similarity 0.9444, 'henka',       'henkan'
    assert_jaro_similarity 1.0,    'al',          'al'
    assert_jaro_similarity 0.9444, 'martha',      'marhta'
    assert_jaro_similarity 0.7905, 'jones',       'johnson'
    assert_jaro_similarity 0.9583, 'abcvwxyz',    'cabvwxyz'
    assert_jaro_similarity 0.8222, 'dwayne',      'duane'
    assert_jaro_similarity 0.7667, 'dixon',       'dicksonx'
    assert_jaro_similarity 0.0,    'fvie',        'ten'
    assert_jaro_similarity 1.0,    'tony',        'tony'
    assert_jaro_similarity 1.0,    'tonytonyjan', 'tonytonyjan'
    assert_jaro_similarity 1.0,    'x',           'x'
    assert_jaro_similarity 0.0,    '',            ''
    assert_jaro_similarity 0.0,    'tony',        ''
    assert_jaro_similarity 0.0,    '',            'tony'
    assert_jaro_similarity 0.7879, 'tonytonyjan', 'tony'
    assert_jaro_similarity 0.7879, 'tony',        'tonytonyjan'
    assert_jaro_similarity 0.9259, 'necessary',   'nessecary'
    assert_jaro_similarity 0.8444, 'does_exist',  'doesnt_exist'
    assert_jaro_similarity 0.9583, '12345678',    '12345687'
    assert_jaro_similarity 0.9583, '12345678',    '12345867'
    assert_jaro_similarity 0.9167, '12345678',    '12348567'
    assert_jaro_similarity 0.604,  'tonytonyjan', 'janjantony'
  end

  # JaroWinkler.distance is expected to give the same score that
  # #test_similarity asserts for this pair.
  def test_distance
    assert_distance 0.9667, 'henka', 'henkan'
  end

  # JaroWinkler.jaro_distance is expected to give the same score that
  # #test_jaro_similarity asserts for this pair.
  def test_jaro_distance
    assert_jaro_distance 0.9444, 'henka', 'henkan'
  end

  # Scores for multi-byte (CJK) input strings.
  def test_unicode
    assert_similarity 0.9818, '變形金剛4:絕跡重生', '變形金剛4: 絕跡重生'
    assert_similarity 0.8222, '連勝文',             '連勝丼'
    assert_similarity 0.8222, '馬英九',             '馬英丸'
    assert_similarity 0.6667, '良い',               'いい'
  end

  # With ignore_case: true a mixed-case pair scores the same as the
  # all-lowercase 'martha' / 'marhta' pair in #test_similarity.
  def test_ignore_case
    assert_similarity 0.9611, 'MARTHA', 'marhta', ignore_case: true
  end

  # A non-default weight (0.2) changes the score for this pair.
  def test_weight
    assert_similarity 0.9778, 'MARTHA', 'MARHTA', weight: 0.2
  end

  # With threshold: 0.99 the expected score equals the plain Jaro score that
  # #test_jaro_similarity asserts for 'martha' / 'marhta'.
  def test_threshold
    assert_similarity 0.9444, 'MARTHA', 'MARHTA', threshold: 0.99
  end

  # Scores with adj_table: true; several expected values differ from the
  # defaults asserted for the lowercase pairs in #test_similarity.
  def test_adjusting_table
    assert_similarity 0.9667, 'HENKA',    'HENKAN',   adj_table: true
    assert_similarity 1.0,    'AL',       'AL',       adj_table: true
    assert_similarity 0.9611, 'MARTHA',   'MARHTA',   adj_table: true
    assert_similarity 0.8598, 'JONES',    'JOHNSON',  adj_table: true
    assert_similarity 0.9583, 'ABCVWXYZ', 'CABVWXYZ', adj_table: true
    assert_similarity 0.8730, 'DWAYNE',   'DUANE',    adj_table: true
    assert_similarity 0.8393, 'DIXON',    'DICKSONX', adj_table: true
    assert_similarity 0.0,    'FVIE',     'TEN',      adj_table: true
  end

  # A weight of 0.26 must be rejected with JaroWinkler::InvalidWeightError.
  def test_error
    assert_raises JaroWinkler::InvalidWeightError do
      JaroWinkler.similarity 'MARTHA', 'MARHTA', weight: 0.26
    end
  end

  # Smoke test: makes no assertion about the score, so it only fails if the
  # call raises for two 120-character inputs.
  def test_long_string
    JaroWinkler.similarity 'haisai' * 20, 'haisai' * 20
  end

  # Strings transcoded to other encodings must score the same as the original
  # literals; the last line compares an ISO-8859-1 byte with the 'è' literal.
  def test_encoding
    assert_encoding '焦玟綾', '焦紋綾', Encoding::Big5
    assert_encoding '簡煒航', '簡偉航', Encoding::Big5_HKSCS
    assert_encoding '西島之', '西鳥志', Encoding::EUCJP
    assert_encoding '松本行弘', '枩本行弘', Encoding::Shift_JIS
    assert_similarity 1.0, "\xe8".force_encoding('iso8859-1'), 'è'
  end

  # nil or Symbol arguments in either position must raise TypeError.
  def test_raises_type_error
    assert_raises(TypeError){ JaroWinkler.similarity 'MARTHA', nil }
    assert_raises(TypeError){ JaroWinkler.similarity nil, 'MARTHA' }
    assert_raises(TypeError){ JaroWinkler.similarity nil, nil }
    assert_raises(TypeError){ JaroWinkler.similarity 'MARTHA', :non_string }
    assert_raises(TypeError){ JaroWinkler.similarity :non_string, 'MARTHA' }
    assert_raises(TypeError){ JaroWinkler.similarity :non_string, :non_string }
  end

  private

  # Asserts that JaroWinkler.distance(str1, str2, **options) is within the
  # default delta of +score+.
  def assert_distance score, str1, str2, **options
    assert_in_delta score, JaroWinkler.distance(str1, str2, **options)
  end

  # Asserts that str1/str2 score the same after being transcoded to
  # +encoding+ as they do in their original encoding.
  #
  # The reference score comes from JaroWinkler.distance on the untouched
  # strings. +options+ are forwarded to both the reference computation and the
  # transcoded comparison so that both sides are scored with the same settings.
  def assert_encoding str1, str2, encoding, **options
    expected = JaroWinkler.distance(str1, str2, **options)
    assert_similarity expected, str1.encode(encoding), str2.encode(encoding), **options
  end

  # Asserts that JaroWinkler.jaro_distance(str1, str2, **options) is within
  # the default delta of +score+.
  def assert_jaro_distance score, str1, str2, **options
    assert_in_delta score, JaroWinkler.jaro_distance(str1, str2, **options)
  end

  # Asserts that JaroWinkler.similarity(str1, str2, **options) is within the
  # default delta of +score+.
  def assert_similarity score, str1, str2, **options
    assert_in_delta score, JaroWinkler.similarity(str1, str2, **options)
  end

  # Asserts that JaroWinkler.jaro_similarity(str1, str2, **options) is within
  # the default delta of +score+.
  def assert_jaro_similarity score, str1, str2, **options
    assert_in_delta score, JaroWinkler.jaro_similarity(str1, str2, **options)
  end
end