1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
|
# encoding: utf-8
module Tests
def test_similarity
assert_similarity 0.9667, 'henka', 'henkan'
assert_similarity 1.0, 'al', 'al'
assert_similarity 0.9611, 'martha', 'marhta'
assert_similarity 0.8324, 'jones', 'johnson'
assert_similarity 0.9583, 'abcvwxyz', 'cabvwxyz'
assert_similarity 0.84, 'dwayne', 'duane'
assert_similarity 0.8133, 'dixon', 'dicksonx'
assert_similarity 0.0, 'fvie', 'ten'
assert_similarity 1.0, 'tony', 'tony'
assert_similarity 1.0, 'tonytonyjan', 'tonytonyjan'
assert_similarity 1.0, 'x', 'x'
assert_similarity 0.0, '', ''
assert_similarity 0.0, 'tony', ''
assert_similarity 0.0, '', 'tony'
assert_similarity 0.8727, 'tonytonyjan', 'tony'
assert_similarity 0.8727, 'tony', 'tonytonyjan'
assert_similarity 0.9407, 'necessary', 'nessecary'
assert_similarity 0.9067, 'does_exist', 'doesnt_exist'
assert_similarity 0.975, '12345678', '12345687'
assert_similarity 0.975, '12345678', '12345867'
assert_similarity 0.95, '12345678', '12348567'
end
def test_jaro_similarity
assert_jaro_similarity 0.9444, 'henka', 'henkan'
assert_jaro_similarity 1.0, 'al', 'al'
assert_jaro_similarity 0.9444, 'martha', 'marhta'
assert_jaro_similarity 0.7905, 'jones', 'johnson'
assert_jaro_similarity 0.9583, 'abcvwxyz', 'cabvwxyz'
assert_jaro_similarity 0.8222, 'dwayne', 'duane'
assert_jaro_similarity 0.7667, 'dixon', 'dicksonx'
assert_jaro_similarity 0.0, 'fvie', 'ten'
assert_jaro_similarity 1.0, 'tony', 'tony'
assert_jaro_similarity 1.0, 'tonytonyjan', 'tonytonyjan'
assert_jaro_similarity 1.0, 'x', 'x'
assert_jaro_similarity 0.0, '', ''
assert_jaro_similarity 0.0, 'tony', ''
assert_jaro_similarity 0.0, '', 'tony'
assert_jaro_similarity 0.7879, 'tonytonyjan', 'tony'
assert_jaro_similarity 0.7879, 'tony', 'tonytonyjan'
assert_jaro_similarity 0.9259, 'necessary', 'nessecary'
assert_jaro_similarity 0.8444, 'does_exist', 'doesnt_exist'
assert_jaro_similarity 0.9583, '12345678', '12345687'
assert_jaro_similarity 0.9583, '12345678', '12345867'
assert_jaro_similarity 0.9167, '12345678', '12348567'
assert_jaro_similarity 0.604, 'tonytonyjan', 'janjantony'
end
def test_distance
assert_distance 0.9667, 'henka', 'henkan'
end
def test_jaro_distance
assert_jaro_distance 0.9444, 'henka', 'henkan'
end
def test_unicode
assert_similarity 0.9818, '變形金剛4:絕跡重生', '變形金剛4: 絕跡重生'
assert_similarity 0.8222, '連勝文', '連勝丼'
assert_similarity 0.8222, '馬英九', '馬英丸'
assert_similarity 0.6667, '良い', 'いい'
end
def test_ignore_case
assert_similarity 0.9611, 'MARTHA', 'marhta', ignore_case: true
end
def test_weight
assert_similarity 0.9778, 'MARTHA', 'MARHTA', weight: 0.2
end
def test_threshold
assert_similarity 0.9444, 'MARTHA', 'MARHTA', threshold: 0.99
end
def test_adjusting_table
assert_similarity 0.9667, 'HENKA', 'HENKAN', adj_table: true
assert_similarity 1.0, 'AL', 'AL', adj_table: true
assert_similarity 0.9611, 'MARTHA', 'MARHTA', adj_table: true
assert_similarity 0.8598, 'JONES', 'JOHNSON', adj_table: true
assert_similarity 0.9583, 'ABCVWXYZ', 'CABVWXYZ', adj_table: true
assert_similarity 0.8730, 'DWAYNE', 'DUANE', adj_table: true
assert_similarity 0.8393, 'DIXON', 'DICKSONX', adj_table: true
assert_similarity 0.0, 'FVIE', 'TEN', adj_table: true
end
def test_error
assert_raises JaroWinkler::InvalidWeightError do
JaroWinkler.similarity 'MARTHA', 'MARHTA', weight: 0.26
end
end
def test_long_string
JaroWinkler.similarity 'haisai' * 20, 'haisai' * 20
end
def test_encoding
assert_encoding '焦玟綾', '焦紋綾', Encoding::Big5
assert_encoding '簡煒航', '簡偉航', Encoding::Big5_HKSCS
assert_encoding '西島之', '西鳥志', Encoding::EUCJP
assert_encoding '松本行弘', '枩本行弘', Encoding::Shift_JIS
assert_similarity 1.0, "\xe8".force_encoding('iso8859-1'), 'è'
end
def test_raises_type_error
assert_raises(TypeError){ JaroWinkler.similarity 'MARTHA', nil }
assert_raises(TypeError){ JaroWinkler.similarity nil, 'MARTHA' }
assert_raises(TypeError){ JaroWinkler.similarity nil, nil }
assert_raises(TypeError){ JaroWinkler.similarity 'MARTHA', :non_string }
assert_raises(TypeError){ JaroWinkler.similarity :non_string, 'MARTHA' }
assert_raises(TypeError){ JaroWinkler.similarity :non_string, :non_string }
end
private
def assert_distance score, str1, str2, **options
assert_in_delta score, JaroWinkler.distance(str1, str2, **options)
end
def assert_encoding str1, str2, encoding, **options
assert_similarity JaroWinkler.distance(str1, str2), str1.encode(encoding), str2.encode(encoding)
end
def assert_jaro_distance score, str1, str2, **options
assert_in_delta score, JaroWinkler.jaro_distance(str1, str2, **options)
end
def assert_similarity score, str1, str2, **options
assert_in_delta score, JaroWinkler.similarity(str1, str2, **options)
end
def assert_jaro_similarity score, str1, str2, **options
assert_in_delta score, JaroWinkler.jaro_similarity(str1, str2, **options)
end
end
|