File: test_util_matcher.py

package info (click to toggle)
quodlibet 4.6.0-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 18,016 kB
  • sloc: python: 85,817; sh: 385; xml: 110; makefile: 91
file content (259 lines) | stat: -rw-r--r-- 9,998 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Copyright 2021 Joschua Gandert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
from dataclasses import dataclass
from typing import List

from quodlibet.util.matcher import ObjectListMatcher

from tests import TestCase


class TMatchBasics(TestCase):
    def test_empty_weight_not_allowed(self):
        self.assertRaises(ValueError, lambda: ObjectListMatcher({}))

    def test_negative_weights_not_allowed(self):
        self.assertRaises(ValueError, lambda: ObjectListMatcher({lambda i: int(i): -1}))


class TMatchIdentity(TestCase):
    def test_all_elements_in_both_but_different_order(self):
        matcher = ObjectListMatcher.of_identity()
        a = ['cookie', 'beach', 'house']
        b = ['house', 'cookie', 'beach']

        b_match_indices = matcher.get_indices(a, b)
        for a_item, b_idx in zip(a, b_match_indices):
            assert a_item == b[b_idx]

        b = ['house', 'beach', 'cookie']
        assert matcher.get_indices(a, b) == [2, 1, 0]

    def test_simple_unbalanced(self):
        matcher = ObjectListMatcher.of_identity()
        a = ['hell', 'gel', 'shell']
        b = ['yell']
        assert matcher.get_indices(a, b) == [0, None, None]

        a = ['gel', 'hell', 'shell']
        assert matcher.get_indices(a, b) == [None, 0, None]

    def test_minimum_similarity(self):
        matcher = ObjectListMatcher.of_identity()
        a = ['mess', 'blessed', 'chess']
        b = ['pudding', 'xylophone', 'yes']
        matcher.should_store_similarity_matrix = True

        assert matcher.get_indices(a, b) == [2, 0, 1], formatted_matrix(matcher)

        matcher.minimum_similarity_ratio = 0.45
        assert matcher.get_indices(a, b) == [2, None, 1], formatted_matrix(matcher)

        matcher.minimum_similarity_ratio = 0.6
        assert matcher.get_indices(a, b) == [None, None, None], formatted_matrix(
            matcher)


class TMatchListOfSequences(TestCase):
    def test_clear_match(self):
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])
        a = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        b = [('caba', 'bacc', 2), ('abaa', 'bcca', 2)]

        assert matcher.get_indices(a, b) == [0, 1]

        # test that repeating the same call doesn't change the result
        assert matcher.get_indices(a, b) == [0, 1]

        # in this case (same length), reversing arguments should result in the same list
        assert matcher.get_indices(b, a) == [0, 1]

    def test_other_now_barely_better(self):
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])
        a = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        b = [('caba', 'bacc', 1), ('abaa', 'bcca', 2)]  # third element of first changed

        assert matcher.get_indices(a, b) == [1, 0]
        assert matcher.get_indices(a, b) == [1, 0]
        assert matcher.get_indices(b, a) == [1, 0]

    def test_change_weights(self):
        # same as in previous test
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])
        a = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        b = [('caba', 'bacc', 1), ('abaa', 'bcca', 2)]

        matcher.update_attr_to_weight({lambda i: i[1]: 1})
        assert matcher.get_indices(a, b) == [0, 1]

    def test_double_weight(self):
        matcher = ObjectListMatcher.for_sequence([4, 2])
        a = [('Great Song', 'Law', 2), ('Night Mix', 'Beach', 1)]
        b = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]

        assert matcher.get_indices(a, b) == [0, 1]

        # changed "Law" to "Low"
        a = [('Great Song', 'Low', 2), ('Night Mix', 'Beach', 1)]
        assert matcher.get_indices(a, b) == [1, 0]

    def test_nothing_to_match_b_to(self):
        matcher = ObjectListMatcher.for_sequence([7, 1])
        a = []
        b = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]

        # As this returns the indices of b to match a, it always has the size of a.
        assert matcher.get_indices(a, b) == []
        assert matcher.get_indices([], []) == []

    def test_match_a_to_nothing(self):
        matcher = ObjectListMatcher.for_sequence([7, 1])
        a = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]
        b = []

        # When no b element could be matched to an a element, -1 is used.
        assert matcher.get_indices(a, b) == [None, None]

    def test_more_in_a(self):
        matcher = ObjectListMatcher.for_sequence([7, 1])
        a = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]
        b = [('Great Sea', 'Light', 2)]

        assert matcher.get_indices(a, b) == [None, 0]

    def test_more_in_b(self):
        matcher = ObjectListMatcher.for_sequence([7, 1])
        a = [('Great Sea', 'Light', 2)]
        b = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]

        assert matcher.get_indices(a, b) == [1]

    def test_all_the_same(self):
        matcher = ObjectListMatcher.for_sequence([0.2, 0.7, 0.1])

        x = (9, 9, 9)
        assert matcher.get_indices([x, x, x], [x, x, x]) == [0, 1, 2]

    def test_numeric_if_both_good_match_current_order_preferred(self):
        # we pre-normalized the weights for clarity here (they're always normalized)
        matcher = ObjectListMatcher.for_sequence([0.6, 0.4])

        a = [(3, 2), (3, 4)]
        b = [(99, 99), (3, 3)]

        # despite having the same delta to (3, 3), here (3, 2) should be preferred to
        # (3, 4), as (3, 2) is a closer match (in terms of their indices)
        assert matcher.get_indices(a, b) == [1, 0]
        assert matcher.get_indices(b, a) == [1, 0]

    def test_numeric_asymmetry(self):
        matcher = ObjectListMatcher.for_sequence([0.6, 0.4])
        matcher.should_store_similarity_matrix = True

        a = [(3, 3), (8, 5), (9, 1)]
        b = [(9, 8), (3, 2), (3, 4)]

        # as (9, 8) is the best match for (9, 1), item (8, 5) will be matched to (3, 4)
        assert matcher.get_indices(a, b) == [1, 2, 0], formatted_matrix(matcher)

        matrix = matcher.similarity_matrix

        # matrix[0] are the match scores of element (3, 3) in a to all elements in b
        assert matrix[0][1] == matrix[0][2]

        # (9, 1) is most similar to (9, 8) and second most similar to (3, 2)
        assert matrix[2][0] > matrix[2][2]
        assert matrix[2][1] > matrix[2][2]

        # What happens here may seem confusing, but this is a result of the following
        # asymmetry: the best match in b for (9, 1) is (9, 8), but the best match in a
        # for (9, 8) isn't (9, 1), it's (8, 5). This can be seen by calculating the
        # weighted delta between (9, 8) and each of them (smaller delta = more similar):
        #   delta to (9, 1):    (9-9) * 0.6 + (8-1) * 0.4 = 2.8
        #   delta to (8, 5):    (9-8) * 0.6 + (8-5) * 0.4 = 1.8
        assert matcher.get_indices(b, a) == [1, 0, 2], formatted_matrix(matcher)

    def test_should_go_through_every_attribute(self):
        matcher = ObjectListMatcher.for_sequence([0.7, 0.3])
        matcher.should_store_similarity_matrix = True

        # There are undefeatable matches for the first attribute / weight here, and so
        # by default the algorithm will not even check the second attribute.
        a = [('a', -8), ('very clear', 0), ('way', 33), ('forward', 2)]
        b = [('very clear', 33), ('forward', -1), ('a', 5), ('way', 2)]

        assert matcher.get_indices(a, b) == [2, 0, 3, 1]
        partial_matrix = matcher.similarity_matrix

        matcher.should_go_through_every_attribute = True

        # Result definitely shouldn't change
        assert matcher.get_indices(a, b) == [2, 0, 3, 1], formatted_matrix(matcher)

        # But in this case the matrix should have
        assert matcher.similarity_matrix != partial_matrix, formatted_matrix(matcher)


def formatted_matrix(matcher: ObjectListMatcher) -> str:
    lines = ['<Similarity Matrix']
    for b_similarities in matcher.similarity_matrix:
        l = ''
        for b_sim in b_similarities:
            l += f'{b_sim:1.4f}  '
        lines.append(l)
    return '\n'.join(lines) + '\n>'


class TMatchClassFields(TestCase):
    def test_matching_works(self):
        a, b = self._get_car_lists()

        attr_to_weight = {(lambda c: c.seats): 4, (lambda c: c.name): 1,
                          (lambda c: c.features): 5}
        matcher = ObjectListMatcher(attr_to_weight)

        assert matcher.get_indices(a, b) == [2, None, 1, 0]

    def _get_car_lists(self):
        a = [Car(1, 'Speedy', ['gps', 'heater']), Car(2, 'Cheaporghiny', []),
             Car(16, 'Half-a-Bus', ['buttons']), Car(5, 'Normal Model 1', ['music'])]
        b = [Car(3, 'Mödel V5', ['gps']), Car(19, 'Cyberbus', ['buttons']),
             Car(2, 'Sheeporghiny', ['gps', 'heater', 'sheep sound button'])]
        return a, b

    def test_dominating_name_weights(self):
        a, b = self._get_car_lists()
        attr_to_weight = {(lambda c: c.seats): 0.5, (lambda c: c.name): 9,
                          (lambda c: c.features): 1.2}

        matcher = ObjectListMatcher(attr_to_weight)

        assert matcher.get_indices(a, b) == [None, 2, 1, 0]

    def test_minimum_similarity(self):
        a, b = self._get_car_lists()

        attr_to_weight = {(lambda c: c.seats): 3, (lambda c: c.features): 8}
        matcher = ObjectListMatcher(attr_to_weight)
        matcher.should_store_similarity_matrix = True

        assert matcher.get_indices(a, b) == [2, 0, 1, None], formatted_matrix(matcher)

        matcher.minimum_similarity_ratio = 0.71
        assert matcher.get_indices(a, b) == [2, None, 1, None], formatted_matrix(
            matcher)

        matcher.minimum_similarity_ratio = 0.9
        assert matcher.get_indices(a, b) == [None, None, 1, None], formatted_matrix(
            matcher)


@dataclass
class Car:
    seats: int
    name: str
    features: List[str]