File: test_cache.py

package info (click to toggle)
deepdiff 8.1.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,716 kB
  • sloc: python: 14,702; makefile: 164; sh: 9
file content (178 lines) | stat: -rw-r--r-- 7,811 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import pytest
from decimal import Decimal
from deepdiff import DeepDiff
from deepdiff.helper import py_current_version


class TestCache:
    """Tests that run DeepDiff with explicit cache settings.

    Every test builds a ``DeepDiff`` with specific ``cache_size`` /
    ``cache_tuning_sample_size`` values and checks the resulting diff (and,
    where the numbers are asserted, ``get_stats()``) against expected values.
    The ``nested_*`` arguments are not defined in this file; presumably they
    are pytest fixtures supplied by the test suite's conftest.
    """

    @pytest.mark.slow
    def test_cache_deeply_nested_a1(self, nested_a_t1, nested_a_t2, nested_a_result, nested_a_affected_paths, benchmark):
        """Run the nested-A comparison through the ``benchmark`` callable."""
        benchmark(self._test_cache_deeply_nested_a1, nested_a_t1, nested_a_t2, nested_a_result, nested_a_affected_paths)

    def _test_cache_deeply_nested_a1(self, nested_a_t1, nested_a_t2, nested_a_result, nested_a_affected_paths):
        """Diff the nested-A inputs with ``cache_size=5000`` and verify the result.

        Checks the diff itself, its ``to_dict()`` form, the affected paths and
        the affected root keys.
        """
        diff = DeepDiff(nested_a_t1, nested_a_t2, ignore_order=True,
                        cache_size=5000, cache_tuning_sample_size=280,
                        cutoff_intersection_for_pairs=1)

        # The cache statistics are not asserted here: the exact-match check was
        # already disabled in this test. Last recorded values, for reference:
        # PASSES COUNT 1671, DIFF COUNT 8556, DISTANCE CACHE HIT COUNT 3445,
        # both limit flags False.
        assert nested_a_result == diff
        # Comparing against to_dict() with ignore_order=False makes sure the
        # plain-dict form matches the expected result exactly.
        diff_of_diff = DeepDiff(nested_a_result, diff.to_dict(), ignore_order=False)
        assert not diff_of_diff
        assert nested_a_affected_paths == diff.affected_paths
        assert [0, 1] == diff.affected_root_keys

    @pytest.mark.slow
    def test_cache_deeply_nested_a2(self, nested_a_t1, nested_a_t2, nested_a_result):
        """Diff the nested-A inputs with a smaller cache (``cache_size=500``)."""
        diff = DeepDiff(nested_a_t1, nested_a_t2, ignore_order=True,
                        cache_size=500, cache_tuning_sample_size=500,
                        cutoff_intersection_for_pairs=1)

        # The cache statistics are not asserted: the disabled check noted that
        # the counts on Python 3.5 (PASSES 3981 / DIFF 19586 / DISTANCE CACHE
        # HIT 11925) differed from other versions (3960 / 19469 / 11847).
        assert nested_a_result == diff
        diff_of_diff = DeepDiff(nested_a_result, diff.to_dict(), ignore_order=False)
        assert not diff_of_diff

    def test_cache_deeply_nested_b(self, nested_b_t1, nested_b_t2, nested_b_result):
        """Diff the nested-B inputs with cache tuning disabled (sample size 0)."""
        diff = DeepDiff(nested_b_t1, nested_b_t2, ignore_order=True,
                        cache_size=5000, cache_tuning_sample_size=0,
                        cutoff_intersection_for_pairs=1)

        stats = diff.get_stats()
        expected_stats = {
            'PASSES COUNT': 110,
            'DIFF COUNT': 306,
            'DISTANCE CACHE HIT COUNT': 0,
            'MAX PASS LIMIT REACHED': False,
            'MAX DIFF LIMIT REACHED': False
        }
        # The stats are compared through DeepDiff's log-scale option rather
        # than with ==, so the counts need not match the recorded values
        # exactly (presumably to tolerate small run-to-run variation).
        stats_diff = DeepDiff(expected_stats, stats, use_log_scale=True, log_scale_similarity_threshold=0.15)
        assert not stats_diff
        assert nested_b_result == diff

        diff_of_diff = DeepDiff(nested_b_result, diff.to_dict(), ignore_order=False)
        assert not diff_of_diff

    def test_cache_1D_array_of_numbers_that_do_not_overlap(self):
        """Diff two flat integer lists that share no values."""
        t1 = list(range(1, 30))
        t2 = list(range(100, 120))

        diff = DeepDiff(t1, t2, ignore_order=True, max_passes=40000,
                        max_diffs=30000, cache_size=5000, cache_tuning_sample_size=100)

        stats = diff.get_stats()
        # Since there was no overlap between the 2 arrays, even though ignore_order=True,
        # the algorithm switched to comparing by order.
        expected_stats = {
            'PASSES COUNT': 0,
            'DIFF COUNT': 50,
            'DISTANCE CACHE HIT COUNT': 0,
            'MAX PASS LIMIT REACHED': False,
            'MAX DIFF LIMIT REACHED': False
        }
        assert expected_stats == stats

        # Index-by-index pairing: the first len(t2) positions are reported as
        # changed values, and the remaining items of t1 as removed.
        expected = {
            'values_changed': {
                'root[{}]'.format(index): {'new_value': new, 'old_value': old}
                for index, (old, new) in enumerate(zip(t1, t2))
            },
            'iterable_item_removed': {
                'root[{}]'.format(index): t1[index]
                for index in range(len(t2), len(t1))
            },
        }
        assert expected == diff

    def test_cache_1D_array_of_numbers_that_overlap(self):
        """Diff two flat integer lists that share the values 13..29."""
        t1 = list(range(1, 30))
        t2 = list(range(13, 33))

        diff = DeepDiff(t1, t2, ignore_order=True, max_passes=40000,
                        max_diffs=30000, cache_size=5000, cache_tuning_sample_size=100)

        stats = diff.get_stats()
        # Per the original author's note, an optimization applies here: behind
        # the scenes the Python lists are converted into NumPy arrays to
        # compute the distances between numbers.
        # NOTE(review): that note also said the distance cache "was heavily
        # used and stayed ON", but the stats asserted below expect 0 distance
        # cache hits -- confirm which statement is current.
        expected_stats = {
            'PASSES COUNT': 1,
            'DIFF COUNT': 16,
            'DISTANCE CACHE HIT COUNT': 0,
            'MAX PASS LIMIT REACHED': False,
            'MAX DIFF LIMIT REACHED': False
        }
        assert expected_stats == stats

        expected = {
            'values_changed': {
                'root[11]': {
                    'new_value': 30,
                    'old_value': 12
                },
                'root[10]': {
                    'new_value': 31,
                    'old_value': 11
                },
                'root[9]': {
                    'new_value': 32,
                    'old_value': 10
                }
            },
            'iterable_item_removed': {
                'root[0]': 1,
                'root[1]': 2,
                'root[2]': 3,
                'root[3]': 4,
                'root[4]': 5,
                'root[5]': 6,
                'root[6]': 7,
                'root[7]': 8,
                'root[8]': 9
            }
        }
        assert expected == diff

    def test_cache_does_not_affect_final_results(self):
        """The same inputs must diff identically with the cache off and on."""
        t1 = [[['b'], 8, 1, 2, 3, 4]]
        t2 = [[['a'], 5, 1, 2, 3, 4]]

        # Identical settings except for cache_size (0 vs 5000).
        diff1 = DeepDiff(t1, t2, ignore_order=True, cache_size=0,
                         cache_tuning_sample_size=0, cutoff_intersection_for_pairs=1)
        diff2 = DeepDiff(t1, t2, ignore_order=True, cache_size=5000,
                         cache_tuning_sample_size=0, cutoff_intersection_for_pairs=1)
        assert diff1 == diff2