File: test_itemrepository.cpp

package info (click to toggle)
kdevelop 4%3A22.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 70,096 kB
  • sloc: cpp: 284,635; javascript: 3,558; python: 3,422; sh: 1,319; ansic: 685; xml: 331; php: 95; lisp: 66; makefile: 39; sed: 12
file content (387 lines) | stat: -rw-r--r-- 15,263 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
#include "itemrepositorytestbase.h"

#include <QTest>
#include <serialization/itemrepository.h>
#include <serialization/indexedstring.h>
#include <cstdlib>
#include <ctime>

using namespace KDevelop;

struct TestItem
{
    TestItem(uint hash, uint dataSize) : m_hash(hash)
        , m_dataSize(dataSize)
    {
    }

    TestItem& operator=(const TestItem& rhs) = delete;

    //Every item has to implement this function, and return a valid hash.
    //Must be exactly the same hash value as ExampleItemRequest::hash() has returned while creating the item.
    unsigned int hash() const
    {
        return m_hash;
    }

    //Every item has to implement this function, and return the complete size this item takes in memory.
    //Must be exactly the same value as ExampleItemRequest::itemSize() has returned while creating the item.
    unsigned int itemSize() const
    {
        return sizeof(TestItem) + m_dataSize;
    }

    bool equals(const TestItem* rhs) const
    {
        return rhs->m_hash == m_hash
               && itemSize() == rhs->itemSize()
               && memcmp(reinterpret_cast<const char*>(this), rhs, itemSize()) == 0;
    }

    uint m_hash;
    uint m_dataSize;
};

struct TestItemRequest
{
    TestItem& m_item;
    bool m_compareData;

    TestItemRequest(TestItem& item, bool compareData = false)
        : m_item(item)
        , m_compareData(compareData)
    {
    }
    enum {
        AverageSize = 700 //This should be the approximate average size of an Item
    };

    uint hash() const
    {
        return m_item.hash();
    }

    //Should return the size of an item created with createItem
    uint itemSize() const
    {
        return m_item.itemSize();
    }

    void createItem(TestItem* item) const
    {
        memcpy(reinterpret_cast<void*>(item), &m_item, m_item.itemSize());
    }

    static void destroy(TestItem* /*item*/, AbstractItemRepository&)
    {
        //Nothing to do
    }

    static bool persistent(const TestItem* /*item*/)
    {
        return true;
    }

    //Should return whether the here requested item equals the given item
    bool equals(const TestItem* item) const
    {
        return hash() == item->hash() && (!m_compareData || m_item.equals(item));
    }
};

uint smallItemsFraction = 20; //Fraction of items betwen 0 and 1 kb
uint largeItemsFraction = 1; //Fraction of items between 0 and 200 kb
uint cycles = 10000;
uint deletionProbability = 1; //Percentual probability that a checked item is deleted. Per-cycle probability must be multiplied with checksPerCycle.
uint checksPerCycle = 10;

TestItem* createItem(uint id, uint size)
{
    TestItem* ret;
    char* data = new char[size];
    uint dataSize = size - sizeof(TestItem);
    ret = new (data) TestItem(id, dataSize);

    //Fill in same random pattern
    for (uint a = 0; a < dataSize; ++a)
        data[sizeof(TestItem) + a] = ( char )(a + id);

    return ret;
}

///@todo Add a test where the complete content is deleted again, and make sure the result has a nice structure
///@todo More consistency and lost-space tests, especially about monster-buckets. Make sure their space is re-claimed
class TestItemRepository
    : public ItemRepositoryTestBase
{
    Q_OBJECT

private Q_SLOTS:
    void testItemRepository()
    {
        QMutex mutex;
        ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
        uint itemId = 0;
        QHash<uint, TestItem*> realItemsByIndex;
        QHash<uint, TestItem*> realItemsById;
        uint totalInsertions = 0, totalDeletions = 0;
        uint maxSize = 0;
        uint totalSize = 0;
        srand(time(nullptr));
        uint highestSeenIndex = 0;

        for (uint a = 0; a < cycles; ++a) {
            {
                //Insert an item
                uint itemDecision = rand() % (smallItemsFraction + largeItemsFraction);
                uint itemSize;
                if (itemDecision < largeItemsFraction) {
                    //Create a large item: Up to 200kb
                    itemSize = (rand() % 200000) + sizeof(TestItem);
                } else
                    itemSize = (rand() % 1000) + sizeof(TestItem);
                TestItem* item = createItem(++itemId, itemSize);
                Q_ASSERT(item->hash() == itemId);
                QVERIFY(item->equals(item));
                uint index = repository.index(TestItemRequest(*item));
                if (index > highestSeenIndex)
                    highestSeenIndex = index;
                Q_ASSERT(index);
                realItemsByIndex.insert(index, item);
                realItemsById.insert(itemId, item);
                ++totalInsertions;
                totalSize += itemSize;
                if (itemSize > maxSize)
                    maxSize = itemSize;
            }

            for (uint a = 0; a < checksPerCycle; ++a) {
                //Check an item
                uint pick = rand() % itemId;
                if (realItemsById.contains(pick)) {
                    uint index = repository.findIndex(*realItemsById[pick]);
                    QVERIFY(index);
                    QVERIFY(realItemsByIndex.contains(index));
                    QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(index)));

                    if (( uint ) (rand() % 100) < deletionProbability) {
                        ++totalDeletions;
                        //Delete the item
                        repository.deleteItem(index);
                        QVERIFY(!repository.findIndex(*realItemsById[pick]));
                        uint newIndex = repository.index(*realItemsById[pick]);
                        QVERIFY(newIndex);
                        QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(newIndex)));

#ifdef POSITION_TEST
                        //Since we have previously deleted the item, there must be enough space
                        if (!((newIndex >> 16) <= (highestSeenIndex >> 16))) {
                            qDebug() << "size:" << realItemsById[pick]->itemSize();
                            qDebug() << "previous highest seen bucket:" << (highestSeenIndex >> 16);
                            qDebug() << "new bucket:" << (newIndex >> 16);
                        }
                        QVERIFY((newIndex >> 16) <= (highestSeenIndex >> 16));
#endif
                        repository.deleteItem(newIndex);
                        QVERIFY(!repository.findIndex(*realItemsById[pick]));
                        delete[] realItemsById[pick];
                        realItemsById.remove(pick);
                        realItemsByIndex.remove(index);
                    }
                }
            }
        }

        // cleanup
        {
            for (auto it = realItemsByIndex.constBegin(); it != realItemsByIndex.constEnd(); ++it) {
                repository.deleteItem(it.key());
                delete[] it.value();
            }

            realItemsById.clear();
            realItemsByIndex.clear();
        }

        qDebug() << "total insertions:" << totalInsertions << "total deletions:" << totalDeletions <<
        "average item size:" << (totalSize / totalInsertions) << "biggest item size:" << maxSize;

        const auto stats = repository.statistics();
        qDebug() << stats;
        QVERIFY(stats.freeUnreachableSpace < stats.freeSpaceInBuckets / 100); // < 1% of the free space is unreachable
        QVERIFY(stats.freeSpaceInBuckets < stats.usedSpaceForBuckets); // < 20% free space
    }
    void testLeaks()
    {
        QMutex mutex;
        ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
        QList<TestItem*> items;
        for (int i = 0; i < 10000; ++i) {
            TestItem* item = createItem(i, (rand() % 1000) + sizeof(TestItem));
            items << item;
            repository.index(TestItemRequest(*item));
        }

        for (auto item : qAsConst(items)) {
            delete[] item;
        }
    }
    void testStringSharing()
    {
        QString qString;
        qString.fill('.', 1000);
        IndexedString indexedString(qString);
        const int repeat = 10000;
        QVector<QString> strings;
        strings.resize(repeat);
        for (int i = 0; i < repeat; ++i) {
            strings[i] = indexedString.str();
            QCOMPARE(qString, strings[i]);
        }
    }
    void deleteClashingMonsterBucket()
    {
        QMutex mutex;
        ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
        const uint hash = 1235;

        QScopedArrayPointer<TestItem> monsterItem(createItem(hash, ItemRepositoryBucketSize + 10));
        QScopedArrayPointer<TestItem> smallItem(createItem(hash, 20));
        QVERIFY(!monsterItem->equals(smallItem.data()));

        uint smallIndex = repository.index(TestItemRequest(*smallItem, true));
        uint monsterIndex = repository.index(TestItemRequest(*monsterItem, true));
        QVERIFY(monsterIndex != smallIndex);

        repository.deleteItem(smallIndex);
        QVERIFY(!repository.findIndex(TestItemRequest(*smallItem, true)));
        QCOMPARE(monsterIndex, repository.findIndex(TestItemRequest(*monsterItem, true)));
        repository.deleteItem(monsterIndex);

        // now in reverse order, with different data see: https://bugs.kde.org/show_bug.cgi?id=272408

        monsterItem.reset(createItem(hash + 1, ItemRepositoryBucketSize + 10));
        smallItem.reset(createItem(hash + 1, 20));
        QVERIFY(!monsterItem->equals(smallItem.data()));
        monsterIndex = repository.index(TestItemRequest(*monsterItem, true));
        smallIndex = repository.index(TestItemRequest(*smallItem, true));

        repository.deleteItem(monsterIndex);
        QCOMPARE(smallIndex, repository.findIndex(TestItemRequest(*smallItem, true)));
        QVERIFY(!repository.findIndex(TestItemRequest(*monsterItem, true)));
        repository.deleteItem(smallIndex);
    }
    void usePermissiveModuloWhenRemovingClashLinks()
    {
        QMutex mutex;
        ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("PermissiveModulo"), &mutex);

        const uint bucketHashSize = decltype(repository)::bucketHashSize;
        const uint nextBucketHashSize = decltype(repository)::MyBucket::NextBucketHashSize;
        auto bucketNumberForIndex = [](const uint index) {
                                        return index >> 16;
                                    };

        const uint clashValue = 2;

        // Choose sizes that ensure that the items fit in the desired buckets
        const uint bigItemSize = ItemRepositoryBucketSize * 0.55 - 1;
        const uint smallItemSize = ItemRepositoryBucketSize * 0.25 - 1;

        // Will get placed in bucket 1 (bucket zero is invalid), so the root bucket table at position 'clashValue' will be '1'
        const QScopedArrayPointer<TestItem> firstChainFirstLink(createItem(clashValue, bigItemSize));
        const uint firstChainFirstLinkIndex = repository.index(*firstChainFirstLink);
        QCOMPARE(bucketNumberForIndex(firstChainFirstLinkIndex), 1u);

        // Will also get placed in bucket 1, so root bucket table at position 'nextBucketHashSize + clashValue' will be '1'
        const QScopedArrayPointer<TestItem> secondChainFirstLink(createItem(nextBucketHashSize + clashValue,
                                                                            smallItemSize));
        const uint secondChainFirstLinkIndex = repository.index(*secondChainFirstLink);
        QCOMPARE(bucketNumberForIndex(secondChainFirstLinkIndex), 1u);

        // Will get placed in bucket 2, so bucket 1's next hash table at position 'clashValue' will be '2'
        const QScopedArrayPointer<TestItem> firstChainSecondLink(createItem(bucketHashSize + clashValue, bigItemSize));
        const uint firstChainSecondLinkIndex = repository.index(*firstChainSecondLink);
        QCOMPARE(bucketNumberForIndex(firstChainSecondLinkIndex), 2u);

        // Will also get placed in bucket 2, reachable since bucket 1's next hash table at position 'clashValue' is '2'
        const QScopedArrayPointer<TestItem> secondChainSecondLink(createItem(
                bucketHashSize + nextBucketHashSize + clashValue, smallItemSize));
        const uint secondChainSecondLinkIndex = repository.index(*secondChainSecondLink);
        QCOMPARE(bucketNumberForIndex(secondChainSecondLinkIndex), 2u);

        /*
         * At this point we have two chains in the repository, rooted at 'clashValue' and 'nextBucketHashSize + clashValue'
         * Both of the chains start in bucket 1 and end in bucket 2, but both chains share the same link to bucket 2
         * This is because two of the hashes clash the other two when % bucketHashSize, but all of them clash % nextBucketHashSize
         */

        repository.deleteItem(firstChainSecondLinkIndex);

        /*
         * Now we've deleted the second item in the first chain, this means the first chain no longer requires a link to the
         * second bucket where that item was... but the link must remain, since it's shared (clashed) by the second chain.
         *
         * When cutting a link out of the middle of the chain, we need to check if its items clash using the "permissive"
         * modulus (the size of the /next/ buckets map), which is always a factor of the "stricter" modulus (the size of the
         * /root/ buckets map).
         *
         * This behavior implies that there will sometimes be useless buckets in the bucket chain for a given hash, so when
         * cutting out the root link, it's safe to skip over them to the first clash with the 'stricter' modulus.
         */

        // The second item of the second chain must still be reachable
        QCOMPARE(repository.findIndex(*secondChainSecondLink), secondChainSecondLinkIndex);

        /*
         * As a memo to anyone who's still reading this, this also means the following situation can exist:
         *
         * bucketHashSize == 8
         * nextBucketHashSize == 4
         * U is a link table
         * B is a bucket
         * [...] are the hashes of the contained items
         *
         * U
         * U
         * U -> B1
         * U
         * U
         * U
         * U -> B2
         * U
         *
         * B0 (Invalid)
         * B1 -> [2, 6]
         *   U
         *   U
         *   U -> B3
         *   U
         * B2 -> [14]
         *   U
         *   U
         *   U -> B1
         *   U
         * B3 -> [10]
         *   U
         *   U
         *   U
         *   U
         *
         * The chain for hash 6 is:
         * Root[6] -> B2[2] -> B1[2] -> B3
         *
         * If you remove the item with hash 6, 6 and 2 will clash with mod 4 (permissive)
         *
         * So the useless link `B2[2] -> B1` will be preserved, even though its useless
         * as the item with hash 2 is reachable directly from the root.
         *
         * So TODO: Don't preserve links to items accessible from root buckets. This cannot
         * be done correctly using only Bucket::hasClashingItem as of now.
         */
    }
};

#include "test_itemrepository.moc"

QTEST_MAIN(TestItemRepository)