File: persistent_string_cache.h

package info (click to toggle)
persistent-cache-cpp 1.0.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 648 kB
  • sloc: cpp: 7,754; python: 183; ansic: 91; sh: 34; makefile: 7
file content (649 lines) | stat: -rw-r--r-- 24,103 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
/*
 * Copyright (C) 2015 Canonical Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Authored by: Michi Henning <michi.henning@canonical.com>
 */

#pragma once

#include <core/cache_discard_policy.h>
#include <core/cache_events.h>
#include <core/optional.h>
#include <core/persistent_cache_stats.h>

namespace core
{

namespace internal
{

class PersistentStringCacheImpl;

}  // namespace internal

/**
\brief A cache of key-value pairs with persistent storage.

PersistentStringCache provides a cache of `string` key-value pairs with a backing store.
It is intended for caching arbitrary (possibly large) amounts of data,
such as might be needed by a web browser cache.

See @ref overview for a more detailed description.

### Usage example

Typical use looks something like this:

\code{.cpp}
// Open cache or create it if it does not exist.
auto c = core::PersistentStringCache::open("my_db", 1024 * 1024 * 1024, core::CacheDiscardPolicy::lru_only);

// Look for an entry. If it doesn't exist, add it.
string key = "some_key";
auto value = c->get(key);
if (value)
{
    cout << *value << endl;
}
else
{
    string v = "some value";
    c->put(key, v);
}
\endcode
*/

class PersistentStringCache
{
public:
    /**
    Convenience typedef for the return type of open().
    */
    typedef std::unique_ptr<PersistentStringCache> UPtr;

    /**
    \brief Simple pair of value and metadata.
    */
    struct Data
    {
        /**
        \brief Stores the value of an entry.
        */
        std::string value;

        /**
        \brief Stores the metadata of an entry. If no metadata exists for an entry,
        `metadata` is returned as the empty string when it is retrieved.
        */
        std::string metadata;
    };

    /** @name Copy and Assignment
    Cache instances are not copyable, but can be moved.
    \note The constructors are private. Use one of the open()
    static member functions to create or open a cache.
    */

    //{@
    PersistentStringCache(PersistentStringCache const&) = delete;
    PersistentStringCache& operator=(PersistentStringCache const&) = delete;

    PersistentStringCache(PersistentStringCache&&);
    PersistentStringCache& operator=(PersistentStringCache&&);
    //@}

    /**
    Destroys the instance.
    */
    ~PersistentStringCache();

    /** @name Creation Methods
    */

    //{@

    /**
    \brief Creates or opens a PersistentStringCache.

    If no cache exists on disk, it will be created;
    otherwise, the pre-existing cache contents are used.

    An existing cache can be opened only if `max_size_in_bytes` and `policy` have
    the same values they had when the cache was last closed.

    \param cache_path The path to a directory in which to store the cache. The contents
    of this directory are exlusively owned by the cache; do not create additional files
    or directories there. The directory need not exist when creating a new cache.
    \param max_size_in_bytes The maximum size in bytes for the cache.
    \param policy The discard policy for the cache (`lru_only` or `lru_ttl`). The discard
    policy cannot be changed once a cache has been created.

    The size of an entry is the sum of the sizes of its key, value, and metadata.
    The maximum size of the cache is the sum of the sizes of all its entries.

    \return A <code>unique_ptr</code> to the instance.
    \throws invalid_argument `max_size_in_bytes` is &lt; 1.
    \throws logic_error `max_size_in_bytes` or `policy` do not match the settings of a pre-existing cache.
    */
    static UPtr open(std::string const& cache_path, int64_t max_size_in_bytes, CacheDiscardPolicy policy);

    /**
    \brief Opens an existing PersistentStringCache.
    \param cache_path The path to a directory containing the existing cache.
    \return A <code>unique_ptr</code> to the instance.
    */
    static UPtr open(std::string const& cache_path);

    //@}

    /** @name Accessors
    */

    //{@

    /**
    \brief Returns the value of an entry in the cache, provided the entry has not expired.
    \param key The key for the entry.
    \return A null value if the entry could not be retrieved; the value of the entry, otherwise.
    \throws invalid_argument `key` is the empty string.
    \note This operation updates the access time of the entry.
    */
    Optional<std::string> get(std::string const& key) const;

    /**
    \brief Returns the data for an entry in the cache, provided the entry has not expired.
    \param key The key for the entry.
    \return A null value if the entry could not be retrieved; the data of the entry, otherwise.
    If no metadata exists, `Data::metadata` is set to the empty string.
    \throws invalid_argument `key` is the empty string.
    \note This operation updates the access time of the entry.
    */
    Optional<Data> get_data(std::string const& key) const;

    /**
    \brief Returns the metadata for an entry in the cache, provided the entry has not expired.
    \param key The key for the entry.
    \return A null value if the entry could not be retrieved; the metadata of the entry, otherwise.
    \throws invalid_argument `key` is the empty string.
    \note This operation does _not_ update the access time of the entry.
    \see touch()
    */
    Optional<std::string> get_metadata(std::string const& key) const;

    /**
    \brief Tests if an (unexpired) entry is in the cache.
    \param key The key for the entry.
    \return `true` if the entry is in the cache; `false` otherwise.
    \throws invalid_argument `key` is the empty string.
    \note This operation does _not_ update the access time of the entry.
    \see touch()
    */
    bool contains_key(std::string const& key) const;

    /**
    \brief Returns the number of entries in the cache.
    \return The total number of entries in the cache.
    \note The returned count includes possibly expired entries.
    */
    int64_t size() const noexcept;

    /**
    \brief Returns the number of bytes consumed by entries in the cache.
    \return The total number of bytes in the cache.
    \note The returned size includes possibly expired entries.
    */
    int64_t size_in_bytes() const noexcept;

    /**
    \brief Returns the maximum size of the cache in bytes.
    \return The maximum number of bytes that can be stored in the cache.
    \see resize()
    */
    int64_t max_size_in_bytes() const noexcept;

    /**
    \brief Returns an estimate of the disk space consumed by the cache.
    \return The approximate number of bytes used by the cache on disk.
    \note The returned size may be smaller than the eventual size
    if there are updates to the cache that have not yet been written to disk.
    */
    int64_t disk_size_in_bytes() const;

    /**
    \brief Returns the discard policy of the cache.
    \return The discard policy (`lru_only` or `lru_ttl`).
    */
    CacheDiscardPolicy discard_policy() const noexcept;

    /**
    \brief Returns statistics for the cache.

    The returned statistics are persistent and are restored the next
    time an existing cache is opened. Call clear_stats() to explicitly
    reset the statistics counters and time stamps to zero.
    \return An object that provides accessors to statistics and settings.
    \see clear_stats()
    */
    PersistentCacheStats stats() const;

    //@}

    /** @name Modifiers
    */

    //{@

    /**
    \brief Adds or updates an entry.

    If an entry with the given key does not exist in the cache, it is added
    (possibly evicting a number of expired and/or older entries). If the entry
    still exists (whether expired or not), it is updated with the new value
    (and possibly expiry time).

    This operation deletes any metadata associated with the entry.

    \return `true` if the entry was added or updated. `false` if the policy
    is `lru_ttl` and `expiry_time` is in the past.

    \throws invalid_argument `key` is the empty string.
    \throws logic_error The size of the entry exceeds the maximum cache size.
    \throws logic_error The cache policy is `lru_only` and a non-infinite expiry time was provided.
    */
    bool put(std::string const& key,
             std::string const& value,
             std::chrono::time_point<std::chrono::system_clock> expiry_time = std::chrono::system_clock::time_point());

    /**
    \brief Adds or updates an entry.

    \note This overload is provided to avoid the need to construct a string value.

    If an entry with the given key does not exist in the cache, it is added
    (possibly evicting a number of expired and/or older entries). If the entry
    still exists (whether expired or not), it is updated with the new value
    (and possibly expiry time).

    This operation deletes any metadata associated with the entry.

    \return `true` if the entry was added or updated. `false` if the policy
    is `lru_ttl` and `expiry_time` is in the past.

    \param key The key of the entry.
    \param value A pointer to the first byte of the value.
    \param size The size of the value in bytes.
    \param expiry_time The time at which the entry expires.

    \throws invalid_argument `key` is the empty string.
    \throws invalid_argument `value` is `nullptr`.
    \throws invalid_argument `size` is negative.
    \throws logic_error The size of the entry exceeds the maximum cache size.
    \throws logic_error The cache policy is `lru_only` and a non-infinite expiry time was provided.
    */
    bool put(std::string const& key,
             char const* value,
             int64_t size,
             std::chrono::time_point<std::chrono::system_clock> expiry_time = std::chrono::system_clock::time_point());

    /**
    \brief Adds or updates an entry and its metadata.

    If an entry with the given key does not exist in the cache, it is added
    (possibly evicting a number of expired and/or older entries). If the entry
    still exists (whether expired or not), it is updated with the new value
    and metadata (and possibly expiry time).

    \return `true` if the entry was added or updated. `false` if the policy
    is `lru_ttl` and `expiry_time` is in the past.

    \throws invalid_argument `key` is the empty string.
    \throws logic_error The sum of sizes of the entry and metadata exceeds the maximum cache size.
    \throws logic_error The cache policy is `lru_only` and a non-infinite expiry time was provided.
    */
    bool put(std::string const& key,
             std::string const& value,
             std::string const& metadata,
             std::chrono::time_point<std::chrono::system_clock> expiry_time = std::chrono::system_clock::time_point());

    /**
    \brief Adds or updates an entry and its metadata.

    \note This overload is provided to avoid the need to construct strings
    for the value and metadata.

    If an entry with the given key does not exist in the cache, it is added
    (possibly evicting a number of expired and/or older entries). If the entry
    still exists (whether expired or not), it is updated with the new value
    and metadata (and possibly expiry time).

    \return `true` if the entry was added or updated. `false` if the policy
    is `lru_ttl` and `expiry_time` is in the past.

    \param key The key of the entry.
    \param value A pointer to the first byte of the value.
    \param value_size The size of the value in bytes.
    \param metadata A pointer to the first byte of the metadata.
    \param metadata_size The size of the metadata in bytes.
    \param expiry_time The time at which the entry expires.

    \throws invalid_argument `key` is the empty string.
    \throws logic_error The sum of sizes of the entry and metadata exceeds the maximum cache size.
    \throws logic_error The cache policy is `lru_only` and a non-infinite expiry time was provided.
    */
    bool put(std::string const& key,
             char const* value,
             int64_t value_size,
             char const* metadata,
             int64_t metadata_size,
             std::chrono::time_point<std::chrono::system_clock> expiry_time = std::chrono::system_clock::time_point());

    /**
    \brief Function called by the cache to load an entry after a cache miss.
    */
    typedef std::function<void(std::string const& key, PersistentStringCache& cache)> Loader;

    /**
    \brief Atomically retrieves or stores a cache entry.

    `get_or_put` attempts to retrieve the value of a (non-expired) entry.
    If the entry can be found, it returns its value. Otherwise, it calls
    `load_func`, which is expected to add the entry to the cache. If the
    load function succeeds in adding the entry, the value added by the load
    function is returned. The load function is called by the application thread.

    \return A null value if the entry could not be retrieved or loaded; the value of the entry, otherwise.
    \throws runtime_error The load function threw an exception.

    \note The load function must (synchronously) call one of the overloaded `put` methods to add a
    new entry for the provided key. Calling any other method on the cache from within the load
    function causes undefined behavior.

    \warning This operation holds a lock on the cache while the load function runs.
    This means that, if multiple threads call into the cache, they will be blocked
    for the duration of the load function.
    */
    Optional<std::string> get_or_put(std::string const& key, Loader const& load_func);

    /**
    \brief Atomically retrieves or stores a cache entry.

    `get_or_put` attempts to retrieve the value and metadata of a (non-expired) entry.
    If the entry can be found, it returns its data. Otherwise, it calls
    `load_func`, which is expected to add the entry to the cache. If the
    load function succeeds in adding the entry, the data added by the load
    function is returned. The load function is called by the application thread.

    \return A null value if the entry could not be retrieved or loaded; the value and metadata of the entry, otherwise.
    \throws runtime_error The load function threw an exception.

    \note The load function must (synchronously) call one of the overloaded `put` methods to add a
    new entry for the provided key. Calling any other method on the cache from within the load
    function causes undefined behavior.

    \warning This operation holds a lock on the cache while the load function runs.
    This means that, if multiple threads call into the cache, they will be blocked
    for the duration of the load function.
    */
    Optional<Data> get_or_put_data(std::string const& key, Loader const& load_func);

    /**
    \brief Adds or replaces the metadata for an entry.

    If a (non-expired) entry with the given key exists in the cache, its metadata
    is set to the provided value, replacing any previous metadata.

    \return `true` if the metadata was added or updated. `false` if the
    entry could not be found or was expired.

    \throws invalid_argument `key` is the empty string.
    \throws logic_error The new size of the entry would exceed the maximum cache size.

    \note This operation does _not_ update the access time of the entry.
    \see touch()
    */
    bool put_metadata(std::string const& key, std::string const& metadata);

    /**
    \brief Adds or replaces the metadata for an entry.

    \note This overload is provided to avoid the need to construct a string
    for the metadata.

    If a (non-expired) entry with the given key exists in the cache, its metadata
    is set to the provided value, replacing any previous metadata.

    \param key The key of the entry.
    \param metadata A pointer to the first byte of the metadata.
    \param size The size of the metadata in bytes.

    \return `true` if the metadata was added or updated. `false` if the
    entry could not be found or was expired.

    \throws invalid_argument `key` is the empty string.
    \throws invalid_argument `metadata` is `nullptr`.
    \throws invalid_argument `size` is negative.
    \throws logic_error The new size of the entry would exceed the maximum cache size.

    \note This operation does _not_ update the access time of the entry.
    \see touch()
    */
    bool put_metadata(std::string const& key, char const* metadata, int64_t size);

    /**
    \brief Removes an entry and returns its value.

    If a (non-expired) entry with the given key can be found, it is removed
    from the cache and its value returned.

    \return A null value if the entry could not be found; the value of the entry, otherwise.
    \throws invalid_argument `key` is the empty string.
    */
    Optional<std::string> take(std::string const& key);

    /**
    \brief Removes an entry and returns its value and metadata.

    If a (non-expired) entry with the given key can be found, it is removed
    from the cache and its data returned.
    If no metadata exists, `Data::metadata` is set to the empty string.

    \return A null value if the entry could not be retrieved; the value and metadata of the entry, otherwise.
    \throws invalid_argument `key` is the empty string.
    */
    Optional<Data> take_data(std::string const& key);

    /**
    \brief Removes an entry and its associated metadata (if any).

    If a (non-expired) entry with the given key can be found, it is removed
    from the cache.

    \return `true` if the entry was removed; `false` if the entry could not be found or was expired.
    \throws invalid_argument `key` is the empty string.
    */
    bool invalidate(std::string const& key);

    /**
    \brief Atomically removes the specified entries from the cache.

    \param keys A vector of keys for the entries to be removed. If the vector is empty, this operation is
    a no-op. If one or more keys are empty or specify non-existent entries, they are ignored.
    */
    void invalidate(std::vector<std::string> const& keys);

    /**
    \brief Atomically removes the specified entries from the cache.

    \param begin Iterator to the first key for the entries to be removed.
    \param end Iterator to the one-beyond-the-last key for the entries to be removed.

    If the iterator range is empty, this operation is a no-op.
    If one or more keys are empty or specify non-existent entries, they are ignored.
    */
    template<typename It>
    void invalidate(It begin, It end)
    {
        std::vector<std::string> keys;
        while (begin < end)
        {
            keys.push_back(*begin++);
        }
        invalidate(keys);
    }

    /**
    \brief Atomically removes the specified entries from the cache.

    \param keys The keys for the entries to be removed. If `keys` is empty, this operation is
    a no-op. If one or more keys are empty or specify non-existent entries, they are ignored.
    */
    void invalidate(std::initializer_list<std::string> const& keys);

    /**
    \brief Deletes all entries from the cache.

    This operation completely empties the cache.
    \note Clearing the cache also resets the statistics counters.
    \see clear_stats()
    */
    void invalidate();

    /**
    \brief Updates the access time of an entry.

    If the entry specified by `key` is still in the cache (whether expired or not),
    it is marked as the most-recently used entry. If the policy is `lru_ttl`, the
    entry's expiry time is updated with the specified time (infinite expiry by default).

    \return `true` if the entry was updated; `false` if the entry could not be found or
    `expiry_time` is in the past.
    \throws invalid_argument `key` is the empty string.
    \throws logic_error `key` is the empty string.
    \throws logic_error The cache policy is `lru_only` and a non-infinite expiry time was provided.
    */
    bool touch(
        std::string const& key,
        std::chrono::time_point<std::chrono::system_clock> expiry_time = std::chrono::system_clock::time_point());

    /**
    \brief Resets all statistics counters.
    */
    void clear_stats();

    /**
    \brief Changes the maximum size of the cache.

    If `size_in_bytes` is greater or equal to max_size_in_bytes(), the cache size is set to `size_in_bytes`.

    If `size_in_bytes` is less than max_size_in_bytes(), the cache discards existing entries until
    the size falls to (or below) `size_in_bytes` and sets the cache size to the new value.

    \throws invalid_argument `size_in_bytes` is &lt; 1

    \note If the new size is less than the current size, this operation compacts the database
    to use the smallest possible amount of disk space.
    */
    void resize(int64_t size_in_bytes);

    /**
    \brief Expires entries.

    Expires entries using the cache's expiration policy until the cache size falls to or below
    `used_size_in_bytes`. If `used_size_in_bytes` is less than the current cache size, this
    operation is a no-op.

    \throws invalid_argument `used_size_in_bytes` is &lt; 0
    \throws logic_error `used_size_in_bytes` is &gt; max_size_in_bytes().
    */
    void trim_to(int64_t used_size_in_bytes);

    /**
    \brief Compacts the database.

    This operation compacts the database to consume as little disk space as possible.
    Note that this operation can be slow. (Compacting a 100 MB cache can take around
    ten seconds on a machine with a spinning-platter disk.)
    */
    void compact();

    //@}

    /** @name Monitoring cache activity

    The cache allows you to register one or more callback functions that are called when
    the cache contents change.

    \note Callback functions are called by the application thread that triggered the
    corresponding event.

    \warning Do not invoke operations on the cache from within a callback function. Doing
    so has undefined behavior.
    */

    //{@

    /**
    \brief The type of a handler function.

    \note Callback functions are called by the application thread that triggered the
    corresponding event.

    \warning Do not invoke operations on the cache from within a callback function. Doing
    so has undefined behavior.

    \param key The key of the entry.
    \param ev The event type.
    \param stats The cache statistics. Note that the `stats` parameter reflects the state
    of the cache _after_ the corresponding event. For example, for a `Put` event, `stats.size_in_bytes()`
    _includes_ the size of the added entry.
    */
    typedef std::function<void(std::string const& key, CacheEvent ev, PersistentCacheStats const& stats)> EventCallback;

    /**
    \brief Installs a handler for one or more events.

    \param events A bitwise OR of the event types for which to install the handler. To install
    a handler for all events, you can use core::AllCacheEvents.

    \param cb The handler to install. To cancel an existing handler, pass `nullptr`.

    For example, to install a handler for `get` and `put` events, you could use:

    \code{.cpp}
    auto cache = PersistentStringCache::open("my_cache");

    auto handler = [](string const& key, CacheEvent event, PersistentCacheStats const& stats)
    {
        // ...
    };
    cache->set_handler(CacheEvent::get | CacheEvent::put, handler);
    \endcode

    \see CacheEvent
    */
    void set_handler(CacheEvent events, EventCallback cb);

    //@}

private:
    // @cond
    PersistentStringCache(std::string const& cache_path, int64_t max_size_in_bytes, CacheDiscardPolicy policy);
    PersistentStringCache(std::string const& cache_path);

    std::unique_ptr<internal::PersistentStringCacheImpl> p_;
    // @endcond
};

}  // namespace core