File: CountingLock.h

package info (click to toggle)
webkit2gtk 2.48.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 429,620 kB
  • sloc: cpp: 3,696,936; javascript: 194,444; ansic: 169,997; python: 46,499; asm: 19,276; ruby: 18,528; perl: 16,602; xml: 4,650; yacc: 2,360; sh: 2,098; java: 1,993; lex: 1,327; pascal: 366; makefile: 298
file content (269 lines) | stat: -rw-r--r-- 10,320 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
/*
 * Copyright (C) 2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#pragma once

#include <wtf/DataLog.h>
#include <wtf/LockAlgorithm.h>

namespace WTF {

// This is mostly just a word-sized WTF::Lock. It supports basically everything that lock supports. But as
// a bonus, it atomically counts lock() calls and allows you to perform an optimistic read transaction by
// comparing the count before and after the transaction. If at the start of the transaction the lock is
// not held and the count remains the same throughout the transaction, then you know that nobody could
// have modified your data structure while you ran. You can even use this to optimistically read pointers
// that could become dangling under concurrent writes, if you just revalidate the count every time you're
// about to do something dangerous.
//
// This is largely inspired by StampedLock from Java:
// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/CountingLock.html
//
// This is simplified a lot compared to StampedLock. Unlike StampedLock, it uses an exclusive lock as a
// fallback. There is no way to acquire a CountingLock for read. The only read access is via optimistic
// read transactions.
//
// CountingLock provides two ways of doing optimistic reads:
//
// - The easy way, where CountingLock does all of the fencing for you. That fencing is free on x86 but
//   somewhat expensive on ARM.
// - The hard way, where you do fencing yourself using Dependency. This allows you to be fenceless on both
//   x86 and ARM.
//
// The latter is important for us because some GC paths are known to be sensitive to fences on ARM.

class CountingLock final {
    WTF_MAKE_NONCOPYABLE(CountingLock);
    WTF_MAKE_FAST_ALLOCATED;

    typedef unsigned LockType;
    
    static constexpr LockType isHeldBit = 1;
    static constexpr LockType hasParkedBit = 2;
    static constexpr LockType mask = isHeldBit | hasParkedBit;
    static constexpr LockType shift = 2;
    static constexpr LockType countUnit = 4;
    
    struct LockHooks {
        static LockType lockHook(LockType value)
        {
            return value + countUnit;
        }
        
        static LockType unlockHook(LockType value) { return value; }
        static LockType parkHook(LockType value) { return value; }
        static LockType handoffHook(LockType value) { return value; }
    };
    
    typedef LockAlgorithm<LockType, isHeldBit, hasParkedBit, LockHooks> ExclusiveAlgorithm;
    
public:
    CountingLock() = default;
    
    bool tryLock()
    {
        return ExclusiveAlgorithm::tryLock(m_word);
    }

    void lock()
    {
        if (UNLIKELY(!ExclusiveAlgorithm::lockFast(m_word)))
            lockSlow();
    }
    
    void unlock()
    {
        if (UNLIKELY(!ExclusiveAlgorithm::unlockFast(m_word)))
            unlockSlow();
    }
    
    bool isHeld() const
    {
        return ExclusiveAlgorithm::isLocked(m_word);
    }
    
    bool isLocked() const
    {
        return isHeld();
    }
    
    // The only thing you're allowed to infer from this value is that if it's zero, then you didn't get
    // a real count.
    class Count {
    public:
        explicit operator bool() const { return !!m_value; }
        
        friend bool operator==(const Count&, const Count&) = default;
        
    private:
        friend class CountingLock;
        
        LockType m_value { 0 };
    };
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         if (CountingLock::Count count = m_lock.tryOptimisticRead()) {
    //             int value = m_things;
    //             if (m_lock.validate(count))
    //                 return value; // success!
    //         }
    //         Locker locker { m_lock };
    //         int value = m_things;
    //         return value;
    //     }
    //
    // If tryOptimisitcRead() runs when the lock is not held, this thread will run a critical section
    // without ever writing to memory. However, on ARM, this requires fencing. We use a load-acquire for
    // tryOptimisticRead(). We have no choice but to use the more expensive `dmb ish` in validate(). If
    // you want to avoid that, you could try to use tryOptimisticFencelessRead().
    Count tryOptimisticRead()
    {
        LockType currentValue = m_word.load();
        // FIXME: We could eliminate this check, if we think it's OK to proceed with the optimistic read
        // path even after knowing that it must fail. That's probably good for perf since we expect
        // failure to be super rare. We would get rid of this check and instead of calling getCount below,
        // we would return currentValue ^ mask. If the lock state was empty to begin with, the result
        // would be a properly blessed count (both low bits set). If the lock state was anything else, we
        // would get an improperly blessed count that would not possibly succeed in validate. We could
        // actually do something like "return (currentValue | hasParkedBit) ^ isHeldBit", which would mean
        // that we allow parked-but-not-held-locks through.
        // https://bugs.webkit.org/show_bug.cgi?id=180394
        if (currentValue & isHeldBit)
            return Count();
        return getCount(currentValue);
    }
    
    bool validate(Count count)
    {
        WTF::loadLoadFence();
        LockType currentValue = m_word.loadRelaxed();
        return getCount(currentValue) == count;
    }
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         return m_lock.doOptimizedRead(
    //             [&] () -> int {
    //                 int value = m_things;
    //                 return value;
    //             });
    //     }
    template<typename Func>
    auto doOptimizedRead(const Func& func)
    {
        Count count = tryOptimisticRead();
        if (count) {
            auto result = func();
            if (validate(count))
                return result;
        }
        lock();
        auto result = func();
        unlock();
        return result;
    }
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         auto result = m_lock.tryOptimisticFencelessRead();
    //         if (CountingLock::Count count = result.value) {
    //             Dependency fenceBefore = Dependency::fence(result.input);
    //             auto* fencedThis = fenceBefore.consume(this);
    //             int value = fencedThis->m_things;
    //             if (m_lock.fencelessValidate(count, Dependency::fence(value)))
    //                 return value; // success!
    //         }
    //         Locker locker { m_lock };
    //         int value = m_things;
    //         return value;
    //     }
    //
    // Use this to create a read transaction using dependency chains only. You have to be careful to
    // thread the dependency input (the `input` field that the returns) through a Dependency, and then
    // thread that Dependency into every load (except for loads that are chasing pointers loaded from
    // loads that already uses that dependency). Then, to validate the read transaction, you have to pass
    // both the count and another Dependency that is based on whatever loads you used to produce the
    // output.
    //
    // On non-ARM platforms, the Dependency objects don't do anything except for Dependency::fence, which
    // is a load-load fence. The idiom above does the right thing on both ARM and TSO.
    //
    // WARNING: This can be hard to get right. Please only use this for very short critical sections that
    // are known to be sufficiently perf-critical to justify the risk.
    InputAndValue<LockType, Count> tryOptimisticFencelessRead()
    {
        LockType currentValue = m_word.loadRelaxed();
        if (currentValue & isHeldBit)
            return inputAndValue(currentValue, Count());
        return inputAndValue(currentValue, getCount(currentValue));
    }
    
    bool fencelessValidate(Count count, Dependency dependency)
    {
        LockType currentValue = dependency.consume(this)->m_word.loadRelaxed();
        return getCount(currentValue) == count;
    }
    
    template<typename OptimisticFunc, typename Func>
    auto doOptimizedFencelessRead(const OptimisticFunc& optimisticFunc, const Func& func)
    {
        auto count = tryOptimisticFencelessRead();
        if (count.value) {
            Dependency dependency = Dependency::fence(count.input);
            auto result = optimisticFunc(dependency, count.value);
            if (fencelessValidate(count.value, dependency))
                return result;
        }
        lock();
        auto result = func();
        unlock();
        return result;
    }
    
private:
    WTF_EXPORT_PRIVATE void lockSlow();
    WTF_EXPORT_PRIVATE void unlockSlow();
    
    Count getCount(LockType value)
    {
        Count result;
        result.m_value = value | mask;
        return result;
    }
    
    Atomic<LockType> m_word { 0 };
};

} // namespace WTF

using WTF::CountingLock;