File: StringHasher.h

package info (click to toggle)
webkit2gtk 2.48.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 429,620 kB
  • sloc: cpp: 3,696,936; javascript: 194,444; ansic: 169,997; python: 46,499; asm: 19,276; ruby: 18,528; perl: 16,602; xml: 4,650; yacc: 2,360; sh: 2,098; java: 1,993; lex: 1,327; pascal: 366; makefile: 298
file content (129 lines) | stat: -rw-r--r-- 4,572 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
 * Copyright (C) 2005-2023 Apple Inc. All rights reserved.
 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#pragma once

#include <array>
#include <unicode/utypes.h>
#include <wtf/FastMalloc.h>
#include <wtf/text/LChar.h>

namespace WTF {

// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
static constexpr unsigned stringHashingStartValue = 0x9E3779B9U;

class SuperFastHash;
class WYHash;

class StringHasher {
    WTF_MAKE_FAST_ALLOCATED;
public:
    static constexpr unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
    static constexpr unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1;
    static constexpr unsigned numberOfCharactersInLargestBulkForWYHash = 24; // Don't change this value. It's fixed for WYhash algorithm.

    // Things need to do to update this threshold:
    // 1. This threshold must stay in sync with the threshold in the scripts create_hash_table, Hasher.pm, and hasher.py.
    // 2. Run script `run-bindings-tests --reset-results` to update all CompactHashIndex's under path `WebCore/bindings/scripts/test/JS/`.
    // 3. Manually update all CompactHashIndex's in JSDollarVM.cpp by using createHashTable in hasher.py.
    static constexpr unsigned smallStringThreshold = numberOfCharactersInLargestBulkForWYHash * 2;

    struct DefaultConverter {
        template<typename CharType>
        static constexpr UChar convert(CharType character)
        {
            return static_cast<std::make_unsigned_t<CharType>>((character));
        }
    };

    StringHasher() = default;

    template<typename T, typename Converter = DefaultConverter>
    static unsigned computeHashAndMaskTop8Bits(std::span<const T> data);

    template<typename T, unsigned characterCount>
    static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[characterCount]);

    void addCharacter(UChar character);

    // hashWithTop8BitsMasked will reset to initial status.
    unsigned hashWithTop8BitsMasked();

private:
    friend class SuperFastHash;
    friend class WYHash;

    ALWAYS_INLINE static constexpr unsigned avalancheBits(unsigned hash)
    {
        unsigned result = hash;

        result ^= result << 3;
        result += result >> 5;
        result ^= result << 2;
        result += result >> 15;
        result ^= result << 10;

        return result;
    }

    ALWAYS_INLINE static constexpr unsigned finalize(unsigned hash)
    {
        return avoidZero(avalancheBits(hash));
    }

    ALWAYS_INLINE static constexpr unsigned finalizeAndMaskTop8Bits(unsigned hash)
    {
        // Reserving space from the high bits for flags preserves most of the hash's
        // value, since hash lookup typically masks out the high bits anyway.
        return avoidZero(avalancheBits(hash) & StringHasher::maskHash);
    }

    // This avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet". Setting the high bit maintains
    // reasonable fidelity to a hash code of 0 because it is likely to yield
    // exactly 0 when hash lookup masks out the high bits.
    ALWAYS_INLINE static constexpr unsigned avoidZero(unsigned hash)
    {
        if (hash)
            return hash;
        return 0x80000000 >> flagCount;
    }

#if ENABLE(WYHASH_STRING_HASHER)
    bool m_pendingHashValue { false };
    unsigned m_numberOfProcessedCharacters { 0 };
    uint64_t m_seed { 0 };
    uint64_t m_see1 { 0 };
    uint64_t m_see2 { 0 };

    unsigned m_bufferSize { 0 };
    std::array<UChar, smallStringThreshold> m_buffer;
#else
    unsigned m_hash { stringHashingStartValue };
    UChar m_pendingCharacter { 0 };
    bool m_hasPendingCharacter { false };
#endif
};

} // namespace WTF

using WTF::StringHasher;