File: StringHasher.h

package info (click to toggle)
webkit2gtk 2.51.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 455,340 kB
  • sloc: cpp: 3,865,253; javascript: 197,710; ansic: 165,177; python: 49,241; asm: 21,868; ruby: 18,095; perl: 16,926; xml: 4,623; sh: 2,409; yacc: 2,356; java: 2,019; lex: 1,330; pascal: 372; makefile: 210
file content (130 lines) | stat: -rw-r--r-- 4,616 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * Copyright (C) 2005-2023 Apple Inc. All rights reserved.
 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#pragma once

#include <array>
#include <unicode/utypes.h>
#include <wtf/FastMalloc.h>
#include <wtf/StdLibExtras.h>
#include <wtf/text/Latin1Character.h>

namespace WTF {

// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
static constexpr unsigned stringHashingStartValue = 0x9E3779B9U;

class SuperFastHash;
class WYHash;

class StringHasher {
    WTF_DEPRECATED_MAKE_FAST_ALLOCATED(StringHasher);
public:
    static constexpr unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
    static constexpr unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1;
    static constexpr unsigned numberOfCharactersInLargestBulkForWYHash = 24; // Don't change this value. It's fixed for WYhash algorithm.

    // Things need to do to update this threshold:
    // 1. This threshold must stay in sync with the threshold in the scripts create_hash_table, Hasher.pm, and hasher.py.
    // 2. Run script `run-bindings-tests --reset-results` to update all CompactHashIndex's under path `WebCore/bindings/scripts/test/JS/`.
    // 3. Manually update all CompactHashIndex's in JSDollarVM.cpp by using createHashTable in hasher.py.
    static constexpr unsigned smallStringThreshold = numberOfCharactersInLargestBulkForWYHash * 2;

    struct DefaultConverter {
        template<typename CharType>
        static constexpr char16_t convert(CharType character)
        {
            return unsignedCast(character);
        }
    };

    StringHasher() = default;

    template<typename T, typename Converter = DefaultConverter>
    static unsigned computeHashAndMaskTop8Bits(std::span<const T> data);

    template<typename T, unsigned characterCount>
    static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[characterCount]);

    void addCharacter(char16_t character);

    // hashWithTop8BitsMasked will reset to initial status.
    unsigned hashWithTop8BitsMasked();

private:
    friend class SuperFastHash;
    friend class WYHash;

    ALWAYS_INLINE static constexpr unsigned avalancheBits(unsigned hash)
    {
        unsigned result = hash;

        result ^= result << 3;
        result += result >> 5;
        result ^= result << 2;
        result += result >> 15;
        result ^= result << 10;

        return result;
    }

    ALWAYS_INLINE static constexpr unsigned finalize(unsigned hash)
    {
        return avoidZero(avalancheBits(hash));
    }

    ALWAYS_INLINE static constexpr unsigned finalizeAndMaskTop8Bits(unsigned hash)
    {
        // Reserving space from the high bits for flags preserves most of the hash's
        // value, since hash lookup typically masks out the high bits anyway.
        return avoidZero(avalancheBits(hash) & StringHasher::maskHash);
    }

    // This avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet". Setting the high bit maintains
    // reasonable fidelity to a hash code of 0 because it is likely to yield
    // exactly 0 when hash lookup masks out the high bits.
    ALWAYS_INLINE static constexpr unsigned avoidZero(unsigned hash)
    {
        if (hash)
            return hash;
        return 0x80000000 >> flagCount;
    }

#if ENABLE(WYHASH_STRING_HASHER)
    bool m_pendingHashValue { false };
    unsigned m_numberOfProcessedCharacters { 0 };
    uint64_t m_seed { 0 };
    uint64_t m_see1 { 0 };
    uint64_t m_see2 { 0 };

    unsigned m_bufferSize { 0 };
    std::array<char16_t, smallStringThreshold> m_buffer;
#else
    unsigned m_hash { stringHashingStartValue };
    char16_t m_pendingCharacter { 0 };
    bool m_hasPendingCharacter { false };
#endif
};

} // namespace WTF

using WTF::StringHasher;