File: bloom.go

package info (click to toggle)
golang-github-golang-leveldb 0.0~git20161231.0.3435554-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 1,004 kB
  • sloc: cpp: 166; makefile: 11
file content (142 lines) | stat: -rw-r--r-- 3,737 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Copyright 2013 The LevelDB-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bloom implements Bloom filters.
package bloom // import "github.com/golang/leveldb/bloom"

// Filter is an encoded Bloom filter over a set of []byte keys. Every byte
// except the last holds the filter's bit array; the final byte holds the
// number of probes performed per key.
type Filter []byte

// MayContain reports whether key might be a member of the set that f encodes.
// A false result is definitive. A true result may be a false positive, that
// is, it may be returned for a key that was never added.
//
// A filter shorter than two bytes matches nothing.
func (f Filter) MayContain(key []byte) bool {
	n := len(f)
	if n < 2 {
		return false
	}
	bits, probes := f[:n-1], f[n-1]
	if probes > 30 {
		// Probe counts above 30 are reserved for potential new encodings
		// of short Bloom filters, so conservatively report a match.
		return true
	}
	nBits := uint32(8 * len(bits))
	h := hash(key)
	// Each probe position is derived from the previous one by adding a
	// rotation of the initial hash (double hashing).
	delta := h>>17 | h<<15
	for ; probes > 0; probes-- {
		pos := h % nBits
		if bits[pos>>3]&(1<<(pos&7)) == 0 {
			return false
		}
		h += delta
	}
	return true
}

// TODO: delete the buf argument to NewFilter, if it's always going to be nil?

// NewFilter builds a Bloom filter encoding the given keys, using roughly
// bitsPerKey bits of storage per key. If buf has enough capacity the result
// is a sub-slice of buf[:cap(buf)] (its previous contents are overwritten);
// otherwise a new slice is allocated.
//
// Negative bitsPerKey values are treated as zero. The filter is never smaller
// than 64 bits, and the number of probes per key is clamped to [1, 30].
//
// A good bitsPerKey value is 10, which yields a filter with ~ 1% false
// positive rate.
func NewFilter(buf []byte, keys [][]byte, bitsPerKey int) Filter {
	const (
		minBits   = 64
		minProbes = 1
		maxProbes = 30
	)

	if bitsPerKey < 0 {
		bitsPerKey = 0
	}
	// The probe count is bitsPerKey * ln(2), with ln(2) approximated as 0.69.
	probes := uint32(float64(bitsPerKey) * 0.69)
	switch {
	case probes < minProbes:
		probes = minProbes
	case probes > maxProbes:
		probes = maxProbes
	}

	// Very small filters have a very high false positive rate, so enforce a
	// minimum length, then round the bit count up to a whole number of bytes.
	total := len(keys) * bitsPerKey
	if total < minBits {
		total = minBits
	}
	size := (total + 7) / 8
	modulus := uint32(size * 8)

	// One extra trailing byte records the probe count.
	if need := size + 1; need > cap(buf) {
		buf = make([]byte, need)
	} else {
		buf = buf[:need]
		for i := range buf {
			buf[i] = 0
		}
	}

	for _, key := range keys {
		h := hash(key)
		delta := h>>17 | h<<15
		for n := probes; n > 0; n-- {
			pos := h % modulus
			buf[pos>>3] |= 1 << (pos & 7)
			h += delta
		}
	}
	buf[size] = uint8(probes)
	return Filter(buf)
}

// hash implements a hashing algorithm similar to the Murmur hash.
//
// It consumes b four bytes at a time, interpreting each group as a
// little-endian uint32, and then mixes in the remaining zero to three bytes.
// All arithmetic is performed on uint32 values and wraps modulo 2^32.
func hash(b []byte) uint32 {
	const (
		seed = 0xbc9f1d34
		m    = 0xc6a4a793
	)
	// Convert the length to uint32 before multiplying: m does not fit in a
	// 32-bit int, so the expression len(b)*m fails to compile on platforms
	// where int is 32 bits wide. The product is only used modulo 2^32, so
	// the result is the same as the int multiplication on 64-bit platforms.
	h := uint32(seed) ^ (uint32(len(b)) * m)
	for ; len(b) >= 4; b = b[4:] {
		h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
		h *= m
		h ^= h >> 16
	}
	// Fold in the trailing bytes; each case deliberately falls through to
	// the next so that all remaining bytes are added before the final mix.
	switch len(b) {
	case 3:
		h += uint32(b[2]) << 16
		fallthrough
	case 2:
		h += uint32(b[1]) << 8
		fallthrough
	case 1:
		h += uint32(b[0])
		h *= m
		h ^= h >> 24
	}
	return h
}

// FilterPolicy is a Bloom filter implementation of the db.FilterPolicy
// interface from the leveldb/db package.
//
// Its integer value is the approximate number of bits used per key. A good
// value is 10, which yields a filter with ~ 1% false positive rate.
//
// The rest of this package's API (leveldb/bloom) can be used on its own,
// without this type or the leveldb/db package.
type FilterPolicy int

// Name implements the db.FilterPolicy interface.
func (p FilterPolicy) Name() string {
	// The value may look arbitrary, but it is written to LevelDB .ldb files
	// and must match exactly to stay compatible with those files and with
	// the C++ LevelDB code.
	const name = "leveldb.BuiltinBloomFilter2"
	return name
}

// NewFilter implements the db.FilterPolicy interface. It builds a freshly
// allocated filter for keys, using p as the number of bits per key.
func (p FilterPolicy) NewFilter(keys [][]byte) []byte {
	bitsPerKey := int(p)
	return NewFilter(nil, keys, bitsPerKey)
}

// MayContain implements the db.FilterPolicy interface. It interprets filter
// as an encoded Filter and reports whether it may contain key.
func (p FilterPolicy) MayContain(filter, key []byte) bool {
	f := Filter(filter)
	return f.MayContain(key)
}