1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
// Copyright © 2020 Martin Holst Swende, continued on the work of Barry Allard
package v2
import (
"errors"
"hash"
"sync"
)
var (
errHashMismatch = errors.New("hash mismatch, bloom filter corruption or wrong version")
)
// Filter is an opaque Bloom filter type
type Filter struct {
lock sync.RWMutex
bits []uint64
keys []uint64
m uint64 // number of bits the "bits" field should recognize
n uint64 // number of inserted elements
}
// M is the size of Bloom filter, in bits
func (f *Filter) M() uint64 {
return f.m
}
// K is the count of keys
func (f *Filter) K() uint64 {
return uint64(len(f.keys))
}
// Add a hashable item, v, to the filter
func (f *Filter) Add(v hash.Hash64) {
f.AddHash(v.Sum64())
}
// rotation sets how much to rotate the hash on each filter iteration. This
// is somewhat randomly set to a prime on the lower segment of 64. At 17, the cycle
// does not repeat for quite a while, but even for low number of filters the
// changes are quite rapid
const rotation = 17
// Adds an already hashes item to the filter.
// Identical to Add (but slightly faster)
func (f *Filter) AddHash(hash uint64) {
f.lock.Lock()
defer f.lock.Unlock()
var (
i uint64
)
for n := 0; n < len(f.keys); n++ {
hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
i = hash % f.m
f.bits[i>>6] |= 1 << uint(i&0x3f)
}
f.n++
}
// ContainsHash tests if f contains the (already hashed) key
// Identical to Contains but slightly faster
func (f *Filter) ContainsHash(hash uint64) bool {
f.lock.RLock()
defer f.lock.RUnlock()
var (
i uint64
r = uint64(1)
)
for n := 0; n < len(f.keys) && r != 0; n++ {
hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
i = hash % f.m
r &= (f.bits[i>>6] >> uint(i&0x3f)) & 1
}
return r != 0
}
// Contains tests if f contains v
// false: f definitely does not contain value v
// true: f maybe contains value v
func (f *Filter) Contains(v hash.Hash64) bool {
return f.ContainsHash(v.Sum64())
}
// Copy f to a new Bloom filter
func (f *Filter) Copy() (*Filter, error) {
f.lock.RLock()
defer f.lock.RUnlock()
out, err := f.NewCompatible()
if err != nil {
return nil, err
}
copy(out.bits, f.bits)
out.n = f.n
return out, nil
}
// UnionInPlace merges Bloom filter f2 into f
func (f *Filter) UnionInPlace(f2 *Filter) error {
if !f.IsCompatible(f2) {
return errors.New("incompatible bloom filters")
}
f.lock.Lock()
defer f.lock.Unlock()
for i, bitword := range f2.bits {
f.bits[i] |= bitword
}
// Also update the counters
f.n += f2.n
return nil
}
// Union merges f2 and f2 into a new Filter out
func (f *Filter) Union(f2 *Filter) (out *Filter, err error) {
if !f.IsCompatible(f2) {
return nil, errors.New("incompatible bloom filters")
}
f.lock.RLock()
defer f.lock.RUnlock()
out, err = f.NewCompatible()
if err != nil {
return nil, err
}
for i, bitword := range f2.bits {
out.bits[i] = f.bits[i] | bitword
}
// Also update the counters
out.n = f.n + f2.n
return out, nil
}
|