File: bloomfilter.go

package info (click to toggle)
golang-github-holiman-bloomfilter 2.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 144 kB
  • sloc: makefile: 2
file content (145 lines) | stat: -rw-r--r-- 3,295 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
// Copyright © 2020 Martin Holst Swende, continued on the work of Barry Allard

package v2

import (
	"errors"
	"hash"
	"sync"
)

var (
	errHashMismatch = errors.New("hash mismatch, bloom filter corruption or wrong version")
)

// Filter is an opaque Bloom filter type
type Filter struct {
	lock sync.RWMutex
	bits []uint64
	keys []uint64
	m    uint64 // number of bits the "bits" field should recognize
	n    uint64 // number of inserted elements
}

// M is the size of Bloom filter, in bits
func (f *Filter) M() uint64 {
	return f.m
}

// K is the count of keys
func (f *Filter) K() uint64 {
	return uint64(len(f.keys))
}

// Add a hashable item, v, to the filter
func (f *Filter) Add(v hash.Hash64) {
	f.AddHash(v.Sum64())
}

// rotation sets how much to rotate the hash on each filter iteration. This
// is somewhat randomly set to a prime on the lower segment of 64. At 17, the cycle
// does not repeat for quite a while, but even for low number of filters the
// changes are quite rapid
const rotation = 17

// Adds an already hashes item to the filter.
// Identical to Add (but slightly faster)
func (f *Filter) AddHash(hash uint64) {
	f.lock.Lock()
	defer f.lock.Unlock()
	var (
		i uint64
	)
	for n := 0; n < len(f.keys); n++ {
		hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
		i = hash % f.m
		f.bits[i>>6] |= 1 << uint(i&0x3f)
	}
	f.n++
}

// ContainsHash tests if f contains the (already hashed) key
// Identical to Contains but slightly faster
func (f *Filter) ContainsHash(hash uint64) bool {
	f.lock.RLock()
	defer f.lock.RUnlock()
	var (
		i uint64
		r = uint64(1)
	)
	for n := 0; n < len(f.keys) && r != 0; n++ {
		hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
		i = hash % f.m
		r &= (f.bits[i>>6] >> uint(i&0x3f)) & 1
	}
	return r != 0
}

// Contains tests if f contains v
// false: f definitely does not contain value v
// true:  f maybe contains value v
func (f *Filter) Contains(v hash.Hash64) bool {
	return f.ContainsHash(v.Sum64())
}

// Copy f to a new Bloom filter
func (f *Filter) Copy() (*Filter, error) {
	f.lock.RLock()
	defer f.lock.RUnlock()

	out, err := f.NewCompatible()
	if err != nil {
		return nil, err
	}
	copy(out.bits, f.bits)
	out.n = f.n
	return out, nil
}

// UnionInPlace merges Bloom filter f2 into f
func (f *Filter) UnionInPlace(f2 *Filter) error {
	if !f.IsCompatible(f2) {
		return errors.New("incompatible bloom filters")
	}

	f.lock.Lock()
	defer f.lock.Unlock()

	for i, bitword := range f2.bits {
		f.bits[i] |= bitword
	}
	// Also update the counters
	f.n += f2.n
	return nil
}

// Union merges f2 and f2 into a new Filter out
func (f *Filter) Union(f2 *Filter) (out *Filter, err error) {
	if !f.IsCompatible(f2) {
		return nil, errors.New("incompatible bloom filters")
	}

	f.lock.RLock()
	defer f.lock.RUnlock()

	out, err = f.NewCompatible()
	if err != nil {
		return nil, err
	}
	for i, bitword := range f2.bits {
		out.bits[i] = f.bits[i] | bitword
	}
	// Also update the counters
	out.n = f.n + f2.n
	return out, nil
}