File: HashBloom.h

package info (click to toggle)
linuxdcpp 1.1.0-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 4,492 kB
  • ctags: 4,874
  • sloc: cpp: 34,798; python: 235; makefile: 13
file content (43 lines) | stat: -rw-r--r-- 1,439 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#ifndef HASHBLOOM_H_
#define HASHBLOOM_H_

#include "MerkleTree.h"

namespace dcpp {
/**
 * According to http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf
 * the optimal number of hashes k is (m/n)*ln(2), m = number of bits in the filter and n = number
 * of items added. The largest k that we can get from a single TTH value depends on the number of
 * bits we need to address the bloom structure, which in turn depends on m, so the optimal size
 * for our filter is m = n * k / ln(2) where n is the number of TTH values, or in our case, number of
 * files in share since each file is identified by one TTH value. We try that for each even dividend
 * of the key size (2, 3, 4, 6, 8, 12) and if m fits within the bits we're able to address (2^(keysize/k)),
 * we can use that value when requesting the bloom filter.
 */
class HashBloom {
public:
	HashBloom() : k(0), h(0) { }

	/** Return a suitable value for k based on n */
	static size_t get_k(size_t n, size_t h);
	/** Optimal number of bits to allocate for n elements when using k hashes */
	static uint64_t get_m(size_t n, size_t k);

	void add(const TTHValue& tth);
	bool match(const TTHValue& tth) const;
	void reset(size_t k, size_t m, size_t h);
	void push_back(bool v);

	void copy_to(ByteVector& v) const;
private:

	size_t pos(const TTHValue& tth, size_t n) const;

	std::vector<bool> bloom;
	size_t k;
	size_t h;
};

}

#endif /*HASHBLOOM_H_*/