File: bloom.cc

package info (click to toggle)
tarantool 2.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 85,364 kB
  • sloc: ansic: 513,760; cpp: 69,489; sh: 25,650; python: 19,190; perl: 14,973; makefile: 4,173; yacc: 1,329; sql: 1,074; pascal: 620; ruby: 190; awk: 18; lisp: 7
file content (104 lines) | stat: -rw-r--r-- 2,595 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "salad/bloom.h"
#include <unordered_set>
#include <vector>
#include <iostream>

using namespace std;

uint32_t h(uint32_t i)
{
	return i * 2654435761;
}

void
simple_test()
{
	cout << "*** " << __func__ << " ***" << endl;
	srand(time(0));
	uint32_t error_count = 0;
	uint32_t fp_rate_too_big = 0;
	for (double p = 0.001; p < 0.5; p *= 1.3) {
		uint64_t tests = 0;
		uint64_t false_positive = 0;
		for (uint32_t count = 1000; count <= 10000; count *= 2) {
			struct bloom bloom;
			bloom_create(&bloom, count, p);
			unordered_set<uint32_t> check;
			for (uint32_t i = 0; i < count; i++) {
				uint32_t val = rand() % (count * 10);
				check.insert(val);
				bloom_add(&bloom, h(val));
			}
			for (uint32_t i = 0; i < count * 10; i++) {
				bool has = check.find(i) != check.end();
				bool bloom_possible =
					bloom_maybe_has(&bloom, h(i));
				tests++;
				if (has && !bloom_possible)
					error_count++;
				if (!has && bloom_possible)
					false_positive++;
			}
			bloom_destroy(&bloom);
		}
		double fp_rate = (double)false_positive / tests;
		if (fp_rate > p + 0.001)
			fp_rate_too_big++;
	}
	cout << "error_count = " << error_count << endl;
	cout << "fp_rate_too_big = " << fp_rate_too_big << endl;
}

void
store_load_test()
{
	cout << "*** " << __func__ << " ***" << endl;
	srand(time(0));
	uint32_t error_count = 0;
	uint32_t fp_rate_too_big = 0;
	for (double p = 0.01; p < 0.5; p *= 1.5) {
		uint64_t tests = 0;
		uint64_t false_positive = 0;
		for (uint32_t count = 300; count <= 3000; count *= 10) {
			struct bloom bloom;
			bloom_create(&bloom, count, p);
			unordered_set<uint32_t> check;
			for (uint32_t i = 0; i < count; i++) {
				uint32_t val = rand() % (count * 10);
				check.insert(val);
				bloom_add(&bloom, h(val));
			}
			struct bloom test = bloom;
			char *buf = (char *)malloc(bloom_store_size(&bloom));
			bloom_store(&bloom, buf);
			bloom_destroy(&bloom);
			memset(&bloom, '#', sizeof(bloom));
			bloom_load_table(&test, buf);
			free(buf);
			for (uint32_t i = 0; i < count * 10; i++) {
				bool has = check.find(i) != check.end();
				bool bloom_possible =
					bloom_maybe_has(&test, h(i));
				tests++;
				if (has && !bloom_possible)
					error_count++;
				if (!has && bloom_possible)
					false_positive++;
			}
			bloom_destroy(&test);
		}
		double fp_rate = (double)false_positive / tests;
		double excess = fp_rate / p;
		if (fp_rate > p + 0.001)
			fp_rate_too_big++;
	}
	cout << "error_count = " << error_count << endl;
	cout << "fp_rate_too_big = " << fp_rate_too_big << endl;
}

int
main(void)
{
	simple_test();
	store_load_test();
}