File: histogram.c

package info (click to toggle)
tarantool 2.6.0-1.4
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 85,412 kB
  • sloc: ansic: 513,775; cpp: 69,493; sh: 25,650; python: 19,190; perl: 14,973; makefile: 4,178; yacc: 1,329; sql: 1,074; pascal: 620; ruby: 190; awk: 18; lisp: 7
file content (184 lines) | stat: -rw-r--r-- 3,824 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#include <stdint.h>
#include <time.h>

#include "histogram.h"
#include "memory.h"
#include "unit.h"
#include "trivia/util.h"

static int
int64_cmp(const void *p1, const void *p2)
{
	int64_t v1 = *(int64_t *)p1;
	int64_t v2 = *(int64_t *)p2;
	if (v1 > v2)
		return 1;
	if (v1 < v2)
		return -1;
	return 0;
}

static void
int64_sort(int64_t *data, size_t len)
{
	qsort(data, len, sizeof(*data), int64_cmp);
}

static int64_t *
gen_buckets(size_t *p_n_buckets)
{
	size_t n_buckets = 1 + rand() % 20;
	int64_t *buckets = calloc(n_buckets, sizeof(*buckets));
	for (size_t i = 0; i < n_buckets; i++)
		buckets[i] = (i > 0 ? buckets[i - 1] : 0) + 1 + rand() % 2000;
	*p_n_buckets = n_buckets;
	return buckets;
}

static int64_t *
gen_rand_data(size_t *p_len)
{
	size_t len = 900 + rand() % 200;
	int64_t *data = calloc(len, sizeof(*data));
	for (size_t i = 0; i < len; i++)
		data[i] = rand() % 10000;
	*p_len = len;
	return data;
}

static int64_t
gen_rand_value(int64_t min, int64_t max)
{
	assert(max >= min);
	return min + rand() % (max - min + 1);
}

static void
test_counts(void)
{
	header();

	size_t n_buckets;
	int64_t *buckets = gen_buckets(&n_buckets);

	size_t data_len;
	int64_t *data = gen_rand_data(&data_len);

	struct histogram *hist = histogram_new(buckets, n_buckets);
	for (size_t i = 0; i < data_len; i++)
		histogram_collect(hist, data[i]);

	fail_if(hist->total != data_len);

	for (size_t b = 0; b < n_buckets; b++) {
		size_t expected = 0;
		for (size_t i = 0; i < data_len; i++) {
			if (data[i] <= buckets[b] &&
			    (b == 0 || data[i] > buckets[b - 1]))
				expected++;
		}
		fail_if(hist->buckets[b].count != expected);
	}

	histogram_delete(hist);
	free(data);
	free(buckets);

	footer();
}

static void
test_discard(void)
{
	header();

	size_t n_buckets;
	int64_t *buckets = gen_buckets(&n_buckets);

	struct histogram *hist = histogram_new(buckets, n_buckets);

	size_t bucket_sz = gen_rand_value(2, 10);
	size_t data_len = (n_buckets + 1) * bucket_sz;
	int64_t *data = calloc(data_len, sizeof(*data));

	for (size_t b = 0; b <= n_buckets; b++) {
		int64_t min = (b == 0 ? INT64_MIN : buckets[b - 1] + 1);
		int64_t max = (b == n_buckets ? INT64_MAX : buckets[b]);
		for (size_t i = 0; i < bucket_sz; i++)
			data[b * bucket_sz + i] = gen_rand_value(min, max);
	}

	for (size_t i = 0; i < data_len; i++)
		histogram_collect(hist, data[i]);

	for (size_t i = 0; i < data_len; i++) {
		if (i % bucket_sz < bucket_sz / 2)
			histogram_discard(hist, data[i]);
	}
	bucket_sz = (bucket_sz + 1) / 2;

	for (size_t b = 0; b < n_buckets; b++)
		fail_if(hist->buckets[b].count != bucket_sz);
	fail_if(hist->total != bucket_sz * (n_buckets + 1));

	histogram_delete(hist);
	free(data);
	free(buckets);

	footer();
}

static void
test_percentile(void)
{
	header();

	size_t n_buckets;
	int64_t *buckets = gen_buckets(&n_buckets);

	size_t data_len;
	int64_t *data = gen_rand_data(&data_len);

	int64_t max = -1;
	for (size_t i = 0; i < data_len; i++) {
		if (max < data[i])
			max = data[i];
	}

	struct histogram *hist = histogram_new(buckets, n_buckets);
	for (size_t i = 0; i < data_len; i++)
		histogram_collect(hist, data[i]);

	int64_sort(data, data_len);
	for (int pct = 5; pct < 100; pct += 5) {
		int64_t val = data[data_len * pct / 100];
		int64_t expected = max;
		int64_t expected_lo = max;
		for (size_t b = 0; b < n_buckets; b++) {
			if (buckets[b] >= val) {
				expected = buckets[b];
				expected_lo = buckets[b > 0 ? b - 1 : 0];
				break;
			}
		}
		int64_t result = histogram_percentile(hist, pct);
		fail_if(result != expected);
		int64_t result_lo = histogram_percentile_lower(hist, pct);
		fail_if(result_lo != expected_lo);
	}

	histogram_delete(hist);
	free(data);
	free(buckets);

	footer();
}

int
main()
{
	srand(time(NULL));
	test_counts();
	test_discard();
	test_percentile();
}