File: counters.c

package info (click to toggle)
libhugetlbfs 2.24-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,272 kB
  • sloc: ansic: 10,830; python: 810; makefile: 670; sh: 660; asm: 170
file content (424 lines) | stat: -rw-r--r-- 12,006 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/*
 * libhugetlbfs - Easy use of Linux hugepages
 * Copyright (C) 2005-2007 David Gibson & Adam Litke, IBM Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <sys/types.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <hugetlbfs.h>
#include "hugetests.h"

/*
 * Test Rationale:
 *
 * The hugetlb pool maintains 4 global counters to track pages as they
 * transition between various states.  Due to the complex relationships between
 * the counters, regressions are likely to occur in the future.  This test
 * performs operations that change the counters in known ways.  It emulates the
 * expected kernel behavior and compares the expected result to the actual
 * values after each operation.
 */

/*
 * errno comes from <errno.h>, which this file already includes.  It is
 * deliberately not re-declared here: POSIX permits errno to be a macro
 * and leaves the behaviour undefined if a program declares it itself.
 */

/* Global test configuration */
#define DYNAMIC_SYSCTL "/proc/sys/vm/nr_overcommit_hugepages"
/* Values captured at start-up so cleanup() can restore them; -1 = not saved */
static long saved_nr_hugepages = -1;
static long saved_oc_hugepages = -1;
/* Huge page size under test, as returned by check_hugepagesize() */
static long hpage_size;
/* Result of kernel_has_private_reservations() */
static int private_resv;

/*
 * State arrays for our mmaps.  Each slot describes one hugetlbfs mapping:
 * SL_SETUP is the pre-existing mapping created by main(), SL_TEST is the
 * mapping exercised inside run_test().
 */
#define NR_SLOTS	2
#define SL_SETUP	0
#define SL_TEST		1
static int map_fd[NR_SLOTS];		/* backing hugetlbfs file descriptor */
static char *map_addr[NR_SLOTS];	/* start address of the mapping */
static unsigned long map_size[NR_SLOTS];	/* mapping length in bytes */
static unsigned int touched[NR_SLOTS];	/* huge pages written to so far */

/*
 * Keep track of expected counter values.  These hold the values seen by
 * the most recent verify_counters() call and are the starting point for
 * predicting the effect of the next operation.
 */
static long prev_total;
static long prev_free;
static long prev_resv;
static long prev_surp;

/* NOTE: both macros evaluate their arguments more than once. */
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))

/*
 * Put the huge page pool settings back the way the test found them.
 *
 * Nothing is done unless a valid huge page size is known.  Each saved
 * value is written back only if it was actually captured (the saved_*
 * variables start out as -1).
 */
void cleanup(void)
{
	if (hpage_size > 0) {
		if (saved_nr_hugepages >= 0)
			set_nr_hugepages(hpage_size, saved_nr_hugepages);
		if (saved_oc_hugepages >= 0)
			set_nr_overcommit_hugepages(hpage_size,
							saved_oc_hugepages);
	}
}

/*
 * Check that the overcommit (dynamic pool) limit can be read and raised.
 *
 * The current limit is stored in saved_oc_hugepages for later
 * restoration, then the limit is set to 10 pages.  Either step failing
 * is reported through FAIL().
 */
void verify_dynamic_pool_support(void)
{
	saved_oc_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_OC);
	if (saved_oc_hugepages < 0)
		FAIL("Kernel appears to lack dynamic hugetlb pool support");

	/* Nothing more to do once the limit has been raised successfully. */
	if (set_nr_overcommit_hugepages(hpage_size, 10) >= 0)
		return;

	/*
	 * An "invalid argument" rejection of the write may simply mean that
	 * this page size is a gigantic page size, for which the overcommit
	 * feature is not available.  Let the helper look into that before
	 * the generic failure is reported.
	 */
	if (errno == EINVAL)
		check_if_gigantic_page();
	FAIL("Couldn't set overcommit limit");
}

/*
 * Report a counter that does not hold its expected value.
 *
 * line   - source line of the operation being verified
 * name   - counter name used in the message
 * expect - value the test predicted
 * actual - value read back; -1 is reported as the counter being missing
 *          rather than as a mismatch
 */
void bad_value(int line, const char *name, long expect, long actual)
{
	if (actual != -1) {
		FAIL("Line %i: Bad %s: expected %li, actual %li",
			line, name, expect, actual);
	} else {
		ERROR("%s not found in /proc/meminfo", name);
	}
}

/*
 * Read the four pool counters and compare them with the predicted values.
 *
 * line           - source line of the operation being verified
 * et, ef, er, es - expected total, free, reserved and surplus counts
 *
 * The values read are afterwards stored in prev_total, prev_free,
 * prev_resv and prev_surp as the baseline for the next prediction.
 */
void verify_counters(int line, long et, long ef, long er, long es)
{
	const long total = get_huge_page_counter(hpage_size, HUGEPAGES_TOTAL);
	const long avail = get_huge_page_counter(hpage_size, HUGEPAGES_FREE);
	const long resv = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD);
	const long surp = get_huge_page_counter(hpage_size, HUGEPAGES_SURP);

	/* Sanity: every counter is non-negative and free covers reserved */
	if (!(total >= 0 && avail >= 0 && resv >= 0 && surp >= 0))
		ERROR("Negative counter value");
	if (resv > avail)
		ERROR("HugePages_Free < HugePages_Rsvd");

	/* Compare what was read against what the caller predicted */
	if (et != total)
		bad_value(line, "HugePages_Total", et, total);
	if (ef != avail)
		bad_value(line, "HugePages_Free", ef, avail);
	if (er != resv)
		bad_value(line, "HugePages_Rsvd", er, resv);
	if (es != surp)
		bad_value(line, "HugePages_Surp", es, surp);

	/* Remember the observed state for the next operation */
	prev_total = total;
	prev_free = avail;
	prev_resv = resv;
	prev_surp = surp;
}

/* Memory operations:
 * Each of these has a predefined effect on the counters
 */
/* Pages in the pool that are not surplus; expands to the locals et and es */
#define persistent_huge_pages (et - es)

/*
 * Resize the pool to count pages and verify the resulting counters.
 *
 * count - requested persistent pool size
 * line  - source line of the caller, used in failure reports
 *
 * The expected counters are derived step by step from the last verified
 * values; the model follows set_max_huge_pages in mm/hugetlb.c.
 */
void _set_nr_hugepages(unsigned long count, int line)
{
	long floor_size;
	long et, ef, er, es;

	if (set_nr_hugepages(hpage_size, count) != 0)
		FAIL("Cannot set nr_hugepages");

	et = prev_total;
	ef = prev_free;
	er = prev_resv;
	es = prev_surp;

	/*
	 * Growing the pool: surplus pages are converted to persistent pages
	 * first; only then are new pages allocated, each of which raises
	 * both the total and the free count.
	 */
	while (es != 0 && count > persistent_huge_pages)
		--es;
	for (; count > persistent_huge_pages; et++)
		ef++;

	if (count < persistent_huge_pages) {
		/*
		 * Shrinking the pool: free pages are handed back first, but
		 * never so many that outstanding reservations could no
		 * longer be honoured.  Whatever still exceeds the request is
		 * accounted as surplus, so the pool shrinks as pages are
		 * released later on.
		 */
		floor_size = max(count, er + et - ef);
		while (floor_size < persistent_huge_pages) {
			--ef;
			--et;
		}
		while (count < persistent_huge_pages)
			++es;
	}

	verify_counters(line, et, ef, er, es);
}
/* From here on set_nr_hugepages(c) is the verifying wrapper above */
#undef set_nr_hugepages
#define set_nr_hugepages(c) _set_nr_hugepages(c, __LINE__)

/*
 * Create a hugetlbfs mapping of hpages huge pages in slot s and verify
 * the counters afterwards.
 *
 * s      - slot index (SL_SETUP or SL_TEST)
 * hpages - mapping length in huge pages
 * flags  - mmap flags (MAP_SHARED or MAP_PRIVATE)
 * line   - source line of the caller, used in failure reports
 */
void _map(int s, int hpages, int flags, int line)
{
	long et = prev_total;
	long ef = prev_free;
	long er = prev_resv;
	long es = prev_surp;

	map_fd[s] = hugetlbfs_unlinked_fd();
	if (map_fd[s] < 0)
		CONFIG("Unable to open hugetlbfs file: %s", strerror(errno));

	map_size[s] = hpages * hpage_size;
	map_addr[s] = mmap(NULL, map_size[s], PROT_READ|PROT_WRITE, flags,
				map_fd[s], 0);
	if (map_addr[s] == MAP_FAILED)
		FAIL("mmap failed: %s", strerror(errno));

	/* A fresh mapping has no instantiated pages yet */
	touched[s] = 0;

	/*
	 * A MAP_SHARED mapping reserves its pages at mmap time.  When the
	 * pool cannot cover the reservation, the missing pages are added
	 * as surplus pages.
	 * NOTE: The whole mapping is assumed to need a reservation, so
	 * partial reservations are not modelled.
	 *
	 * MAP_PRIVATE mappings are treated the same way when the kernel
	 * supports private reservations; without that support mmap leaves
	 * all counters unchanged.
	 */
	if ((flags & MAP_SHARED) || private_resv) {
		unsigned long deficit = 0;

		if (prev_free < hpages + prev_resv)
			deficit = hpages - prev_free + prev_resv;

		et += deficit;
		ef += deficit;
		er += hpages;
		es += deficit;
	}

	verify_counters(line, et, ef, er, es);
}
#define map(s, h, f) _map(s, h, f, __LINE__)

/*
 * Tear down the mapping in slot s and verify the counters afterwards.
 *
 * s      - slot index (SL_SETUP or SL_TEST)
 * hpages - mapping length in huge pages, as passed to the matching map()
 * flags  - mmap flags used when the mapping was created
 * line   - source line of the caller, used in failure reports
 *
 * A failing munmap() or close() is reported right away instead of
 * surfacing later as an unexplained counter mismatch.
 */
void _unmap(int s, int hpages, int flags, int line)
{
	long et, ef, er, es;
	unsigned long i;

	if (munmap(map_addr[s], map_size[s]) != 0)
		FAIL("munmap failed: %s", strerror(errno));
	if (close(map_fd[s]) != 0)
		FAIL("close failed: %s", strerror(errno));
	map_fd[s] = -1;
	map_addr[s] = NULL;
	map_size[s] = 0;

	et = prev_total;
	ef = prev_free;
	er = prev_resv;
	es = prev_surp;

	/*
	 * When a VMA is unmapped, the instantiated (touched) pages are
	 * freed.  If the pool is in a surplus state, pages are freed to the
	 * buddy allocator, otherwise they go back into the hugetlb pool.
	 * NOTE: This code assumes touched pages have only one user.
	 */
	for (i = 0; i < touched[s]; i++) {
		if (es) {
			et--;
			es--;
		} else
			ef++;
	}

	/*
	 * mmap may have created some surplus pages to accommodate a
	 * reservation.  If those pages were not touched, then they will
	 * not have been freed by the code above.  Free them here, and drop
	 * the part of the reservation that was never consumed.
	 */
	if ((flags & MAP_SHARED) || private_resv) {
		int unused_surplus = min(hpages - touched[s], es);
		et -= unused_surplus;
		ef -= unused_surplus;
		er -= hpages - touched[s];
		es -= unused_surplus;
	}

	verify_counters(line, et, ef, er, es);
}
#define unmap(s, h, f) _unmap(s, h, f, __LINE__)

/*
 * Write to the first hpages huge pages of the mapping in slot s and
 * verify the counters afterwards.
 *
 * s      - slot index (SL_SETUP or SL_TEST)
 * hpages - number of huge pages to touch, counted from the mapping start
 * flags  - mmap flags used when the mapping was created
 * line   - source line of the caller, used in failure reports
 */
void _touch(int s, int hpages, int flags, int line)
{
	long et, ef, er, es;
	int nr;
	char *c;

	/*
	 * One byte is written per huge page.  The countdown in nr limits the
	 * walk to the requested number of pages, and the address check keeps
	 * it inside the mapping, so the number of pages written matches what
	 * is recorded in touched[] below.
	 */
	for (c = map_addr[s], nr = hpages;
			nr > 0 && c < map_addr[s] + map_size[s];
			c += hpage_size, nr--)
		*c = (char) (nr % 2);
	/*
	 * Keep track of how many pages were touched since we can't easily
	 * detect that from user space.
	 * NOTE: Calling this function more than once for a mmap may yield
	 * results you don't expect.  Be careful :)
	 */
	touched[s] = max(touched[s], hpages);

	/*
	 * Shared mappings (and private ones, when private reservations are
	 * supported) consume pages that were reserved at mmap time, so both
	 * the reserved and the free count drop by the number of pages.
	 *
	 * Unreserved private mappings may need to allocate surplus pages to
	 * satisfy the fault.  The surplus pages become part of the pool
	 * which could elevate total, free, and surplus counts.  resv is
	 * unchanged but free must be decreased.
	 */
	if (flags & MAP_SHARED || private_resv) {
		et = prev_total;
		ef = prev_free - hpages;
		er = prev_resv - hpages;
		es = prev_surp;
	} else {
		if (hpages + prev_resv > prev_free)
			et = prev_total + (hpages - prev_free + prev_resv);
		else
			et = prev_total;
		er = prev_resv;
		es = prev_surp + et - prev_total;
		ef = prev_free - hpages + et - prev_total;
	}
	verify_counters(line, et, ef, er, es);
}
#define touch(s, h, f) _touch(s, h, f, __LINE__)

/*
 * Run one battery of counter checks on the SL_TEST slot.
 *
 * desc    - label printed through verbose_printf() for this pass
 * base_nr - pool size the battery starts from and returns to
 *
 * Every set_nr_hugepages/map/touch/unmap below is a macro that forwards
 * its own __LINE__, so a counter mismatch is reported against the exact
 * step that caused it.  The steps build on each other's state and must
 * stay in this order.
 */
void run_test(char *desc, int base_nr)
{
	verbose_printf("%s...\n", desc);
	set_nr_hugepages(base_nr);

	/* untouched, shared mmap */
	map(SL_TEST, 1, MAP_SHARED);
	unmap(SL_TEST, 1, MAP_SHARED);

	/* untouched, private mmap */
	map(SL_TEST, 1, MAP_PRIVATE);
	unmap(SL_TEST, 1, MAP_PRIVATE);

	/* touched, shared mmap */
	map(SL_TEST, 1, MAP_SHARED);
	touch(SL_TEST, 1, MAP_SHARED);
	unmap(SL_TEST, 1, MAP_SHARED);

	/* touched, private mmap */
	map(SL_TEST, 1, MAP_PRIVATE);
	touch(SL_TEST, 1, MAP_PRIVATE);
	unmap(SL_TEST, 1, MAP_PRIVATE);

	/* Explicit resizing during outstanding surplus */
	/* Consume surplus when growing pool */
	map(SL_TEST, 2, MAP_SHARED);
	set_nr_hugepages(max(base_nr, 1));

	/* Add pages once surplus is consumed */
	set_nr_hugepages(max(base_nr, 3));

	/* Release free huge pages first */
	set_nr_hugepages(max(base_nr, 2));

	/* When shrinking beyond committed level, increase surplus */
	set_nr_hugepages(base_nr);

	/* Upon releasing the reservation, reduce surplus counts */
	unmap(SL_TEST, 2, MAP_SHARED);

	verbose_printf("OK.\n");
}

/*
 * Test driver: record the settings to restore, check the prerequisites,
 * then run the battery for base pool sizes 0 through 3, each time with
 * four different pre-existing mappings in the SL_SETUP slot.
 */
int main(int argc, char ** argv)
{
	int base_nr = 0;

	test_init(argc, argv);
	hpage_size = check_hugepagesize();
	saved_nr_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_TOTAL);
	/*
	 * NOTE(review): verify_dynamic_pool_support() already writes the
	 * overcommit limit before check_must_be_root() runs; presumably the
	 * root check belongs first - confirm against the helpers.
	 */
	verify_dynamic_pool_support();
	check_must_be_root();

	private_resv = kernel_has_private_reservations();
	if (private_resv == -1)
		FAIL("kernel_has_private_reservations() failed\n");

	/*
	 * At most 3 huge pages should be needed by this test case.
	 * The battery is repeated with a growing base pool size, which
	 * changes when surplus pages have to be allocated and so widens
	 * the set of corner cases covered.
	 */
	while (base_nr <= 3) {
		verbose_printf("Base pool size: %i\n", base_nr);

		/* No pre-existing mapping */
		run_test("Clean", base_nr);

		/* Pre-existing shared mapping, not touched */
		map(SL_SETUP, 1, MAP_SHARED);
		run_test("Untouched, shared", base_nr);
		unmap(SL_SETUP, 1, MAP_SHARED);

		/* Pre-existing private mapping, not touched */
		map(SL_SETUP, 1, MAP_PRIVATE);
		run_test("Untouched, private", base_nr);
		unmap(SL_SETUP, 1, MAP_PRIVATE);

		/* Pre-existing shared mapping, touched */
		map(SL_SETUP, 1, MAP_SHARED);
		touch(SL_SETUP, 1, MAP_SHARED);
		run_test("Touched, shared", base_nr);
		unmap(SL_SETUP, 1, MAP_SHARED);

		/* Pre-existing private mapping, touched */
		map(SL_SETUP, 1, MAP_PRIVATE);
		touch(SL_SETUP, 1, MAP_PRIVATE);
		run_test("Touched, private", base_nr);
		unmap(SL_SETUP, 1, MAP_PRIVATE);

		base_nr++;
	}

	PASS();
}