File: buf_mem.c

package info (click to toggle)
xfsprogs 6.17.0-2
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 11,324 kB
  • sloc: ansic: 167,334; sh: 4,604; makefile: 1,336; python: 835; cpp: 5
file content (408 lines) | stat: -rw-r--r-- 8,710 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "libxfs_priv.h"
#include "libxfs.h"
#include "libxfs/xfile.h"
#include "libxfs/buf_mem.h"
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Offline fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * xfiles meet those requirements.  Therefore, the xmbuf mechanism uses a
 * partition on an xfile to store the staging data.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management.  The resulting xfs_buf objects are kept private to the xmbuf
 * (they are not recycled to the LRU) because b_addr is mapped directly to the
 * memfd file.
 *
 * The only supported block size is the system page size.
 */

/*
 * Figure out the xfile buffer cache block size here.  Both values are filled
 * in by xmbuf_libinit(): the size is the system page size (or 4096 if that
 * is not a usable power of two) and the shift is derived from the size.
 */
unsigned int	XMBUF_BLOCKSIZE;
unsigned int	XMBUF_BLOCKSHIFT;

/* Limit on simultaneous mappings; set by xmbuf_libinit(). */
long		xmbuf_max_mappings;
/* Number of live mappings, tracked only while xmbuf_unmap_early is false. */
static atomic_t	xmbuf_mappings;
/*
 * Set once mmap fails with ENOMEM or the mapping count exceeds
 * xmbuf_max_mappings; from then on buffers are unmapped whenever their cache
 * node is put.
 */
bool		xmbuf_unmap_early = false;

/*
 * Work out how many memory mappings xmbuf should allow itself.
 *
 * Reads the first parseable number from /proc/sys/vm/max_map_count and
 * returns half of it so that other users of mmap still have room.
 *
 * Returns -1 if the file cannot be opened or no line in it converts to a
 * number; the caller is expected to pick its own default in that case.
 */
static long
get_max_mmap_count(void)
{
	char	buffer[64];
	char	*p = NULL;
	long	ret = -1;
	long	val;
	FILE	*file;

	file = fopen("/proc/sys/vm/max_map_count", "r");
	if (!file)
		return -1;

	while (fgets(buffer, sizeof(buffer), file)) {
		/*
		 * Convert into a scratch variable.  On a failed conversion
		 * strtol returns 0 (or LONG_MIN/LONG_MAX on overflow), and
		 * that must not replace the -1 "unknown" result.
		 */
		errno = 0;
		val = strtol(buffer, &p, 0);
		if (errno || p == buffer)
			continue;

		/* only take half the maximum mmap count so others can use it */
		ret = val / 2;
		break;
	}
	fclose(file);
	return ret;
}

/*
 * One-time setup of the xmbuf globals: choose the cache block size from the
 * system page size and decide how many mappings may exist at once.
 */
void
xmbuf_libinit(void)
{
	long		pagesize = sysconf(_SC_PAGESIZE);
	long		max_maps;

	/* Fall back to 4k unless sysconf gave us a power-of-two page size. */
	if (!(pagesize >= 0 && is_power_of_2(pagesize)))
		pagesize = 4096;

	XMBUF_BLOCKSIZE = pagesize;
	XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE);

	/*
	 * Ask procfs how many mmaps we may use simultaneously; if it cannot
	 * tell us, settle for a low default.
	 */
	max_maps = get_max_mmap_count();
	xmbuf_max_mappings = (max_maps < 0) ? 1024 : max_maps;
}

/* Directly map a memfd page into the buffer cache. */
static int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct xfile		*xfile = bp->b_target->bt_xfile;
	void			*p;
	loff_t			pos;

	pos = xfile->partition_pos + BBTOB(xfs_buf_daddr(bp));
	p = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE, MAP_SHARED,
			xfile->fcb->fd, pos);
	if (p == MAP_FAILED) {
		if (errno == ENOMEM && !xmbuf_unmap_early) {
#ifdef DEBUG
			fprintf(stderr, "xmbuf could not make mappings!\n");
#endif
			xmbuf_unmap_early = true;
		}
		return errno;
	}

	if (!xmbuf_unmap_early &&
	    atomic_inc_return(&xmbuf_mappings) > xmbuf_max_mappings) {
#ifdef DEBUG
		fprintf(stderr, _("xmbuf hit too many mappings (%ld)!\n",
					xmbuf_max_mappings);
#endif
		xmbuf_unmap_early = true;
	}

	bp->b_addr = p;
	bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED;
	bp->b_error = 0;
	return 0;
}

/*
 * Tear down the mapping that backs this buffer and clear bp->b_addr.  The
 * mapping counter is only decremented while early unmapping is disabled.
 */
static void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	void			*addr = bp->b_addr;

	if (!xmbuf_unmap_early)
		atomic_dec(&xmbuf_mappings);

	munmap(addr, BBTOB(bp->b_length));
	bp->b_addr = NULL;
}


/*
 * Allocate a new cache node (aka a xfs_buf).
 *
 * The key is a struct xfs_bufkey giving the buftarg, block number and length
 * of the buffer.  A zeroed xfs_buf is allocated, set up with a single map
 * covering the whole buffer, and then mapped via xmbuf_map_page().
 *
 * Returns the cache node embedded in the new buffer, or NULL if either the
 * allocation or the mapping fails (a mapping failure is reported on stderr).
 */
static struct cache_node *
xmbuf_cache_alloc(
	cache_key_t		key)
{
	struct xfs_bufkey	*bufkey = (struct xfs_bufkey *)key;
	struct xfs_buf		*bp;
	int			error;

	bp = kmem_cache_zalloc(xfs_buf_cache, 0);
	if (!bp)
		return NULL;

	bp->b_cache_key = bufkey->blkno;
	bp->b_length = bufkey->bblen;
	bp->b_target = bufkey->buftarg;
	bp->b_mount = bufkey->buftarg->bt_mount;

	pthread_mutex_init(&bp->b_lock, NULL);
	INIT_LIST_HEAD(&bp->b_li_list);
	bp->b_maps = &bp->__b_map;

	bp->b_nmaps = 1;
	bp->b_maps[0].bm_bn = bufkey->blkno;
	bp->b_maps[0].bm_len = bp->b_length;

	error = xmbuf_map_page(bp);
	if (error) {
		fprintf(stderr,
 _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"),
				progname, __FUNCTION__, BBTOB(bp->b_length),
				(unsigned long long)BBTOB(bufkey->blkno),
				strerror(error));

		/* Undo the mutex setup above before the buffer goes away. */
		pthread_mutex_destroy(&bp->b_lock);
		kmem_cache_free(xfs_buf_cache, bp);
		return NULL;
	}

	return &bp->b_node;
}

/*
 * Cache flush hook, run before a cache node is purged.  There is never
 * anything to write back here, so this always reports success.
 */
static int
xmbuf_cache_flush(
	struct cache_node	*node)
{
	(void)node;	/* direct mapped buffers do not need writing */

	return 0;
}

/*
 * Cache release hook: drop the buffer's mapping if it still has one, then
 * return the xfs_buf to its slab cache.
 */
static void
xmbuf_cache_relse(
	struct cache_node	*node)
{
	struct xfs_buf		*bp =
		container_of(node, struct xfs_buf, b_node);

	/* Buffers that were already unmapped have a NULL b_addr. */
	if (bp->b_addr != NULL)
		xmbuf_unmap_page(bp);

	kmem_cache_free(xfs_buf_cache, bp);
}

/*
 * Release every cache node on the given list and report how many were
 * released.  An empty list yields 0.
 */
static unsigned int
xmbuf_cache_bulkrelse(
	struct cache		*cache,
	struct list_head	*list)
{
	struct cache_node	*node, *next;
	int			released = 0;

	if (!list_empty(list)) {
		/* The safe iterator is needed because relse frees each node. */
		list_for_each_entry_safe(node, next, list, cn_mru) {
			xmbuf_cache_relse(node);
			released++;
		}
	}

	return released;
}

/*
 * Cache "get" hook: make sure the buffer is mapped before it is handed out.
 *
 * A buffer whose b_addr is still set is left alone.  Otherwise the backing
 * page is mapped again with xmbuf_map_page().
 *
 * Returns 0 on success, or the error from xmbuf_map_page() after reporting
 * it on stderr.
 */
static int
xmbuf_cache_node_get(
	struct cache_node	*node)
{
	struct xfs_buf		*bp =
		container_of(node, struct xfs_buf, b_node);
	int			error;

	if (bp->b_addr != NULL)
		return 0;

	error = xmbuf_map_page(bp);
	if (error) {
		/*
		 * Report the position in bytes, as xmbuf_cache_alloc() does,
		 * since the message calls it an offset.
		 */
		fprintf(stderr,
 _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"),
				progname, __FUNCTION__, BBTOB(bp->b_length),
				(unsigned long long)BBTOB(xfs_buf_daddr(bp)),
				strerror(error));
		return error;
	}

	return 0;
}

/*
 * Cache "put" hook: once early unmapping has been switched on, drop the
 * buffer's mapping every time its node is put.  Otherwise do nothing.
 */
static void
xmbuf_cache_node_put(
	struct cache_node	*node)
{
	struct xfs_buf		*bp;

	if (!xmbuf_unmap_early)
		return;

	bp = container_of(node, struct xfs_buf, b_node);
	xmbuf_unmap_page(bp);
}

/* Cache callbacks handed to cache_init() for memory-backed buffer targets. */
static struct cache_operations xmbuf_bcache_operations = {
	/* libxfs-provided key hashing and comparison */
	.hash		= libxfs_bhash,
	.compare	= libxfs_bcompare,

	/* node allocation, flushing and release */
	.alloc		= xmbuf_cache_alloc,
	.flush		= xmbuf_cache_flush,
	.relse		= xmbuf_cache_relse,
	.bulkrelse	= xmbuf_cache_bulkrelse,

	/* mapping management when a node is got or put */
	.get		= xmbuf_cache_node_get,
	.put		= xmbuf_cache_node_put,
};

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 *
 * @mp:     mount to record in the new buftarg
 * @descr:  description passed through to xfile_create()
 * @maxpos: size limit passed through to xfile_create()
 * @btpp:   on success, receives the new buftarg
 *
 * Creates the backing xfile and a buffer cache using the xmbuf cache
 * operations, then initializes the buftarg lock.  Everything set up so far is
 * torn down again if a later step fails.
 *
 * Returns 0 on success or a negative errno; *btpp is only written on success.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	unsigned long long	maxpos,
	struct xfs_buftarg	**btpp)
{
	struct xfs_buftarg	*btp;
	struct xfile		*xfile;
	struct cache		*cache;
	int			error;

	btp = kzalloc(sizeof(*btp), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	error = xfile_create(descr, maxpos, &xfile);
	if (error)
		goto out_btp;

	cache = cache_init(0, LIBXFS_BHASHSIZE(NULL), &xmbuf_bcache_operations);
	if (!cache) {
		error = -ENOMEM;
		goto out_xfile;
	}

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_bdev = (dev_t)-1;
	btp->bt_bdev_fd = -1;
	btp->bt_xfile = xfile;
	btp->bcache = cache;

	error = pthread_mutex_init(&btp->lock, NULL);
	if (error) {
		/*
		 * pthread functions return a positive error number; flip the
		 * sign so it matches the -ENOMEM style used above.
		 */
		error = -error;
		goto out_cache;
	}

	*btpp = btp;
	return 0;

out_cache:
	cache_destroy(cache);
out_xfile:
	xfile_destroy(xfile);
out_btp:
	kfree(btp);
	return error;
}

/*
 * Tear down a memory-backed buffer target: destroy its buffer cache and
 * lock, destroy the backing xfile, then free the buftarg itself.
 */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	struct xfile		*xfile;

	ASSERT(xfs_buftarg_is_mem(btp));

	cache_destroy(btp->bcache);
	pthread_mutex_destroy(&btp->lock);

	xfile = btp->bt_xfile;
	xfile_destroy(xfile);

	kfree(btp);
}

/*
 * Check a daddr against the buftarg's xfile: it is accepted when it is below
 * the xfile's maxbytes converted to basic blocks.
 */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (btp->bt_xfile->maxbytes >> BBSHIFT);
}

/*
 * Give back the xfile space behind this buffer by punching a hole over its
 * byte range.  The file size is kept and the fallocate result is not checked.
 */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	const int		mode = FALLOC_FL_PUNCH_HOLE |
				       FALLOC_FL_KEEP_SIZE;
	struct xfile		*xf = bp->b_target->bt_xfile;
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	/* Byte position of the buffer within the xfile partition. */
	pos = xf->partition_pos + BBTOB(xfs_buf_daddr(bp));

	fallocate(xf->fcb->fd, mode, pos, BBTOB(bp->b_length));
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & LIBXFS_B_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Forcibly detach this xmbuf buffer from the transaction.  The dirty, ordered
 * and stale state is cleared from the buffer log item first, because
 * direct-mapped buffers never need bwrite, and then xfs_trans_bdetach() is
 * called until the buffer has no log item left.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_STALE | XFS_BLI_ORDERED |
			    XFS_BLI_DIRTY);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/* Keep detaching until the log item is gone. */
	while (bp->b_log_item)
		xfs_trans_bdetach(tp, bp);
}