File: dm-io.c

/*
 * Copyright (C) 2003 Sistina Software
 *
 * This file is released under the GPL.
 */

#include "dm-io.h"

#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/bitops.h>

/* FIXME: can we shrink this? */
struct io_context {
	int rw;
	unsigned long error;	/* bitset of failed regions; a long, since set_bit() operates on longs */
	atomic_t count;
	struct task_struct *sleeper;
	io_notify_fn callback;
	void *context;
};

/*
 * We maintain a pool of buffer heads for dispatching the io.
 */
static unsigned int _num_bhs;
static mempool_t *_buffer_pool;

/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as buffer heads!  (FIXME:
 * must reduce this.)
 */
static mempool_t *_io_pool;

static void *alloc_bh(int gfp_mask, void *pool_data)
{
	struct buffer_head *bh;

	bh = kmem_cache_alloc(bh_cachep, gfp_mask);
	if (bh) {
		bh->b_reqnext = NULL;
		init_waitqueue_head(&bh->b_wait);
		INIT_LIST_HEAD(&bh->b_inode_buffers);
	}

	return bh;
}

static void *alloc_io(int gfp_mask, void *pool_data)
{
	return kmalloc(sizeof(struct io_context), gfp_mask);
}

static void free_io(void *element, void *pool_data)
{
	kfree(element);
}

static unsigned int pages_to_buffers(unsigned int pages)
{
	return 4 * pages;	/* too many? */
}

static int resize_pool(unsigned int new_bhs)
{
	int r = 0;

	if (_buffer_pool) {
		if (new_bhs == 0) {
			/* free off the pools */
			mempool_destroy(_buffer_pool);
			mempool_destroy(_io_pool);
			_buffer_pool = _io_pool = NULL;
		} else {
			/* resize the pools */
			r = mempool_resize(_buffer_pool, new_bhs, GFP_KERNEL);
			if (!r)
				r = mempool_resize(_io_pool,
						   new_bhs, GFP_KERNEL);
		}
	} else {
		/* create new pools */
		_buffer_pool = mempool_create(new_bhs, alloc_bh,
					      mempool_free_slab, bh_cachep);
		if (!_buffer_pool)
			return -ENOMEM;

		_io_pool = mempool_create(new_bhs, alloc_io, free_io, NULL);
		if (!_io_pool) {
			mempool_destroy(_buffer_pool);
			_buffer_pool = NULL;
			return -ENOMEM;
		}
	}

	if (!r)
		_num_bhs = new_bhs;

	return r;
}

int dm_io_get(unsigned int num_pages)
{
	return resize_pool(_num_bhs + pages_to_buffers(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
	resize_pool(_num_bhs - pages_to_buffers(num_pages));
}

/*-----------------------------------------------------------------
 * We need to keep track of which region a buffer is doing io
 * for.  In order to save a memory allocation we store this in an
 * unused field of the buffer head, and provide these access
 * functions.
 *
 * FIXME: add compile time check that an unsigned int can fit
 * into a pointer.
 *
 *---------------------------------------------------------------*/
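/*
 * A minimal sketch of that compile-time check (not part of the
 * original source; the typedef name is arbitrary): the array size
 * below goes negative, and so fails the build, whenever an
 * unsigned int cannot be round-tripped through the pointer field.
 */
typedef char __dm_io_region_fits_in_ptr[sizeof(unsigned int) <= sizeof(void *) ? 1 : -1];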
static inline void bh_set_region(struct buffer_head *bh, unsigned int region)
{
	bh->b_journal_head = (void *) region;
}

static inline unsigned int bh_get_region(struct buffer_head *bh)
{
	return (unsigned int) bh->b_journal_head;
}

/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bhs that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io_context *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error);

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			int r = io->error;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, _io_pool);
			fn(r, context);
		}
	}
}

static void endio(struct buffer_head *bh, int uptodate)
{
	struct io_context *io = (struct io_context *) bh->b_private;

	if (!uptodate && io->rw != WRITE) {
		/*
		 * We need to zero this region, otherwise people
		 * like kcopyd may write the arbitrary contents
		 * of the page.
		 */
		memset(bh->b_data, 0, bh->b_size);
	}

	dec_count(io, bh_get_region(bh), !uptodate);
	mempool_free(bh, _buffer_pool);
}

/*
 * Primitives for alignment calculations.
 */
int fls(unsigned n)
{
	return generic_fls32(n);
}

/* Position of the lowest set bit, i.e. the largest power-of-2 alignment of n. */
static inline int log2_floor(unsigned n)
{
	return ffs(n) - 1;
}

/* Position of the highest set bit, i.e. floor(log2(n)). */
static inline int log2_align(unsigned n)
{
	return fls(n) - 1;
}
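
/*
 * Worked example: for n = 24 (binary 11000), ffs(24) == 4 and
 * fls(24) == 5, so log2_floor(24) == 3 (24 is aligned to 2^3 = 8)
 * and log2_align(24) == 4 (the floor of log2(24)).
 */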

/*
 * Dispatches io for the blocks of one page.  Advances *block and
 * returns non-zero once end_block has been reached.
 */
static int do_page(kdev_t dev, sector_t *block, sector_t end_block,
		   unsigned int block_size,
		   struct page *p, unsigned int offset,
		   unsigned int region, struct io_context *io)
{
	struct buffer_head *bh;
	sector_t b = *block;
	sector_t blocks_per_page = PAGE_SIZE / block_size;
	unsigned int this_size; /* holds the size of the current io */
	sector_t len;

	if (!blocks_per_page) {
		DMERR("dm-io: PAGE_SIZE (%lu) < block_size (%u) unsupported",
		      PAGE_SIZE, block_size);
		return 0;
	}

	while ((offset < PAGE_SIZE) && (b != end_block)) {
		bh = mempool_alloc(_buffer_pool, GFP_NOIO);
		init_buffer(bh, endio, io);
		bh_set_region(bh, region);

		/*
		 * Block size must be a power of 2 and aligned
		 * correctly.
		 */

		len = min(end_block - b, blocks_per_page);
		len = min(len, blocks_per_page - offset / block_size);

		if (!len) {
			DMERR("dm-io: Invalid offset/block_size (%u/%u).",
			      offset, block_size);
			return 0;
		}

		this_size = 1 << log2_align(len);
		if (b)
			this_size = min(this_size,
					(unsigned) 1 << log2_floor(b));

		/*
		 * b_blocknr is expressed in units of this io's
		 * size, i.e. this_size blocks of block_size bytes.
		 */
		bh->b_blocknr = (b / this_size);
		bh->b_size = block_size * this_size;
		set_bh_page(bh, p, offset);
		bh->b_this_page = bh;

		bh->b_dev = dev;
		atomic_set(&bh->b_count, 1);

		bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) |
			       (1 << BH_Lock));

		if (io->rw == WRITE)
			clear_bit(BH_Dirty, &bh->b_state);

		atomic_inc(&io->count);
		submit_bh(io->rw, bh);

		b += this_size;
		offset += block_size * this_size;
	}

	*block = b;
	return (b == end_block);
}

static void do_region(unsigned int region, struct io_region *where,
		      struct page *page, unsigned int offset,
		      struct io_context *io)
{
	unsigned int block_size = get_hardsect_size(where->dev);
	unsigned int sblock_size = block_size >> 9;
	sector_t block = where->sector / sblock_size;
	sector_t end_block = (where->sector + where->count) / sblock_size;

	while (1) {
		if (do_page(where->dev, &block, end_block, block_size,
			    page, offset, region, io))
			break;

		offset = 0;	/* only offset the first page */

		page = list_entry(page->list.next, struct page, list);
	}
}

static void dispatch_io(unsigned int num_regions, struct io_region *where,
			struct page *pages, unsigned int offset,
			struct io_context *io)
{
	unsigned int i;

	for (i = 0; i < num_regions; i++)
		if (where[i].count)
			do_region(i, where + i, pages, offset, io);

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}
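
/*
 * Count lifecycle: callers seed io->count with 1 before calling
 * dispatch_io(), do_page() adds one reference per submitted bh and
 * endio() drops one per completion; the final dec_count() above
 * releases the caller's reference, so the io can only complete
 * after every bh has been both submitted and finished.
 */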

/*
 * Synchronous io
 */
int dm_io_sync(unsigned int num_regions, struct io_region *where,
	       int rw, struct page *pages, unsigned int offset,
	       unsigned int *error_bits)
{
	struct io_context io;

	BUG_ON(num_regions > 1 && rw != WRITE);

	io.rw = rw;
	io.error = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;

	dispatch_io(num_regions, where, pages, offset, &io);
	run_task_queue(&tq_disk);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count))
			break;

		schedule();
	}
	set_current_state(TASK_RUNNING);

	*error_bits = io.error;
	return io.error ? -EIO : 0;
}
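
/*
 * Usage sketch (hypothetical values, assuming io_region's fields
 * are laid out as (dev, sector, count) in dm-io.h): synchronously
 * read one page from the start of `dev':
 *
 *	struct io_region where = { dev, 0, PAGE_SIZE >> 9 };
 *	unsigned int error_bits;
 *	int r = dm_io_sync(1, &where, READ, page, 0, &error_bits);
 *
 * Bit i of error_bits is set for each region i that failed.
 */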

/*
 * Asynchronous io
 */
int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
		struct page *pages, unsigned int offset,
		io_notify_fn fn, void *context)
{
	struct io_context *io = mempool_alloc(_io_pool, GFP_NOIO);

	io->rw = rw;
	io->error = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->callback = fn;
	io->context = context;

	dispatch_io(num_regions, where, pages, offset, io);
	return 0;
}
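
/*
 * Usage sketch (hypothetical callback; see the fn(r, context) call
 * in dec_count() for the arguments it receives):
 *
 *	static void my_notify(int error_bits, void *context)
 *	{
 *		if (error_bits)
 *			DMERR("dm-io: async io failed (0x%x)", error_bits);
 *	}
 *	...
 *	dm_io_async(1, &where, WRITE, page, 0, my_notify, NULL);
 */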

EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);