File: rescue-fix-data-checksum.c

package info (click to toggle)
btrfs-progs 6.16-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 20,504 kB
  • sloc: ansic: 126,181; sh: 7,642; python: 1,386; makefile: 900; asm: 296
file content (511 lines) | stat: -rw-r--r-- 13,646 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include "kerncompat.h"
#include <ctype.h>
#include "kernel-shared/disk-io.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/backref.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/file-item.h"
#include "common/messages.h"
#include "common/open-utils.h"
#include "cmds/rescue.h"

/*
 * Record one corrupted data block.
 *
 * Records are queued instead of being reported immediately, this is for
 * future file deleting support.
 */
struct corrupted_block {
	/* Link into the file-scope corrupted_blocks list. */
	struct list_head list;
	/* The logical bytenr of the exact corrupted block. */
	u64 logical;

	/* The number of mirrors the block at @logical has. */
	unsigned int num_mirrors;

	/*
	 * Bitmap of the mirrors which failed.
	 *
	 * Note, bit 0 means mirror 1, since mirror 0 means choosing a
	 * live mirror, and mirror 0 is never used here.
	 *
	 * Allocated separately, released in free_corrupted_blocks().
	 */
	unsigned long *error_mirror_bitmap;
};

/* What to do with one corrupted data block. */
enum fix_data_checksum_action_value {
	/* Leave the block and its checksum untouched. */
	ACTION_IGNORE,
	/* Rewrite the csum item using the data read from one mirror. */
	ACTION_UPDATE_CSUM,
	/* Not an action, only the number of actions (used as loop bound). */
	ACTION_LAST,
};

/*
 * Table of all actions, indexed by enum fix_data_checksum_action_value.
 *
 * @string is the name shown in the interactive prompt and compared
 * against what the user typed.
 */
static const struct fix_data_checksum_action {
	enum fix_data_checksum_action_value value;
	const char *string;
} actions[] = {
	[ACTION_IGNORE] = {
		.value = ACTION_IGNORE,
		.string = "ignore"
	},
	[ACTION_UPDATE_CSUM] = {
		.value = ACTION_UPDATE_CSUM,
		.string = "update-csum"
	},
};

/* Repair mode of the current run, set by btrfs_recover_fix_data_checksum(). */
static int global_repair_mode;
/*
 * All corrupted blocks found so far, appended in the order they are found.
 *
 * Only used inside this file, thus keep it out of the global namespace.
 */
static LIST_HEAD(corrupted_blocks);

/*
 * Record that @mirror of the data block at @logical is corrupted.
 *
 * If the last record on the list is for the same @logical, only its error
 * bitmap is updated, otherwise a new record is appended to the list.
 *
 * @mirror is 1-based while the bitmap is 0-based (bit 0 means mirror 1),
 * so bit (@mirror - 1) is the one to set in both cases.
 *
 * Return 0 on success and -ENOMEM if a new record can not be allocated.
 */
static int add_corrupted_block(struct btrfs_fs_info *fs_info, u64 logical,
			       unsigned int mirror, unsigned int num_mirrors)
{
	struct corrupted_block *last;

	/* Guards the "mirror - 1" below and the size of the bitmap. */
	UASSERT(mirror >= 1 && mirror <= num_mirrors);

	if (!list_empty(&corrupted_blocks)) {
		last = list_entry(corrupted_blocks.prev, struct corrupted_block, list);
		/* The last entry is the same, just update the error mirror bitmap. */
		if (last->logical == logical) {
			UASSERT(last->error_mirror_bitmap);
			set_bit(mirror - 1, last->error_mirror_bitmap);
			return 0;
		}
	}

	last = calloc(1, sizeof(*last));
	if (!last)
		return -ENOMEM;
	/* The bitmap is accessed in units of long, allocate whole longs. */
	last->error_mirror_bitmap = calloc(BITS_TO_LONGS(num_mirrors),
					   sizeof(unsigned long));
	if (!last->error_mirror_bitmap) {
		free(last);
		return -ENOMEM;
	}
	set_bit(mirror - 1, last->error_mirror_bitmap);
	last->logical = logical;
	last->num_mirrors = num_mirrors;

	list_add_tail(&last->list, &corrupted_blocks);
	return 0;
}

/*
 * Verify all mirrors for @logical.
 *
 * The expected checksum is read from @leaf at @leaf_offset, then every
 * mirror in [1, @num_mirrors] is read and checksummed.  A mirror which can
 * not be read or whose checksum does not match is recorded through
 * add_corrupted_block().
 *
 * If something critical happened, return <0 and should end the run immediately.
 * Otherwise return 0, including data checksum mismatch or read failure.
 */
static int verify_one_data_block(struct btrfs_fs_info *fs_info,
				 struct extent_buffer *leaf,
				 unsigned long leaf_offset, u64 logical,
				 unsigned int num_mirrors)
{
	const u32 blocksize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u8 *buf;
	u8 csum[BTRFS_CSUM_SIZE];
	u8 csum_expected[BTRFS_CSUM_SIZE];
	int ret = 0;

	buf = malloc(blocksize);
	if (!buf)
		return -ENOMEM;

	/* The expected checksum is the same for all mirrors, read it once. */
	read_extent_buffer(leaf, csum_expected, leaf_offset, csum_size);

	for (unsigned int mirror = 1; mirror <= num_mirrors; mirror++) {
		u64 read_len = blocksize;

		ret = read_data_from_disk(fs_info, buf, logical, &read_len, mirror);
		if (ret < 0) {
			/* IO error, add one record. */
			ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors);
			if (ret < 0)
				break;
			/* Nothing valid in @buf to checksum, go to next mirror. */
			continue;
		}
		/* A successful read is not an error for the caller. */
		ret = 0;

		/* Verify the data checksum. */
		btrfs_csum_data(fs_info->csum_type, buf, csum, blocksize);
		if (memcmp(csum_expected, csum, csum_size) != 0) {
			ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors);
			if (ret < 0)
				break;
		}
	}

	free(buf);
	return ret;
}

/*
 * Verify every data block covered by the csum item @path points to.
 *
 * The item stores one checksum of csum_size bytes per block, and the key
 * offset is used as the logical bytenr of the first block, so the Nth
 * checksum belongs to the block at (key.offset + N * sectorsize).
 *
 * Return 0 if all blocks were checked (corrupted or not), <0 on the first
 * critical error returned by verify_one_data_block().
 */
static int iterate_one_csum_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path)
{
	struct btrfs_key key;
	const unsigned long item_ptr_off = btrfs_item_ptr_offset(path->nodes[0],
								 path->slots[0]);
	const u32 blocksize = fs_info->sectorsize;
	int num_mirrors;
	u64 data_size;
	u64 cur;
	int ret = 0;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	/* Number of checksums in the item times the block size. */
	data_size = btrfs_item_size(path->nodes[0], path->slots[0]) /
		    fs_info->csum_size * blocksize;
	num_mirrors = btrfs_num_copies(fs_info, key.offset, data_size);

	for (cur = 0; cur < data_size; cur += blocksize) {
		/* Where the checksum of the current block is inside the leaf. */
		const unsigned long leaf_offset = item_ptr_off +
			cur / blocksize * fs_info->csum_size;

		ret = verify_one_data_block(fs_info, path->nodes[0], leaf_offset,
					    key.offset + cur, num_mirrors);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Print all the paths of inode @ino inside subvolume @rootid.
 *
 * Has the callback signature taken by iterate_inodes_from_logical(), @ctx
 * must be the fs_info and @offset is not used.
 *
 * Return <0 if the subvolume can not be read, the path container can not
 * be set up or the inode can not be resolved, otherwise the return value
 * of paths_from_inode().
 */
static int print_filenames(u64 ino, u64 offset, u64 rootid, void *ctx)
{
	struct btrfs_fs_info *fs_info = ctx;
	struct btrfs_path path = { 0 };
	struct btrfs_key root_key = {
		.objectid = rootid,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = (u64)-1,
	};
	struct btrfs_root *subvol;
	struct inode_fs_paths *names;
	int ret;

	subvol = btrfs_read_fs_root(fs_info, &root_key);
	if (IS_ERR(subvol)) {
		ret = PTR_ERR(subvol);
		errno = -ret;
		error("failed to get subvolume %llu: %m", rootid);
		return ret;
	}

	names = init_ipath(128 * BTRFS_PATH_NAME_MAX, subvol, &path);
	if (IS_ERR(names)) {
		ret = PTR_ERR(names);
		errno = -ret;
		error("failed to initialize ipath: %m");
		return ret;
	}

	ret = paths_from_inode(ino, names);
	if (ret >= 0) {
		/* One line per resolved path. */
		for (int idx = 0; idx < names->fspath->elem_cnt; idx++)
			pr_verbose(LOG_DEFAULT, "  (subvolume %llu)/%s\n", rootid,
				   (char *)names->fspath->val[idx]);
		/* Paths which did not fit into the container are only counted. */
		if (names->fspath->elem_missed)
			pr_verbose(LOG_DEFAULT, "  (subvolume %llu) %d files not printed\n",
				   rootid, names->fspath->elem_missed);
	} else {
		errno = -ret;
		error("failed to resolve root %llu ino %llu to paths: %m", rootid, ino);
	}

	free_ipath(names);
	return ret;
}

/*
 * Walk all items of @csum_root and verify the data blocks of every
 * BTRFS_EXTENT_CSUM_KEY item through iterate_one_csum_item().
 *
 * Return 0 if the whole tree was walked, <0 if the search, the item
 * iteration or the verification hit a critical error.
 */
static int iterate_csum_root(struct btrfs_fs_info *fs_info, struct btrfs_root *csum_root)
{
	struct btrfs_path path = { 0 };
	struct btrfs_key key;
	int ret;

	/* Start from the smallest possible key to reach the first item. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0);
	if (ret < 0) {
		errno = -ret;
		error("failed to get the first tree block of csum tree: %m");
		return ret;
	}
	/* An all-zero key is not expected to exist in the tree. */
	UASSERT(ret > 0);
	while (true) {
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
		if (key.type == BTRFS_EXTENT_CSUM_KEY) {
			ret = iterate_one_csum_item(fs_info, &path);
			if (ret < 0)
				break;
		}

		ret = btrfs_next_item(csum_root, &path);
		if (ret < 0) {
			errno = -ret;
			error("failed to get next csum item: %m");
			/* The path is no longer reliable, do not keep looping. */
			break;
		}
		if (ret > 0) {
			/* No more items, the walk is complete. */
			ret = 0;
			break;
		}
	}
	btrfs_release_path(&path);
	return ret;
}

#define ASK_ACTION_BUFSIZE	(32)
/*
 * Ask the user on stdin what to do with one corrupted block.
 *
 * The prompt lists "ignore" (marked as default) and one "<N>" per valid
 * mirror number.  An empty line or end of input selects ACTION_IGNORE.
 * An input starting with the initial letter of an action other than
 * update-csum selects that action.  A decimal number in [1, @num_mirrors]
 * selects ACTION_UPDATE_CSUM and is stored into @mirror_ret.  Anything else
 * prints a warning and asks again.
 *
 * @mirror_ret is only written when ACTION_UPDATE_CSUM is returned.
 */
static enum fix_data_checksum_action_value ask_action(unsigned int num_mirrors,
						      unsigned int *mirror_ret)
{
	unsigned long ret;
	char buf[ASK_ACTION_BUFSIZE] = { 0 };
	bool printed;
	char *endptr;

again:
	printed = false;
	for (int i = 0; i < ACTION_LAST; i++) {
		if (printed)
			pr_verbose(LOG_DEFAULT, "/");
		/* Mark Ignore as default. */
		if (i == ACTION_IGNORE) {
			pr_verbose(LOG_DEFAULT, "<<%c>>%s",
				   toupper((unsigned char)actions[i].string[0]),
				   actions[i].string + 1);
		} else if (i == ACTION_UPDATE_CSUM) {
			/*
			 * For update-csum action, we need a mirror number,
			 * so output all valid mirrors numbers instead.
			 */
			for (unsigned int cur_mirror = 1; cur_mirror <= num_mirrors;
			     cur_mirror++)
				pr_verbose(LOG_DEFAULT, "<%u>", cur_mirror);
		} else {
			pr_verbose(LOG_DEFAULT, "<%c>%s",
				   toupper((unsigned char)actions[i].string[0]),
				   actions[i].string + 1);
		}
		printed = true;
	}
	pr_verbose(LOG_DEFAULT, ":");
	fflush(stdout);
	/* Default to Ignore if no action provided. */
	if (fgets(buf, sizeof(buf), stdin) == NULL)
		return ACTION_IGNORE;
	if (buf[0] == '\n')
		return ACTION_IGNORE;
	/* Check exact match or matching the initial letter. */
	for (int i = 0; i < ACTION_LAST; i++) {
		if ((strncasecmp(buf, actions[i].string, 1) == 0 ||
		     strncasecmp(buf, actions[i].string, ASK_ACTION_BUFSIZE) == 0) &&
		     actions[i].value != ACTION_UPDATE_CSUM)
			return actions[i].value;
	}
	/* No match, check if it's some numeric string. */
	ret = strtoul(buf, &endptr, 10);
	if (endptr == buf || ret == ULONG_MAX) {
		/* No valid action found, retry. */
		warning("invalid action, please retry");
		goto again;
	}
	/*
	 * Only white space (normally just the newline) may follow the number,
	 * do not pick a mirror from something like "2abc".
	 */
	while (isspace((unsigned char)*endptr))
		endptr++;
	if (*endptr != '\0') {
		warning("invalid action, please retry");
		goto again;
	}
	if (ret > num_mirrors || ret == 0) {
		warning("invalid mirror number %lu, must be in range [1, %u], please retry",
			ret, num_mirrors);
		goto again;
	}
	*mirror_ret = ret;
	return ACTION_UPDATE_CSUM;
}

/*
 * Rewrite the checksum of the data block at @logical, using the data read
 * from @mirror.
 *
 * The block is read first, so a read failure does not start a transaction.
 * Then the csum item is looked up, the new checksum is written into its
 * leaf and the transaction is committed.
 *
 * Return 0 on success, <0 on error (an error message is already printed).
 */
static int update_csum_item(struct btrfs_fs_info *fs_info, u64 logical,
			    unsigned int mirror)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *csum_root = btrfs_csum_root(fs_info, logical);
	struct btrfs_path path = { 0 };
	struct btrfs_csum_item *citem;
	u64 read_len = fs_info->sectorsize;
	u8 csum[BTRFS_CSUM_SIZE] = { 0 };
	u8 *buf;
	int ret;

	buf = malloc(fs_info->sectorsize);
	if (!buf)
		return -ENOMEM;
	ret = read_data_from_disk(fs_info, buf, logical, &read_len, mirror);
	if (ret < 0) {
		errno = -ret;
		error("failed to read block at logical %llu mirror %u: %m",
			logical, mirror);
		goto out;
	}
	trans = btrfs_start_transaction(csum_root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		errno = -ret;
		error_msg(ERROR_MSG_START_TRANS, "%m");
		goto out;
	}
	citem = btrfs_lookup_csum(trans, csum_root, &path, logical,
				  BTRFS_EXTENT_CSUM_OBJECTID, fs_info->csum_type, 1);
	if (IS_ERR(citem)) {
		ret = PTR_ERR(citem);
		errno = -ret;
		error("failed to find csum item for logical %llu: %m", logical);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_csum_data(fs_info->csum_type, buf, csum, fs_info->sectorsize);
	/* @citem is used as the offset of the checksum inside the leaf. */
	write_extent_buffer(path.nodes[0], csum, (unsigned long)citem, fs_info->csum_size);
	btrfs_release_path(&path);
	ret = btrfs_commit_transaction(trans, csum_root);
	if (ret < 0) {
		errno = -ret;
		error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
		/* Nothing reached the disk, do not claim success. */
		goto out;
	}
	printf("Csum item for logical %llu updated using data from mirror %u\n",
		logical, mirror);
out:
	free(buf);
	btrfs_release_path(&path);
	return ret;
}

/*
 * Report all the recorded corrupted blocks and handle them according to
 * @mode.
 *
 * For each block the logical bytenr, the corrupted mirrors and the affected
 * files are printed.  Then, depending on @mode, the block is ignored, the
 * user is asked what to do, or the csum item is updated from @mirror.
 *
 * In BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM mode the mirror used for a block
 * is (@mirror % (num_mirrors + 1)), calculated for every block from the
 * original @mirror.  If that gives 0, which is not a real mirror, the block
 * is skipped with a warning.
 *
 * Stops at the first block whose files can not be iterated.
 */
static void report_corrupted_blocks(struct btrfs_fs_info *fs_info,
				    enum btrfs_fix_data_checksum_mode mode,
				    unsigned int mirror)
{
	struct corrupted_block *entry;
	struct btrfs_path path = { 0 };

	if (list_empty(&corrupted_blocks)) {
		pr_verbose(LOG_DEFAULT, "no data checksum mismatch found\n");
		return;
	}

	list_for_each_entry(entry, &corrupted_blocks, list) {
		/* Safe default in case the mode is not handled below. */
		enum fix_data_checksum_action_value action = ACTION_IGNORE;
		/* Per block copy, so one block never affects the next one. */
		unsigned int cur_mirror = mirror;
		bool has_printed = false;
		int ret;

		pr_verbose(LOG_DEFAULT, "logical=%llu corrupted mirrors=", entry->logical);
		/* Open coded bitmap print. */
		for (unsigned int i = 0; i < entry->num_mirrors; i++) {
			if (test_bit(i, entry->error_mirror_bitmap)) {
				if (has_printed)
					pr_verbose(LOG_DEFAULT, ",");
				/*
				 * Bit 0 means mirror 1, thus we need to increase
				 * the value by 1.
				 */
				pr_verbose(LOG_DEFAULT, "%u", i + 1);
				has_printed = true;
			}
		}
		pr_verbose(LOG_DEFAULT, " affected files:\n");
		ret = iterate_inodes_from_logical(entry->logical, fs_info, &path,
						  print_filenames, fs_info);
		if (ret < 0) {
			errno = -ret;
			error("failed to iterate involved files: %m");
			break;
		}
		switch (mode) {
		case BTRFS_FIX_DATA_CSUMS_INTERACTIVE:
			action = ask_action(entry->num_mirrors, &cur_mirror);
			break;
		case BTRFS_FIX_DATA_CSUMS_READONLY:
			action = ACTION_IGNORE;
			break;
		case BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM:
			cur_mirror = mirror % (entry->num_mirrors + 1);
			if (cur_mirror == 0) {
				warning("mirror %u can not be used for logical %llu which has %u mirrors, block ignored",
					mirror, entry->logical, entry->num_mirrors);
				action = ACTION_IGNORE;
			} else {
				action = ACTION_UPDATE_CSUM;
			}
			break;
		default:
			UASSERT(0);
		}

		switch (action) {
		case ACTION_IGNORE:
			break;
		case ACTION_UPDATE_CSUM:
			UASSERT(cur_mirror > 0 && cur_mirror <= entry->num_mirrors);
			/*
			 * update_csum_item() prints its own error message, a
			 * failure does not stop the handling of other blocks.
			 */
			update_csum_item(fs_info, entry->logical, cur_mirror);
			break;
		default:
			UASSERT(0);
		}
	}
}

static void free_corrupted_blocks(void)
{
	while (!list_empty(&corrupted_blocks)) {
		struct corrupted_block *entry;

		entry = list_entry(corrupted_blocks.next, struct corrupted_block, list);
		list_del_init(&entry->list);
		free(entry->error_mirror_bitmap);
		free(entry);
	}
}

int btrfs_recover_fix_data_checksum(const char *path, enum btrfs_fix_data_checksum_mode mode,
				    unsigned int mirror)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *csum_root;
	struct open_ctree_args oca = { 0 };
	int ret;

	if (mode >= BTRFS_FIX_DATA_CSUMS_LAST)
		return -EINVAL;

	if (mode == BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM)
		UASSERT(mirror > 0);
	ret = check_mounted(path);
	if (ret < 0) {
		errno = -ret;
		error("could not check mount status: %m");
		return ret;
	}
	if (ret > 0) {
		error("%s is currently mounted", path);
		return -EBUSY;
	}

	global_repair_mode = mode;
	oca.filename = path;
	oca.flags = OPEN_CTREE_WRITES;
	fs_info = open_ctree_fs_info(&oca);
	if (!fs_info) {
		error("failed to open btrfs at %s", path);
		return -EIO;
	}
	csum_root = btrfs_csum_root(fs_info, 0);
	if (!csum_root) {
		error("failed to get csum root");
		ret = -EIO;
		goto out_close;
	}
	ret = iterate_csum_root(fs_info, csum_root);
	if (ret) {
		errno = -ret;
		error("failed to iterate csum tree: %m");
	}
	report_corrupted_blocks(fs_info, mode, mirror);
out_close:
	free_corrupted_blocks();
	close_ctree_fs_info(fs_info);
	return ret;
}