File: disk.c

package info (click to toggle)
xfsprogs 6.17.0-2
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 11,324 kB
  • sloc: ansic: 167,334; sh: 4,604; makefile: 1,336; python: 835; cpp: 5
file content (354 lines) | stat: -rw-r--r-- 8,605 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include <scsi/sg.h>
#include <linux/hdreg.h>
#include "platform_defs.h"
#include "libfrog/util.h"
#include "libfrog/paths.h"
#include "xfs_scrub.h"
#include "common.h"
#include "disk.h"
#include "platform_defs.h"

/* Supply BLKROTATIONAL ourselves when the system headers do not define it. */
#ifndef BLKROTATIONAL
# define BLKROTATIONAL	_IO(0x12, 126)
#endif

/*
 * Disk Abstraction
 *
 * These routines help us to discover the geometry of a block device,
 * estimate the amount of concurrent IOs that we can send to it, and
 * abstract the process of performing read verification of disk blocks.
 */

/*
 * Estimate how many IOs can usefully be in flight against this disk.
 *
 * Anything that is not a block device, and any block device that reports
 * itself as non-rotational, gets one "head" per CPU.  Otherwise the ratio
 * of the optimal to the minimum IO size is used (clamped to 1..nr_cpus),
 * and if those hints are unavailable we guess two.
 */
static unsigned int
__disk_heads(
	struct disk		*disk)
{
	unsigned short		rotational = 1;
	int			ncpus = platform_nproc();
	int			io_min = 0;
	int			io_opt = 0;
	int			stripes;

	/* Not a block device?  Use every CPU. */
	if (!S_ISBLK(disk->d_sb.st_mode))
		return ncpus;

	/* Solid state storage can take everything we throw at it, too. */
	if (ioctl(disk->d_fd, BLKROTATIONAL, &rotational) == 0 &&
	    !rotational)
		return ncpus;

	/*
	 * Without a usable pair of min/optimal IO size hints we can only
	 * assume a plain rotating disk.
	 */
	if (ioctl(disk->d_fd, BLKIOMIN, &io_min) != 0)
		return 2;
	if (ioctl(disk->d_fd, BLKIOOPT, &io_opt) != 0)
		return 2;
	if (io_min <= 0 || io_opt <= 0)
		return 2;

	/* optimal / minimum hints at the number of underlying devices. */
	stripes = io_opt / io_min;
	if (stripes < 1)
		stripes = 1;
	return stripes < ncpus ? stripes : ncpus;
}

/*
 * Report how many disk heads to assume for this disk.  A nonzero
 * force_nr_threads overrides whatever we would otherwise estimate.
 */
unsigned int
disk_heads(
	struct disk		*disk)
{
	if (!force_nr_threads)
		return __disk_heads(disk);

	return force_nr_threads;
}

/*
 * Execute a SCSI VERIFY(16) to verify disk contents.
 * For devices that support this command, this can sharply reduce the
 * runtime of the data block verification phase if the storage device's
 * internal bandwidth exceeds its link bandwidth.  However, it only
 * works if we're talking to a raw SCSI device, and only if we trust the
 * firmware.
 */
#define SENSE_BUF_LEN		64
#define VERIFY16_CMDLEN	16
#define VERIFY16_CMD		0x8F

#ifndef SG_FLAG_Q_AT_TAIL
# define SG_FLAG_Q_AT_TAIL	0x10
#endif

/*
 * Ask the device to verify @blockcount logical blocks starting at logical
 * block @startblock, where @startblock is relative to the start of @disk.
 * Both quantities are in units of the disk's logical block size.
 *
 * Returns the number of bytes verified on success.  Returns -1 with errno
 * set if the request cannot be encoded, the SG_IO ioctl fails, or the
 * device reports a check condition.  The return type is int, so callers
 * are expected to keep each request small enough for the byte count to fit.
 */
static int
disk_scsi_verify(
	struct disk		*disk,
	uint64_t		startblock, /* lba */
	uint64_t		blockcount) /* lba */
{
	struct sg_io_hdr	iohdr;
	unsigned char		cdb[VERIFY16_CMDLEN];
	unsigned char		sense[SENSE_BUF_LEN];
	uint64_t		llba;
	uint64_t		veri_len = blockcount;
	int			error;

	assert(!debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"));

	/*
	 * The CDB only has room for a 32-bit verification length; refuse
	 * anything larger instead of silently verifying fewer blocks than
	 * we would report back.
	 */
	if (blockcount > UINT32_MAX) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * d_start is a byte offset, so convert it with the logical block
	 * size to keep it in the same units as startblock.
	 */
	llba = startblock + (disk->d_start >> disk->d_lbalog);

	/* Borrowed from sg_verify */
	cdb[0] = VERIFY16_CMD;
	cdb[1] = 0; /* skip PI, DPO, and byte check. */
	cdb[2] = (llba >> 56) & 0xff;
	cdb[3] = (llba >> 48) & 0xff;
	cdb[4] = (llba >> 40) & 0xff;
	cdb[5] = (llba >> 32) & 0xff;
	cdb[6] = (llba >> 24) & 0xff;
	cdb[7] = (llba >> 16) & 0xff;
	cdb[8] = (llba >> 8) & 0xff;
	cdb[9] = llba & 0xff;
	cdb[10] = (veri_len >> 24) & 0xff;
	cdb[11] = (veri_len >> 16) & 0xff;
	cdb[12] = (veri_len >> 8) & 0xff;
	cdb[13] = veri_len & 0xff;
	cdb[14] = 0;
	cdb[15] = 0;
	memset(sense, 0, SENSE_BUF_LEN);

	/* v3 SG_IO */
	memset(&iohdr, 0, sizeof(iohdr));
	iohdr.interface_id = 'S';
	iohdr.dxfer_direction = SG_DXFER_NONE;
	iohdr.cmdp = cdb;
	iohdr.cmd_len = VERIFY16_CMDLEN;
	iohdr.sbp = sense;
	iohdr.mx_sb_len = SENSE_BUF_LEN;
	iohdr.flags |= SG_FLAG_Q_AT_TAIL;
	iohdr.timeout = 30000; /* 30s */

	error = ioctl(disk->d_fd, SG_IO, &iohdr);
	if (error < 0)
		return error;

	dbg_printf("VERIFY(16) fd %d lba %"PRIu64" len %"PRIu64" info %x "
			"status %d masked %d msg %d host %d driver %d "
			"duration %d resid %d\n",
			disk->d_fd, startblock, blockcount, iohdr.info,
			iohdr.status, iohdr.masked_status, iohdr.msg_status,
			iohdr.host_status, iohdr.driver_status, iohdr.duration,
			iohdr.resid);

	if (iohdr.info & SG_INFO_CHECK) {
		dbg_printf("status: msg %x host %x driver %x\n",
				iohdr.msg_status, iohdr.host_status,
				iohdr.driver_status);
		errno = EIO;
		return -1;
	}

	/* Report bytes verified, in units of the logical block size. */
	return blockcount << disk->d_lbalog;
}

/*
 * Test whether the device will accept SCSI VERIFY by probing the first
 * logical block of the disk.
 */
static bool
disk_can_scsi_verify(
	struct disk		*disk)
{
	int			ret;

	if (debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"))
		return false;

	/*
	 * A successful verify returns the (positive) number of bytes that
	 * were verified; failures come back negative.  Comparing against
	 * zero would never detect a working device.
	 */
	ret = disk_scsi_verify(disk, 0, 1);
	return ret > 0;
}

/*
 * Open a disk device and discover its geometry.
 *
 * Opens @pathname read-only with O_DIRECT, then records the logical block
 * size, stat info, device size, block size, and starting byte offset, and
 * probes whether SCSI VERIFY can be used.
 *
 * Returns a newly allocated disk that should be released with disk_close(),
 * or NULL if the allocation, open, or fstat failed.  errno from the failing
 * call is preserved across the cleanup.
 */
struct disk *
disk_open(
	const char		*pathname)
{
	struct hd_geometry	bdgeo;
	struct disk		*disk;
	bool			suspicious_disk = false;
	int			saved_errno;
	int			error;

	disk = calloc(1, sizeof(struct disk));
	if (!disk)
		return NULL;

	disk->d_fd = open(pathname, O_RDONLY | O_DIRECT | O_NOATIME);
	if (disk->d_fd < 0)
		goto out_free;

	/* Try to get LBA size; fall back to 512 bytes if we can't. */
	error = ioctl(disk->d_fd, BLKSSZGET, &disk->d_lbasize);
	if (error)
		disk->d_lbasize = 512;
	disk->d_lbalog = log2_roundup(disk->d_lbasize);

	/* Obtain disk's stat info. */
	error = fstat(disk->d_fd, &disk->d_sb);
	if (error)
		goto out_close;

	/* Determine bdev size, block size, and offset. */
	if (S_ISBLK(disk->d_sb.st_mode)) {
		error = ioctl(disk->d_fd, BLKGETSIZE64, &disk->d_size);
		if (error)
			disk->d_size = 0;
		error = ioctl(disk->d_fd, BLKBSZGET, &disk->d_blksize);
		if (error)
			disk->d_blksize = 0;
		error = ioctl(disk->d_fd, HDIO_GETGEO, &bdgeo);
		if (!error) {
			/*
			 * dm devices will pass through ioctls, which means
			 * we can't use SCSI VERIFY unless the start is 0.
			 * Most dm devices don't set geometry (unlike scsi
			 * and nvme) so use a zeroed out CHS to screen them
			 * out.  Check all three of cylinders, heads, and
			 * sectors; any of them being zero means the
			 * geometry is not usable.
			 */
			if (bdgeo.start != 0 &&
			    (bdgeo.cylinders == 0 || bdgeo.heads == 0 ||
			     bdgeo.sectors == 0))
				suspicious_disk = true;
			/*
			 * bdgeo.start is an unsigned long count of 512-byte
			 * sectors; widen it before shifting so the byte
			 * offset cannot overflow on 32-bit builds.
			 */
			disk->d_start = (uint64_t)bdgeo.start << BBSHIFT;
		} else
			disk->d_start = 0;
	} else {
		disk->d_size = disk->d_sb.st_size;
		disk->d_blksize = disk->d_sb.st_blksize;
		disk->d_start = 0;
	}

	/* Can we issue SCSI VERIFY? */
	if (!suspicious_disk && disk_can_scsi_verify(disk))
		disk->d_flags |= DISK_FLAG_SCSI_VERIFY;

	return disk;
out_close:
	/* Don't let close() overwrite the error we're reporting. */
	saved_errno = errno;
	close(disk->d_fd);
	errno = saved_errno;
out_free:
	/* Likewise for free(). */
	saved_errno = errno;
	free(disk);
	errno = saved_errno;
	return NULL;
}

/*
 * Close a disk device and free the disk object.  Returns the result of
 * close() on the disk's file descriptor, or zero if no descriptor was open.
 * The disk must not be used after this call.
 */
int
disk_close(
	struct disk		*disk)
{
	int			ret;

	ret = (disk->d_fd < 0) ? 0 : close(disk->d_fd);
	disk->d_fd = -1;
	free(disk);

	return ret;
}

/* Convert a byte count to LBAs of disk d, truncating any partial LBA. */
#define BTOLBAT(d, bytes)	((uint64_t)(bytes) >> (d)->d_lbalog)
/* Size in bytes of one LBA of disk d, as implied by d_lbalog. */
#define LBASIZE(d)		(1ULL << (d)->d_lbalog)
/* Convert a byte count to LBAs of disk d, rounding up to a whole LBA. */
#define BTOLBA(d, bytes)	(((uint64_t)(bytes) + LBASIZE(d) - 1) >> (d)->d_lbalog)

/*
 * Simulate disk errors.
 *
 * The XFS_SCRUB_DISK_ERROR_INTERVAL environment variable gives the spacing,
 * in bytes, between pretend bad spots on the disk.  It is read once and
 * cached in a static variable; if it is unset, zero, or rounds down to
 * zero, simulation is disabled for the rest of the run.
 *
 * @start is the byte offset of the read verify request and @length points
 * to its size in bytes; the length may be shortened so that the request
 * ends at the next simulated bad spot.
 *
 * Returns EIO if the request starts on a simulated bad spot, or 0 if the
 * (possibly shortened) request should proceed.
 */
static int
disk_simulate_read_error(
	struct disk		*disk,
	uint64_t		start,
	uint64_t		*length)
{
	static int64_t		interval;
	uint64_t		start_interval;

	/* Simulated disk errors are disabled. */
	if (interval < 0)
		return 0;

	/* Figure out the disk read error interval. */
	if (interval == 0) {
		char		*p;

		/* Pretend there's bad media every so often, in bytes. */
		p = getenv("XFS_SCRUB_DISK_ERROR_INTERVAL");
		if (p == NULL) {
			interval = -1;
			return 0;
		}
		interval = strtoull(p, NULL, 10);
		/*
		 * Round down to a multiple of the LBA size.  The mask has to
		 * be built as a 64-bit value; a 32-bit mask would be
		 * zero-extended and wipe out the upper half of any interval
		 * of 4GiB or more.
		 */
		interval &= ~((1ULL << disk->d_lbalog) - 1);
	}
	if (interval <= 0) {
		interval = -1;
		return 0;
	}

	/*
	 * We simulate disk errors by pretending that there are media errors at
	 * predetermined intervals across the disk.  If a read verify request
	 * crosses one of those intervals we shorten it so that the next read
	 * will start on an interval threshold.  If the read verify request
	 * starts on an interval threshold, we send back EIO as if it had
	 * failed.
	 */
	if ((start % interval) == 0) {
		dbg_printf("fd %d: simulating disk error at %"PRIu64".\n",
				disk->d_fd, start);
		return EIO;
	}

	start_interval = start / interval;
	if (start_interval != (start + *length) / interval) {
		*length = ((start_interval + 1) * interval) - start;
		dbg_printf(
"fd %d: simulating short read at %"PRIu64" to length %"PRIu64".\n",
				disk->d_fd, start, *length);
	}

	return 0;
}

/*
 * Read-verify an extent of a disk device.
 *
 * @start and @length describe the extent in bytes; @buf receives the data
 * when the verification is done with pread().  In debug mode the request
 * may first be failed or shortened by the read error simulator, and is
 * skipped entirely (reporting @length) if XFS_SCRUB_DISK_VERIFY_SKIP is
 * set in the environment.
 *
 * Returns whatever the underlying verify method returns (SCSI VERIFY if
 * the disk is flagged for it, pread() otherwise), or -1 with errno set
 * for a simulated error.
 */
ssize_t
disk_read_verify(
	struct disk		*disk,
	void			*buf,
	uint64_t		start,
	uint64_t		length)
{
	uint64_t		first_lba;
	uint64_t		nr_lbas;

	if (debug) {
		int		simulated;

		simulated = disk_simulate_read_error(disk, start, &length);
		if (simulated != 0) {
			errno = simulated;
			return -1;
		}

		/* Don't actually issue the IO */
		if (getenv("XFS_SCRUB_DISK_VERIFY_SKIP") != NULL)
			return length;
	}

	/* No SCSI VERIFY?  Just read the blocks into the buffer. */
	if (!(disk->d_flags & DISK_FLAG_SCSI_VERIFY))
		return pread(disk->d_fd, buf, length, start);

	/* Convert to logical block size. */
	first_lba = BTOLBAT(disk, start);
	nr_lbas = BTOLBA(disk, length);
	return disk_scsi_verify(disk, first_lba, nr_lbas);
}