1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
|
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017 Datto Inc.
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
# Copyright 2019 Richard Elling
#
#
# Returns SCSI host number for the given disk
#
function get_scsi_host #disk
{
typeset disk=$1
ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
}
#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number
#
function scan_scsi_hosts
{
typeset hostnum=${1}
if is_linux; then
if [[ -z $hostnum ]]; then
for host in /sys/class/scsi_host/host*; do
log_must eval "echo '- - -' > $host/scan"
done
else
log_note "/sys/class/scsi_host/host$hostnum/scan"
log_must eval \
"echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
fi
fi
}
#
# Wait for newly created block devices to have their minors created.
# Additional arguments can be passed to udevadm trigger, with the expected
# arguments to typically be a block device pathname. This is useful when
# checking waiting on a specific device to settle rather than triggering
# all devices and waiting for them all to settle.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is given if settle takes
# too long.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
if is_linux; then
udevadm trigger $* 2>/dev/null
typeset start=$SECONDS
udevadm settle
typeset elapsed=$((SECONDS - start))
[[ $elapsed > 60 ]] && \
log_note udevadm settle time too long: $elapsed
elif is_freebsd; then
if [[ ${#@} -eq 0 ]]; then
# Do something that has to go through the geom event
# queue to complete.
sysctl kern.geom.conftxt >/dev/null
return
fi
fi
# Poll for the given paths to appear, but give up eventually.
typeset -i i
for (( i = 0; i < 5; ++i )); do
typeset missing=false
typeset dev
for dev in "${@}"; do
if ! [[ -e $dev ]]; then
missing=true
break
fi
done
if ! $missing; then
break
fi
sleep ${#@}
done
}
#
# Check if the given device is physical device
#
function is_physical_device #device
{
typeset device=${1#$DEV_DSKDIR/}
device=${device#$DEV_RDSKDIR/}
if is_linux; then
is_disk_device "$DEV_DSKDIR/$device" && \
[ -f /sys/module/loop/parameters/max_part ]
elif is_freebsd; then
is_disk_device "$DEV_DSKDIR/$device" && \
echo $device | grep -qE \
-e '^a?da[0-9]+$' \
-e '^md[0-9]+$' \
-e '^mfid[0-9]+$' \
-e '^nda[0-9]+$' \
-e '^nvd[0-9]+$' \
-e '^vtbd[0-9]+$'
else
echo $device | grep -qE "^c[0-F]+([td][0-F]+)+$"
fi
}
#
# Check if the given device is a real device (ie SCSI device)
#
function is_real_device #disk
{
typeset disk=$1
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
grep -q disk
fi
}
#
# Check if the given device is a loop device
#
function is_loop_device #disk
{
typeset disk=$1
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
grep -q loop
fi
}
#
# Linux:
# Check if the given device is a multipath device and if there is a symbolic
# link to a device mapper and to a disk
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Check if the given device is a gmultipath device.
#
# Others:
# No multipath detection.
#
function is_mpath_device #disk
{
typeset disk=$1
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
if lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
grep -q mpath; then
readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
else
false
fi
elif is_freebsd; then
is_disk_device $DEV_MPATHDIR/$disk
else
false
fi
}
#
# Check if the given path is the appropriate sort of device special node.
#
function is_disk_device #path
{
typeset path=$1
if is_freebsd; then
# FreeBSD doesn't have block devices, only character devices.
test -c $path
else
test -b $path
fi
}
# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
function set_slice_prefix
{
typeset disk
typeset -i i=0
if is_linux; then
while (( i < $DISK_ARRAY_NUM )); do
disk="$(echo $DISKS | awk '{print $(i + 1)}')"
if is_mpath_device $disk && ! echo $disk | awk 'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || is_real_device $disk; then
export SLICE_PREFIX=""
return 0
elif is_mpath_device $disk || is_loop_device $disk; then
export SLICE_PREFIX="p"
return 0
else
log_fail "$disk not supported for partitioning."
fi
(( i = i + 1))
done
fi
}
#
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
function set_device_dir
{
typeset disk
typeset -i i=0
if is_linux; then
while (( i < $DISK_ARRAY_NUM )); do
disk="$(echo $DISKS | awk '{print $(i + 1)}')"
if is_mpath_device $disk; then
export DEV_DSKDIR=$DEV_MPATHDIR
return 0
else
export DEV_DSKDIR=$DEV_RDSKDIR
return 0
fi
(( i = i + 1))
done
else
export DEV_DSKDIR=$DEV_RDSKDIR
fi
}
#
# Get the directory path of given device
#
function get_device_dir #device
{
typeset device=$1
if ! is_freebsd && ! is_physical_device $device; then
if [[ $device != "/" ]]; then
device=${device%/*}
fi
if is_disk_device "$DEV_DSKDIR/$device"; then
device="$DEV_DSKDIR"
fi
echo $device
else
echo "$DEV_DSKDIR"
fi
}
#
# Get persistent name for given disk
#
function get_persistent_disk_name #device
{
typeset device=$1
if is_linux; then
if is_real_device $device; then
udevadm info -q all -n $DEV_DSKDIR/$device \
| awk '/disk\/by-id/ {print $2; exit}' | cut -d/ -f3-
elif is_mpath_device $device; then
udevadm info -q all -n $DEV_DSKDIR/$device \
| awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' \
| cut -d/ -f3
else
echo $device
fi
else
echo $device
fi
}
#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
function on_off_disk # disk state{online,offline} host
{
typeset disk=$1
typeset state=$2
typeset host=$3
[[ -z $disk ]] || [[ -z $state ]] && \
log_fail "Arguments invalid or missing"
if is_linux; then
if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
dep="$(ls /sys/block/${dm_name}/slaves | awk '{print $1}')"
while [[ -n $dep ]]; do
#check if disk is online
if lsscsi | grep -qF $dep; then
dep_dir="/sys/block/${dm_name}"
dep_dir+="/slaves/${dep}/device"
ss="${dep_dir}/state"
sd="${dep_dir}/delete"
log_must eval "echo 'offline' > ${ss}"
log_must eval "echo '1' > ${sd}"
if lsscsi | grep -qF $dep; then
log_fail "Offlining $disk failed"
fi
fi
dep="$(ls /sys/block/$dm_name/slaves 2>/dev/null | awk '{print $1}')"
done
elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
#check if disk is online
if lsscsi | grep -qF $disk; then
dev_state="/sys/block/$disk/device/state"
dev_delete="/sys/block/$disk/device/delete"
log_must eval "echo 'offline' > ${dev_state}"
log_must eval "echo '1' > ${dev_delete}"
if lsscsi | grep -qF $disk; then
log_fail "Offlining $disk failed"
fi
else
log_note "$disk is already offline"
fi
elif [[ $state == "online" ]]; then
#force a full rescan
scan_scsi_hosts $host
block_device_wait
if is_mpath_device $disk; then
dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
dep="$(ls /sys/block/$dm_name/slaves | awk '{print $1}')"
if lsscsi | grep -qF $dep; then
log_fail "Onlining $disk failed"
fi
elif is_real_device $disk; then
block_device_wait
typeset -i retries=0
while ! lsscsi | grep -qF $disk; do
if (( $retries > 2 )); then
log_fail "Onlining $disk failed"
break
fi
(( ++retries ))
sleep 1
done
else
log_fail "$disk is not a real dev"
fi
else
log_fail "$disk failed to $state"
fi
fi
}
#
# Simulate disk removal
#
function remove_disk #disk
{
typeset disk=$1
on_off_disk $disk "offline"
block_device_wait
}
#
# Simulate disk insertion for the given SCSI host
#
function insert_disk #disk scsi_host
{
typeset disk=$1
typeset scsi_host=$2
on_off_disk $disk "online" $scsi_host
block_device_wait
}
#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
typeset devsize=$1
typeset hosts=$2
typeset tgts=$3
typeset luns=$4
typeset blksz=$5
[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
[[ -z $luns ]] || [[ -z $blksz ]] && \
log_fail "Arguments invalid or missing"
case "$5" in
'512b')
typeset sector=512
typeset blkexp=0
;;
'512e')
typeset sector=512
typeset blkexp=3
;;
'4Kn')
typeset sector=4096
typeset blkexp=0
;;
*) log_fail "Unsupported blksz value: $5" ;;
esac
if is_linux; then
modprobe -n scsi_debug ||
log_unsupported "Platform does not have scsi_debug module"
if lsmod | grep -q scsi_debug; then
log_fail "scsi_debug module already installed"
else
log_must modprobe scsi_debug dev_size_mb=$devsize \
add_host=$hosts num_tgts=$tgts max_luns=$luns \
sector_size=$sector physblk_exp=$blkexp
block_device_wait
if ! lsscsi | grep -q scsi_debug; then
log_fail "scsi_debug module install failed"
fi
fi
fi
}
#
# Unload scsi_debug module, if needed.
#
function unload_scsi_debug
{
log_must_retry "in use" 5 modprobe -r scsi_debug
}
#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
function get_debug_device
{
for i in {1..10} ; do
val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)
# lsscsi can take time to settle
if [ "$val" != "-" ] ; then
break
fi
sleep 1
done
echo "$val"
}
#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
function get_pool_devices #testpool #devdir
{
typeset testpool=$1
typeset devdir=$2
typeset out=""
case "$UNAME" in
Linux|FreeBSD)
zpool status -P $testpool | awk -v d="$devdir" '$1 ~ d {sub(d "/", ""); printf("%s ", $1)}'
;;
esac
}
#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices. Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
function list_file_blocks # input_file
{
typeset input_file=$1
[[ -f $input_file ]] || log_fail "Couldn't find $input_file"
typeset ds="$(zfs list -H -o name $input_file)"
typeset pool="${ds%%/*}"
typeset objnum="$(get_objnum $input_file)"
#
# Establish a mapping between vdev ids as shown in a DVA and the
# pathnames they correspond to in ${VDEV_MAP[][]}.
#
# The vdev bits in a DVA refer to the top level vdev id.
# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
#
eval $(zdb -C $pool | awk '
BEGIN { printf "typeset -a VDEV_MAP;" }
function subscript(s) {
# "[#]" is more convenient than the bare "#"
match(s, /\[[0-9]*\]/)
return substr(s, RSTART, RLENGTH)
}
id && !/^ / {
# left a top level vdev
id = 0
}
id && $1 ~ /^path:$/ {
# found a vdev path; save it in the map
printf "VDEV_MAP%s%s=%s;", id, child, $2
}
/^ children/ {
# entering a top level vdev
id = subscript($0)
child = "[0]" # default in case there is no nested vdev
printf "typeset -a VDEV_MAP%s;", id
}
/^ children/ {
# entering a nested vdev (e.g. child of a top level mirror)
child = subscript($0)
}
')
#
# The awk below parses the output of zdb, printing out the level
# of each block along with vdev id, offset and length. The last
# two are converted to decimal in the while loop. 4M is added to
# the offset to compensate for the first two labels and boot
# block. Lastly, the offset and length are printed in units of
# 512B blocks for ease of use with dd.
#
typeset level vdev path offset length
sync_all_pools true
zdb -dddddd $ds $objnum | awk '
/^$/ { looking = 0 }
looking {
level = $2
field = 3
while (split($field, dva, ":") == 3) {
print level, int(dva[1]), "0x"dva[2], "0x"dva[3]
++field
}
}
/^Indirect blocks:/ { looking = 1 }
' | \
while read level vdev offset length; do
for path in ${VDEV_MAP[$vdev][@]}; do
echo "$level $path $(( ($offset + (4<<20)) / 512 ))" \
"$(( $length / 512 ))"
done
done 2>/dev/null
}
function corrupt_blocks_at_level # input_file corrupt_level
{
typeset input_file=$1
typeset corrupt_level="L${2:-0}"
typeset level path offset length
[[ -f $input_file ]] || log_fail "Couldn't find $input_file"
if is_freebsd; then
# Temporarily allow corrupting an inuse device.
debugflags=$(sysctl -n kern.geom.debugflags)
sysctl kern.geom.debugflags=16
fi
list_file_blocks $input_file | \
while read level path offset length; do
if [[ $level = $corrupt_level ]]; then
log_must dd if=/dev/urandom of=$path bs=512 \
count=$length seek=$offset conv=notrunc
fi
done
if is_freebsd; then
sysctl kern.geom.debugflags=$debugflags
fi
# This is necessary for pools made of loop devices.
sync
}
function corrupt_label_checksum # label_number vdev_path
{
typeset label_size=$((256*1024))
typeset vdev_size=$(stat_size ${2})
typeset -a offsets=("$((128*1024 - 32))" \
"$(($label_size + (128*1024 - 32)))" \
"$(($vdev_size - $label_size - (128*1024 + 32)))" \
"$(($vdev_size - (128*1024 + 32)))")
dd if=/dev/urandom of=${2} seek=${offsets[$1]} bs=1 count=32 \
conv=notrunc
}
|