1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
|
#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=mreitz@redhat.com
seq="$(basename $0)"
echo "QA output created by $seq"
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# Sometimes the error line might be dumped before/after an event
# randomly. Mask it out for specific test that may trigger this
# uncertainty for current test for now.
_filter_io_error()
{
sed '/Input\/output error/d'
}
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
# These tests only work for compat=1.1 images without an external
# data file with refcount_bits=16
_unsupported_imgopts 'compat=0.10' data_file \
'refcount_bits=\([^1]\|.\([^6]\|$\)\)'
# The repair process will create a large file - so check for availability first
_require_large_file 64G
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img
# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# This information should be available through qemu-img info
_img_info --format-specific
# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
| _filter_testdir \
| _filter_imgfmt
# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io
# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to fix it
_check_test_img -r all
# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
"\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G
$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
_make_test_img -b "$BACKING_IMG" -F $IMGFMT 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Free the first cluster. This cluster will soon enough be reallocated and
# used for COW.
poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
poke_file "$TEST_IMG" "$(($rb_offset+10))" "\x00\x00"
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" "$(($rb_offset+12))" "\x00\x00"
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all
echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing dirty corrupt image ==="
echo
_make_test_img 64M
# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
echo '--- Repairing ---'
# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all
echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo
_make_test_img 64M
# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo '--- Repairing ---'
_check_test_img -r all
echo
echo '=== Discarding with an unaligned refblock ==='
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"
echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo
echo "=== Discarding an out-of-bounds refblock ==="
echo
_make_test_img 64M
# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo
_make_test_img -o 'refcount_bits=1' 64M
# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo
_make_test_img -o 'refcount_bits=1' 64M
# Same as above
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
echo "{'execute': 'qmp_capabilities'}
{'execute': 'human-monitor-command',
'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
{'execute': 'quit'}" \
| $QEMU -qmp stdio -nographic -nodefaults \
-drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
| _filter_qmp | _filter_qemu_io
echo
echo "=== Testing incoming inactive corrupted image ==="
echo
_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"
# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
{'execute': 'human-monitor-command',
'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
{'execute': 'quit'}" \
| $QEMU -qmp stdio -nographic -nodefaults \
-blockdev "{'node-name': 'drive',
'driver': 'qcow2',
'file': {
'driver': 'file',
'filename': '$TEST_IMG'
}}" \
-incoming exec:'cat /dev/null' \
2>&1 \
| _filter_qmp | _filter_qemu_io | _filter_io_error
echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'
# success, all done
echo "*** done"
rm -f $seq.full
status=0
|