diff options
author | Boris Burkov <boris@bur.io> | 2024-03-11 12:13:44 -0700 |
---|---|---|
committer | Zorro Lang <zlang@kernel.org> | 2024-04-28 19:08:22 +0800 |
commit | bc9debb83c9867b2cdfd19e3a304616749f1912c (patch) | |
tree | 5460d1a6b85b3dd3d9cf42b8081d04fc012f6b64 | |
parent | b9785de77fcb7b4c9f58d6d7178a0ef63a059b84 (diff) |
btrfs: new test for devt change between mounts
It is possible to confuse the btrfs device cache (fs_devices) by
starting with a multi-device filesystem, then removing and re-adding a
device in a way which changes its dev_t while the filesystem is
unmounted. After this procedure, if we remount, then we are in a funny
state where struct btrfs_device's "devt" field does not match the bd_dev
of the "bdev" field. I would say this is bad enough, as we have violated
a pretty clear invariant.
But for style points, we can then remove the extra device from the fs,
making it a single device fs, which enables the "temp_fsid" feature,
which permits multiple separate mounts of different devices with the
same fsid. Since btrfs is confused and *thinks* there are different
devices (based on device->devt), it allows a second redundant mount of
the same device (not a bind mount!). This then allows us to corrupt the
original mount by doing stuff to the one that should be a bind mount.
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
[ use _create_loop_device, renamed $MNT $BIND and rm them before mkdir ]
Signed-off-by: Zorro Lang <zlang@kernel.org>
[ update the commit id of _fixed_by_kernel_commit ]
-rw-r--r-- | common/config | 1 | ||||
-rwxr-xr-x | tests/btrfs/318 | 107 | ||||
-rw-r--r-- | tests/btrfs/318.out | 2 |
3 files changed, 110 insertions, 0 deletions
diff --git a/common/config b/common/config index 6a0496fd..eac0710e 100644 --- a/common/config +++ b/common/config @@ -235,6 +235,7 @@ export BLKZONE_PROG="$(type -P blkzone)" export GZIP_PROG="$(type -P gzip)" export BTRFS_IMAGE_PROG="$(type -P btrfs-image)" export BTRFS_MAP_LOGICAL_PROG=$(type -P btrfs-map-logical) +export PARTED_PROG="$(type -P parted)" # use 'udevadm settle' or 'udevsettle' to wait for lv to be settled. # newer systems have udevadm command but older systems like RHEL5 don't. diff --git a/tests/btrfs/318 b/tests/btrfs/318 new file mode 100755 index 00000000..1eb72625 --- /dev/null +++ b/tests/btrfs/318 @@ -0,0 +1,107 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2024 Meta, Inc. All Rights Reserved. +# +# FS QA Test No. 318 +# +# Test an edge case of multi device volume management in btrfs. +# If a device changes devt between mounts of a multi device fs, we can trick +# btrfs into mounting the same device twice fully (not as a bind mount). From +# there, it is trivial to induce corruption. +# +. ./common/preamble +_begin_fstest auto quick volume scrub tempfsid + +_fixed_by_kernel_commit 9f7eb8405dcb \ + "btrfs: validate device maj:min during open" + +# real QA test starts here +_supported_fs btrfs +_require_test +_require_command "$PARTED_PROG" parted +_require_batched_discard "$TEST_DIR" + +_cleanup() { + cd / + $UMOUNT_PROG $MNT + $UMOUNT_PROG $BIND + losetup -d $DEV0 $DEV1 $DEV2 + rm -f $IMG0 $IMG1 $IMG2 + rm -rf $MNT $BIND +} + +IMG0=$TEST_DIR/$$.img0 +IMG1=$TEST_DIR/$$.img1 +IMG2=$TEST_DIR/$$.img2 +truncate -s 1G $IMG0 +truncate -s 1G $IMG1 +truncate -s 1G $IMG2 +DEV0=$(_create_loop_device $IMG0) +DEV1=$(_create_loop_device $IMG1) +DEV2=$(_create_loop_device $IMG2) +D0P1=$DEV0"p1" +D1P1=$DEV1"p1" +MNT=$TEST_DIR/mnt-${seq} +BIND=$TEST_DIR/bind-${seq} + +# Setup partition table with one partition on each device. +$PARTED_PROG $DEV0 'mktable gpt' --script +$PARTED_PROG $DEV1 'mktable gpt' --script +$PARTED_PROG $DEV0 'mkpart mypart 1M 100%' --script +$PARTED_PROG $DEV1 'mkpart mypart 1M 100%' --script + +# mkfs with two devices to avoid clearing devices on close +# single raid to allow removing DEV2. +$MKFS_BTRFS_PROG -f -msingle -dsingle $D0P1 $DEV2 >>$seqres.full 2>&1 || _fail "failed to mkfs.btrfs" + +# Cycle mount the two device fs to populate both devices into the +# stale device cache. +rm -rf $MNT +mkdir -p $MNT +_mount $D0P1 $MNT +$UMOUNT_PROG $MNT + +# Swap the partition dev_ts. This leaves the dev_t in the cache out of date. +$PARTED_PROG $DEV0 'rm 1' --script +$PARTED_PROG $DEV1 'rm 1' --script +$PARTED_PROG $DEV1 'mkpart mypart 1M 100%' --script +$PARTED_PROG $DEV0 'mkpart mypart 1M 100%' --script + +# Mount with mismatched dev_t! +_mount $D0P1 $MNT + +# Remove the extra device to bring temp-fsid back in the fray. +$BTRFS_UTIL_PROG device remove $DEV2 $MNT + +# Create the would be bind mount. +rm -rf $BIND +mkdir -p $BIND +_mount $D0P1 $BIND +mount_show=$($BTRFS_UTIL_PROG filesystem show $MNT) +bind_show=$($BTRFS_UTIL_PROG filesystem show $BIND) +# If they're different, we are in trouble. +[ "$mount_show" = "$bind_show" ] || echo "$mount_show != $bind_show" + +# Now really prove it by corrupting the first mount with the second. +for i in $(seq 20); do + $XFS_IO_PROG -f -c "pwrite 0 50M" $MNT/foo.$i >>$seqres.full 2>&1 +done +for i in $(seq 20); do + $XFS_IO_PROG -f -c "pwrite 0 50M" $BIND/foo.$i >>$seqres.full 2>&1 +done + +# sync so that we really write the large file data out to the shared device +sync + +# now delete from one view of the shared device +find $BIND -type f -delete +# sync so that fstrim definitely has deleted data to trim +sync +# This should blow up both mounts, if the writes somehow didn't overlap at all. +$FSTRIM_PROG $BIND +# drop caches to improve the odds we read from the corrupted device while scrubbing. +echo 3 > /proc/sys/vm/drop_caches +$BTRFS_UTIL_PROG scrub start -B $MNT | grep "Error summary:" + +status=0 +exit diff --git a/tests/btrfs/318.out b/tests/btrfs/318.out new file mode 100644 index 00000000..2798c4ba --- /dev/null +++ b/tests/btrfs/318.out @@ -0,0 +1,2 @@ +QA output created by 318 +Error summary: no errors found |