summaryrefslogtreecommitdiff
path: root/tests/fs/bcachefs/replication.ktest
diff options
context:
space:
mode:
Diffstat (limited to 'tests/fs/bcachefs/replication.ktest')
-rwxr-xr-xtests/fs/bcachefs/replication.ktest266
1 files changed, 258 insertions, 8 deletions
diff --git a/tests/fs/bcachefs/replication.ktest b/tests/fs/bcachefs/replication.ktest
index 14c74f7..cadeaa1 100755
--- a/tests/fs/bcachefs/replication.ktest
+++ b/tests/fs/bcachefs/replication.ktest
@@ -62,7 +62,7 @@ test_twodevices()
test_largebuckets()
{
- set_watchdog 30
+ set_watchdog 60
run_quiet "" bcachefs format -f \
--bucket_size=8M \
${ktest_scratch_dev[0]} \
@@ -425,10 +425,10 @@ test_device_set_state_offline()
mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]}:${ktest_scratch_dev[2]} /mnt
local fioout="$ktest_out/fio-out"
- run_fio_randrw >"$fioout" 2>&1 &
+ run_fio_randrw --runtime=60 >"$fioout" 2>&1 &
local fiopid=$!
- sleep 1
+ sleep 10
bcachefs device set-state --force ro ${ktest_scratch_dev[1]}
@@ -460,10 +460,10 @@ test_device_readd()
mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
local fioout="$ktest_out/fio-out"
- run_fio_randrw >"$fioout" 2>&1 &
+ run_fio_randrw --runtime=60 >"$fioout" 2>&1 &
local fiopid=$!
- sleep 1
+ sleep 10
echo -n "offlining ${ktest_scratch_dev[0]}... "
bcachefs device offline --force ${ktest_scratch_dev[0]}
@@ -500,11 +500,11 @@ test_device_repeated_add_remove()
for ii in {1..10}; do
echo "add-remove run #$ii ----------------------------------------------------"
- echo "bcachefs device add"
+ echo "bcachefs device add ${ktest_scratch_dev[1]}"
bcachefs device add -f /mnt ${ktest_scratch_dev[1]}
- echo "bcachefs device evacuate"
+ echo "bcachefs device evacuate ${ktest_scratch_dev[1]}"
bcachefs device evacuate ${ktest_scratch_dev[1]}
- echo "bcachefs device remove"
+ echo "bcachefs device remove ${ktest_scratch_dev[1]}"
bcachefs device remove ${ktest_scratch_dev[1]}
done
@@ -586,6 +586,193 @@ test_replicas_read_errors()
do_replicas_errors_test error_reads
}
+# Run the fio random read/write workload on a two-device filesystem while the
+# bcachefs module parameter read_corrupt_ratio is set to 8, then run fsck and
+# the common end-of-test checks.
+# NOTE(review): presumably a nonzero read_corrupt_ratio makes the kernel
+# corrupt a fraction of data reads so the read retry path gets exercised --
+# confirm against the kernel module.
+test_read_corrupt()
+{
+ setup_tracing
+ set_watchdog 180
+
+ # Enable injection before the filesystem is formatted and mounted.
+ echo 8 > /sys/module/bcachefs/parameters/read_corrupt_ratio
+
+ run_quiet "" bcachefs format -f \
+ ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ # Both members are named here; the degraded option is passed anyway.
+ mount -t bcachefs -o degraded ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+
+ # --continue_on_error=io is handed to the run_fio_randrw helper; presumably
+ # it is forwarded to fio so that I/O errors do not abort the workload.
+ run_fio_randrw --continue_on_error=io
+ # Disable injection again before unmounting and checking.
+ echo 0 > /sys/module/bcachefs/parameters/read_corrupt_ratio
+
+ # Check the read retry path for indirect extents:
+ # (currently disabled)
+ #cp --reflink /mnt/fiotest /mnt/fiotest.reflinked
+ #dd if=/mnt/fiotest of=/dev/null bs=4k iflag=direct
+
+ umount /mnt
+
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+ bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+}
+
+# Populate a replicas=2 two-device filesystem with the fio workload, then
+# remount it while the bcachefs module parameter btree_read_corrupt_ratio is
+# set to 8, read the data back, and run fsck and the common end-of-test checks.
+# NOTE(review): presumably a nonzero btree_read_corrupt_ratio makes the kernel
+# corrupt a fraction of btree node reads -- confirm against the kernel module.
+test_btree_read_corrupt()
+{
+ setup_tracing
+ set_watchdog 180
+
+
+ run_quiet "" bcachefs format -f --replicas=2 \
+ ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ # First mount: write data with no injection active.
+ mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+ run_fio_randrw --continue_on_error=io
+ umount /mnt
+
+ # Second mount: injection is on only for the duration of the mount command
+ # and is switched off again before the file is read back.
+ echo 8 > /sys/module/bcachefs/parameters/btree_read_corrupt_ratio
+ mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+ echo 0 > /sys/module/bcachefs/parameters/btree_read_corrupt_ratio
+
+ # Read /mnt/fiotest end to end (presumably the file written by
+ # run_fio_randrw -- confirm against the helper).
+ dd if=/mnt/fiotest of=/dev/null bs=1M
+ umount /mnt
+
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+ bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+}
+
+# Copy /usr/bin onto a replicas=2 two-device filesystem, run
+# `bcachefs kill_btree_node` against it while unmounted, then mount with
+# -o fsck, compare the copied tree with the original, and run fsck once more.
+test_kill_btree_node()
+{
+ set_watchdog 240
+ run_quiet "" bcachefs format -f --replicas=2 ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+ # Creates /mnt/bin, which is compared against /usr/bin further down.
+ cp -rL /usr/bin /mnt
+ umount /mnt
+
+ # Doesn't yet work with the alloc btree:
+ # (only the extents btree is currently in the list)
+ for btree in extents ; do
+ echo "Killing a btree node in btree $btree "
+ local index=1
+
+ # Has no effect with the current list, which does not contain freespace.
+ [[ $btree = freespace ]] && index=0
+
+ # NOTE(review): presumably -d0 selects device 0 and -n takes
+ # btree:level:index -- confirm with the kill_btree_node usage text.
+ bcachefs kill_btree_node -d0 -n$btree:0:$index ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ echo "Running fsck"
+ # How to assert exit status equals something specific with -o errexit?
+ mount -t bcachefs -o fsck ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+ echo "Checking contents"
+ # -q: only report which files differ; exits nonzero on any difference.
+ diff -rq /usr/bin /mnt/bin
+ umount /mnt
+ #bcachefs fsck -y ${ktest_scratch_dev[0]} || true
+
+ echo
+ echo "Running fsck again; should be clean"
+
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+ done
+
+ bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+}
+
+# Variant of the read-corruption test on a replicas=2 two-device filesystem:
+# read_corrupt_ratio is set to 64 before formatting and is only written back
+# to 0 after fsck and the end-of-test checks have run.
+# NOTE(review): if any earlier command aborts the test, the parameter is left
+# at 64 -- confirm whether keeping injection on through umount/fsck is intended.
+test_read_corrupt_replicas()
+{
+ setup_tracing
+ set_watchdog 180
+
+ echo 64 > /sys/module/bcachefs/parameters/read_corrupt_ratio
+
+ run_quiet "" bcachefs format -f \
+ --replicas=2 \
+ ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ # Both members are named here; the degraded option is passed anyway.
+ mount -t bcachefs -o degraded ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+
+ #gc_torture_workload
+ run_fio_randrw --continue_on_error=io
+
+ # Check the read retry path for indirect extents:
+ # (currently disabled)
+ #cp --reflink /mnt/fiotest /mnt/fiotest.reflinked
+ #dd if=/mnt/fiotest of=/dev/null bs=4k iflag=direct
+
+ umount /mnt
+
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+ bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+ # Injection is switched off only here, at the very end of the test.
+ echo 0 > /sys/module/bcachefs/parameters/read_corrupt_ratio
+ # Make the function's final status 0.
+ true
+}
+
+# Write 1 GiB of zeroes with direct I/O to a single-device filesystem while
+# the bcachefs module parameter write_corrupt_ratio is set to 1, then add a
+# second device labelled "background", make it the background target, and run
+# fsck and the common end-of-test checks.
+# NOTE(review): presumably a nonzero write_corrupt_ratio makes the kernel
+# corrupt a fraction of data writes -- confirm against the kernel module.
+test_write_corrupt()
+{
+ setup_tracing
+ set_watchdog 180
+
+ run_quiet "" bcachefs format -f ${ktest_scratch_dev[0]}
+
+ mount -t bcachefs ${ktest_scratch_dev[0]} /mnt
+
+ # Injection is active only around the dd below.
+ echo 1 > /sys/module/bcachefs/parameters/write_corrupt_ratio
+ #gc_torture_workload
+ #run_fio_randrw --continue_on_error=io
+ dd if=/dev/zero of=/mnt/foo bs=1M count=1024 oflag=direct
+ echo 0 > /sys/module/bcachefs/parameters/write_corrupt_ratio
+
+ bcachefs device add -f --label=background /mnt ${ktest_scratch_dev[1]}
+
+ # The glob is expected to match the single mounted filesystem's sysfs dir.
+ echo background > /sys/fs/bcachefs/*/options/background_target
+
+ umount /mnt
+
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+ bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+
+ # Redundant with the reset right after dd; the parameter is already 0 here.
+ echo 0 > /sys/module/bcachefs/parameters/write_corrupt_ratio
+ # Make the function's final status 0.
+ true
+}
+
+# Run the fio workload on a replicas=2 two-device filesystem with
+# write_corrupt_ratio set to 32, then force-evacuate and force-remove the
+# second device, re-run fio in verify-only mode, and run fsck.
+test_evacuate_corrupted()
+{
+ setup_tracing
+ set_watchdog 180
+
+ run_quiet "" bcachefs format -f --replicas=2 \
+ ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+
+ # Injection is active only while the workload writes its data.
+ echo 32 > /sys/module/bcachefs/parameters/write_corrupt_ratio
+ run_fio_randrw --continue_on_error=io
+ echo 0 > /sys/module/bcachefs/parameters/write_corrupt_ratio
+
+ bcachefs device evacuate --force ${ktest_scratch_dev[1]}
+ bcachefs device remove --force ${ktest_scratch_dev[1]}
+
+ # Second pass with --verify_only=1; presumably fio only re-reads and
+ # verifies what the first pass wrote -- confirm against the helper.
+ run_fio_randrw --continue_on_error=io --verify_only=1
+
+ umount /mnt
+
+ # NOTE(review): the second device was removed above but is still passed to
+ # fsck -- confirm this is intended.
+ bcachefs fsck -ny ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]}
+
+ # we expect lots of io_move_start_fail when we're degraded:
+ #bcachefs_test_end_checks ${ktest_scratch_dev[0]}
+}
+
+# Run the fio workload on a three-device filesystem formatted with
+# --metadata_replicas=2, force-remove the second device without evacuating it
+# first, then re-run fio in verify-only mode.  No fsck or end-of-test checks
+# are run.
+test_evacuate_errors()
+{
+ setup_tracing
+ set_watchdog 180
+
+ run_quiet "" bcachefs format -f --metadata_replicas=2 \
+ ${ktest_scratch_dev[0]} ${ktest_scratch_dev[1]} ${ktest_scratch_dev[2]}
+
+ # NOTE(review): only the first of the three members is named here;
+ # presumably the other members are discovered at mount time -- confirm.
+ mount -t bcachefs ${ktest_scratch_dev[0]} /mnt
+
+ run_fio_randrw --continue_on_error=io
+
+ #bcachefs device evacuate /dev/mapper/flakey
+ bcachefs device remove --force ${ktest_scratch_dev[1]}
+
+ run_fio_randrw --continue_on_error=io --verify_only=1
+ umount /mnt
+ # Make the function's final status 0.
+ true
+}
+
test_cmd_fs_usage()
{
set_watchdog 240
@@ -650,6 +837,69 @@ test_rereplicate()
bcachefs_test_end_checks ${ktest_scratch_dev[0]}
}
+# Check that `bcachefs data rereplicate` brings data written with replicas=1
+# up to two replicas once data_replicas/metadata_replicas are raised to 2.
+#
+# A filesystem is created on ktest_scratch_dev[0] and [1] with default
+# replication, a file is created, replication is raised and rereplicate is
+# run.  One member's superblock is then wiped and a degraded mount of the
+# other member (with fsck,fix_errors) is attempted.  If `bcachefs fs usage`
+# still lists btree/user data whose third column is 1, the first device named
+# on that line is the one wiped, so incomplete rereplication makes the
+# degraded mount lose data.
+#
+# Globals:
+#   ktest_scratch_dev (read)
+# Exits with status 1 if the device picked from the usage output is neither
+# of the two member devices.
+test_rereplicate2()
+{
+    # Kernel names (e.g. "vdb") of the two members, as they appear inside the
+    # [...] device lists printed by `bcachefs fs usage`.  Resolving symlinks
+    # first keeps this correct when the scratch devices are given by alias.
+    local dev_a dev_b
+    dev_a=$(basename "$(readlink -f "${ktest_scratch_dev[0]}")")
+    dev_b=$(basename "$(readlink -f "${ktest_scratch_dev[1]}")")
+
+    echo ":: format with replicas=1 (default)"
+    run_quiet "" bcachefs format -f \
+        ${ktest_scratch_dev[0]} \
+        ${ktest_scratch_dev[1]}
+
+    mount -t bcachefs ${ktest_scratch_dev[0]}:${ktest_scratch_dev[1]} /mnt
+
+    echo ":: write to fs, while replicas=1"
+    touch /mnt/empty-file
+
+    echo ":: we should have some durability=1 data now"
+    bcachefs fs usage -h /mnt
+
+    echo ":: set replicas=2 and run rereplicate"
+    echo 2 > /sys/fs/bcachefs/*/options/data_replicas
+    echo 2 > /sys/fs/bcachefs/*/options/metadata_replicas
+    bcachefs data rereplicate /mnt
+
+    # echo ":: running rereplicate a second time seems to guarantee all data has durability=2"
+    # bcachefs data rereplicate /mnt
+
+    echo ":: all data should be replicated to both devices now, verifying..."
+    # Declaration and assignment are split so that a failing command is not
+    # masked by the exit status of `local`.
+    local fs_usage_out
+    fs_usage_out=$(bcachefs fs usage -h /mnt)
+    echo "$fs_usage_out"
+
+    # btree:/user: lines whose third column (durability, per the variable
+    # name) is still 1.  A single awk exits 0 even when nothing matches.
+    local residual_durability_1_data
+    residual_durability_1_data=$(awk '/^(btree|user):/ && $3 == "1"' <<<"$fs_usage_out")
+
+    umount /mnt
+
+    local dev_remove=
+    if [[ -z "$residual_durability_1_data" ]]; then
+        echo ":: no residual durability=1 data found"
+        dev_remove="$dev_a"
+        echo ":: we will simulate loss of device '$dev_remove' to verify proper replication"
+    else
+        echo ":: found residual durability=1 data:"
+        echo "$residual_durability_1_data"
+
+        # First device named inside the [...] list of the first residual
+        # line.  A non-match leaves dev_remove empty and is reported below.
+        dev_remove=$(head -n1 <<<"$residual_durability_1_data" \
+            | grep -oP '\[\K[^\]]+' \
+            | awk '{print $1}') || true
+        echo ":: we will simulate loss of device '$dev_remove', which we suspect of being a single-point-of-failure"
+    fi
+
+    # we want to keep the other device
+    local dev_keep=
+    if [[ "$dev_remove" == "$dev_a" ]]; then
+        dev_keep="$dev_b"
+    elif [[ "$dev_remove" == "$dev_b" ]]; then
+        dev_keep="$dev_a"
+    else
+        echo ":: unexpected device '$dev_remove' in fs usage output (expected '$dev_a' or '$dev_b')" >&2
+        exit 1
+    fi
+
+    echo ":: wipe the super-block on device '$dev_remove' to prevent auto-discovery during mount"
+    dd if=/dev/zero of="/dev/$dev_remove" bs=1M count=1 oflag=direct
+
+    echo ":: attempt degraded mount with only device '$dev_keep'"
+    mount -t bcachefs -o degraded,fsck,fix_errors "/dev/$dev_keep" /mnt
+
+    umount /mnt
+}
+
disabled_test_device_add_faults()
{
setup_tracing 'bcachefs:*'