summaryrefslogtreecommitdiff
path: root/tests/xfs/558
blob: 270f458cb991c4f7e810ca872cdc189ba848b011 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Oracle.  All Rights Reserved.
#
# FS QA Test No. 558
#
# This is a regression test for a data corruption bug that existed in XFS' copy
# on write code between 4.9 and 4.19.  The root cause is a concurrency bug
# wherein we would drop ILOCK_SHARED after querying the CoW fork in xfs_map_cow
# and retake it before querying the data fork in xfs_map_blocks.  If a second
# thread changes the CoW fork mappings between the two calls, it's possible for
# xfs_map_blocks to return a zero-block mapping, which results in writeback
# being elided for that block.  Elided writeback of dirty data results in
# silent loss of writes.
#
# Worse yet, kernels from that era still used buffer heads, which means that an
# elided writeback leaves the page clean but the bufferheads dirty.  Due to a
# naïve optimization in mark_buffer_dirty, the SetPageDirty call is elided if
# the bufferhead is dirty, which means that a subsequent rewrite of the data
# block will never result in the page being marked dirty, and all subsequent
# writes are lost.
#
# It turns out that Christoph Hellwig unwittingly fixed the race in commit
# 5c665e5b5af6 ("xfs: remove xfs_map_cow"), and no testcase was ever written.
# Four years later, we hit it on a production 4.14 kernel.  This testcase
# relies on a debugging knob that introduces artificial delays into writeback.
#
# Before the race, the file blocks 0-1 are not shared and blocks 2-5 are
# shared.  There are no extents in CoW fork.
#
# Two threads race like this:
#
# Thread 1 (writeback block 0)     | Thread 2  (write to block 2)
# ---------------------------------|--------------------------------
#                                  |
# 1. Check if block 0 in CoW fork  |
#    from xfs_map_cow.             |
#                                  |
# 2. Block 0 not found in CoW      |
#    fork; the block is considered |
#    not shared.                   |
#                                  |
# 3. xfs_map_blocks looks up data  |
#    fork to get a map covering    |
#    block 0.                      |
#                                  |
# 4. It gets a data fork mapping   |
#    for block 0 with length 2.    |
#                                  |
#                                  | 1. A buffered write to block 2 sees
#                                  |    that it is a shared block and no
#                                  |    extent covers block 2 in CoW fork.
#                                  |
#                                  |    It creates a new CoW fork mapping.
#                                  |    Due to the cowextsize, the new
#                                  |    extent starts at block 0 with
#                                  |    length 128.
#                                  |
#                                  |
# 5. It looks up CoW fork again to |
#    trim the map (0, 2) to a      |
#    shared block boundary.        |
#                                  |
# 5a. It finds (0, 128) in CoW fork|
# 5b. It trims the data fork map   |
#     from (0, 2) to (0, 0) (!!!)  |
#                                  |
# 6. The xfs_imap_valid call after |
#    the xfs_map_blocks call checks|
#    if the mapping (0, 0) covers  |
#    block 0.  The result is "NO". |
#                                  |
# 7. Since block 0 has no physical |
#    block mapped, it's not added  |
#    to the ioend.  This is the    |
#    first problem.                |
#                                  |
# 8. xfs_add_to_ioend usually      |
#    clears the bufferhead dirty   |
#    flag. Because this is skipped,|
#    we leave the page clean with  |
#    the associated buffer head(s) |
#    dirty (the second problem).   |
#    Now the dirty state is        |
#    inconsistent.                 |
#
# On newer kernels, this is also a functionality test for the ifork sequence
# counter because the writeback completions will change the data fork and force
# revalidations of the wb mapping.
#
. ./common/preamble
_begin_fstest auto quick clone

# Import common functions.
. ./common/reflink
. ./common/inject
. ./common/tracing

# real QA test starts here
# Tear down the test: stop the background sentry, reap every background job,
# undo the ftrace configuration, and delete the temporary files.
_cleanup()
{
	# Removing the sentry file first ends the wait_for_errortag polling
	# loop, so the wait below cannot get stuck on it.
	test -n "$sentryfile" && rm -f -- "$sentryfile"
	wait
	_ftrace_cleanup
	cd /
	# Quote every path so that whitespace or glob characters in them cannot
	# be split or expanded into unrelated names.  The sentry and trace file
	# variables are still empty if the test bails out before assigning
	# them, so only hand them to rm when they are set.
	rm -r -f -- "$tmp".* ${sentryfile:+"$sentryfile"} ${tracefile:+"$tracefile"}
}

# Test prerequisites.  This test is XFS-specific.  It needs ftrace to watch
# for the writeback mapping invalidation tracepoints, the wb_delay_ms error
# injection knob to slow down writeback, and reflink support (both on the
# scratch filesystem and in cp) to create shared blocks.
_supported_fs xfs
_fixed_by_kernel_commit 5c665e5b5af6 "xfs: remove xfs_map_cow"
_require_ftrace
_require_xfs_io_error_injection "wb_delay_ms"
_require_scratch_reflink
_require_cp_reflink

# This test races writeback of a pure overwrite of a data fork extent against
# the creation of a speculative COW preallocation.  In alwayscow mode, there
# are no pure overwrites, which means that a precondition of the test is not
# satisfied, and this test should be skipped.
_require_no_xfs_always_cow

# Start from a freshly formatted scratch filesystem; keep the tool output in
# the full log for debugging.
_scratch_mkfs >> $seqres.full
_scratch_mount >> $seqres.full

# This is a pagecache test, so try to disable fsdax mode.  Append to the full
# log instead of truncating it, so that the mkfs and mount output already
# recorded there is not thrown away.
$XFS_IO_PROG -c 'chattr -x' $SCRATCH_MNT &>> $seqres.full
_require_pagecache_access $SCRATCH_MNT

# Choose the I/O unit used throughout the test: eight times whatever
# _get_file_block_size reports for the scratch mount, but never less than 64k.
min_blksz=65536
file_blksz=$(_get_file_block_size "$SCRATCH_MNT")
blksz=$(( 8 * $file_blksz ))
if (( blksz <= min_blksz )); then
	blksz=$min_blksz
fi

_require_congruent_file_oplen $SCRATCH_MNT $blksz

# Set a 1m CoW extent size hint on the scratch mount so that there is enough
# room for speculative CoW preallocations to be created.
$XFS_IO_PROG -c 'cowextsize 1m' $SCRATCH_MNT

# Build the test file and an independent twin (file.compare) that tracks the
# contents we expect to find at the end.  Both start as 160 blocks of 0x59.
for victim in file file.compare; do
	_pwrite_byte 0x59 0 $((160 * blksz)) $SCRATCH_MNT/$victim >> $seqres.full
done
sync

# Reflink the test file, then rewrite its first two blocks.  This leaves the
# first two blocks unshared and the rest shared.
_cp_reflink $SCRATCH_MNT/file $SCRATCH_MNT/file.reflink

two_blocks=$((2 * blksz))
for victim in file file.compare; do
	_pwrite_byte 0x58 0 $two_blocks $SCRATCH_MNT/$victim >> $seqres.full
done
sync

# Cycle the mount and write to the page cache straight away; this avoids the
# creation of large folios on newer kernels.
_scratch_cycle_mount

# Apply to file.compare the same writes that the two racing threads are about
# to apply to file.  Doing it now, before writeback is slowed down, avoids
# unnecessary delay.
_pwrite_byte 0x57 0 $two_blocks $SCRATCH_MNT/file.compare >> $seqres.full
_pwrite_byte 0x56 $two_blocks $two_blocks $SCRATCH_MNT/file.compare >> $seqres.full
sync

# Introduce a half-second wait to each writeback block mapping call.  This
# gives us a chance to race speculative cow prealloc with writeback.
_scratch_inject_error "wb_delay_ms" 500

# Record the writeback mapping invalidation tracepoints so that we can tell
# later whether writeback was forced to revalidate a mapping.
_ftrace_setup
_ftrace_record_events 'xfs_wb*iomap_invalid'

# Start thread 1 in the background: dirty the first two blocks of the file and
# fsync them, which starts the (now delayed) writeback.  Sleep for a second to
# let it get going before the second thread is started.
$XFS_IO_PROG -c "pwrite -S 0x57 0 $((2 * blksz))" \
	-c 'fsync' $SCRATCH_MNT/file >> $seqres.full &
sleep 1

# Sentry that watches for the invalidation tracepoint.  For as long as the
# sentry file exists, poll the ftrace buffer every half second and collect any
# iomap_invalid events in the trace file.  Once one shows up, writeback has
# been forced to revalidate a mapping, so the test has done its job: turn the
# writeback delay off, stop recording events, and quit polling.
sentryfile=$TEST_DIR/$seq.sentry
tracefile=$TEST_DIR/$seq.ftrace
wait_for_errortag() {
	while test -e "$sentryfile"; do
		_ftrace_dump | grep iomap_invalid >> "$tracefile"
		if ! grep -q iomap_invalid "$tracefile"; then
			# Nothing yet; look again shortly.
			sleep 0.5
			continue
		fi
		_scratch_inject_error "wb_delay_ms" 0
		_ftrace_ignore_events
		break
	done
}
# Start with an empty trace log.  The sentry only ever appends to it, so a
# leftover file from an earlier run that never reached _cleanup would
# otherwise be mistaken for an invalidation seen in this run.  This also
# guarantees that the file exists when it is read back later.
: > "$tracefile"

# Create the sentry file and start polling in the background; the loop runs
# until the sentry file is removed again.
touch "$sentryfile"
wait_for_errortag &

# Start thread 2 to create the cowextsize reservation
$XFS_IO_PROG -c "pwrite -S 0x56 $((2 * blksz)) $((2 * blksz))" \
	-c 'fsync' $SCRATCH_MNT/file >> $seqres.full

# The race is over.  Remove the sentry file so that the polling loop stops,
# then reap the background jobs: the sentry must be done appending to the
# trace log before we read it, and thread 1 must have finished with the file
# before the scratch filesystem is cycled.
rm -f $sentryfile
wait

# Log whatever the sentry captured and remember whether an invalidation
# tracepoint was seen (grep exits with 0 on a match).
cat "$tracefile" >> $seqres.full
grep -q iomap_invalid "$tracefile"
saw_invalidation=$?

# Cycle the mount to push everything to disk and drop the page cache.  If the
# bug manifested, block 0 of file still holds the stale 0x58 pattern because a
# write was silently dropped.
_scratch_cycle_mount

if cmp -s $SCRATCH_MNT/file $SCRATCH_MNT/file.compare; then
	# The contents are fine, but the test only proves something if
	# writeback was actually forced to revalidate a mapping.
	if [ $saw_invalidation -ne 0 ]; then
		echo "Expected to hear about writeback iomap invalidations?"
	fi
else
	# Contents differ: log the extent maps and dump both files into the
	# test output.
	echo file and file.compare do not match
	$XFS_IO_PROG -c 'bmap -celpv' -c 'bmap -elpv' $SCRATCH_MNT/file &>> $seqres.full
	for victim in file.compare file; do
		echo "$victim"
		od -tx1 -Ad -c $SCRATCH_MNT/$victim
	done
fi

echo Silence is golden
status=0
exit