From f7c11affde9e29316d24b6736d16faeaac8278f2 Mon Sep 17 00:00:00 2001 From: Tanmay Bhushan <007047221b@gmail.com> Date: Sun, 15 Jan 2023 21:32:15 +0100 Subject: btrfs: raid56: fix stripes if vertical errors are found We take two stripe numbers if vertical errors are found. In case it is just a pstripe it does not matter but in case of raid 6 it matters as both stripes need to be fixed. Fixes: 7a3150723061 ("btrfs: raid56: do data csum verification during RMW cycle") Reviewed-by: Qu Wenruo Signed-off-by: Tanmay Bhushan <007047221b@gmail.com> Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 6a2cf754912d..f35898a78b16 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1886,7 +1886,7 @@ pstripe: sector->uptodate = 1; } if (failb >= 0) { - ret = verify_one_sector(rbio, faila, sector_nr); + ret = verify_one_sector(rbio, failb, sector_nr); if (ret < 0) goto cleanup; -- cgit v1.2.3 From 3c538de0f2a74d50aff7278c092f88ae59cee688 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 18 Jan 2023 16:35:13 -0500 Subject: btrfs: limit device extents to the device size There was a recent regression in btrfs/177 that started happening with the size class patches ("btrfs: introduce size class to block group allocator"). This however isn't a regression introduced by those patches, but rather the bug was uncovered by a change in behavior in these patches. The patches triggered more chunk allocations in the ^free-space-tree case, which uncovered a race with device shrink. The problem is we will set the device total size to the new size, and use this to find a hole for a device extent. However during shrink we may have device extents allocated past this range, so we could potentially find a hole in a range past our new shrink size. We don't actually limit our found extent to the device size anywhere, we assume that we will not find a hole past our device size. This isn't true with shrink as we're relocating block groups and thus creating holes past the device size. Fix this by making sure we do not search past the new device size, and if we wander into any device extents that start after our device size simply break from the loop and use whatever hole we've already found. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bcfef75b97da..4cdadf30163d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1600,7 +1600,7 @@ again: if (ret < 0) goto out; - while (1) { + while (search_start < search_end) { l = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(l)) { @@ -1623,6 +1623,9 @@ again: if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; + if (key.offset > search_end) + break; + if (key.offset > search_start) { hole_size = key.offset - search_start; dev_extent_hole_check(device, &search_start, &hole_size, @@ -1683,6 +1686,7 @@ next: else ret = 0; + ASSERT(max_hole_start + max_hole_size <= search_end); out: btrfs_free_path(path); *start = max_hole_start; -- cgit v1.2.3 From eadd7deca0ad8a83edb2b894d8326c78e78635d6 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 24 Jan 2023 12:32:34 +0100 Subject: btrfs: zlib: zero-initialize zlib workspace KMSAN reports uses of uninitialized memory in zlib's longest_match() called on memory originating from zlib_alloc_workspace(). This issue is known by zlib maintainers and is claimed to be harmless, but to be on the safe side we'd better initialize the memory. Link: https://zlib.net/zlib_faq.html#faq36 Reported-by: syzbot+14d9e7602ebdf7ec0a60@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Alexander Potapenko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 01a13de11832..da7bb9187b68 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level) workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); - workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); + workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL); workspace->level = level; workspace->buf = NULL; /* -- cgit v1.2.3 From 33e17b3f5ab74af12aca58c515bc8424ff69a343 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Jan 2023 20:32:10 +0100 Subject: btrfs: send: limit number of clones and allocated memory size The arg->clone_sources_count is u64 and can trigger a warning when a huge value is passed from user space and a huge array is allocated. Limit the allocated memory to 8MiB (can be increased if needed), which in turn limits the number of clone sources to 8M / sizeof(struct clone_root) = 8M / 40 = 209715. Real world number of clones is from tens to hundreds, so this is future proof. Reported-by: syzbot+4376a9a073770c173269@syzkaller.appspotmail.com Signed-off-by: David Sterba --- fs/btrfs/send.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 67f7c698ade3..26dace7b74c6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8072,10 +8072,10 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) /* * Check that we don't overflow at later allocations, we request * clone_sources_count + 1 items, and compare to unsigned long inside - * access_ok. + * access_ok. Also set an upper limit for allocation size so this can't + * easily exhaust memory. Max number of clone sources is about 200K. */ - if (arg->clone_sources_count > - ULONG_MAX / sizeof(struct clone_root) - 1) { + if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) { ret = -EINVAL; goto out; } -- cgit v1.2.3 From a9ad4d87aa263de36895402b66115a3a4b88bf1c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 21 Jan 2023 16:06:11 +0800 Subject: btrfs: raid56: make error_bitmap update atomic In the rework of raid56 code, there is very limited concurrency in the endio context. Most of the work is done inside the sectors arrays, which different bios will never touch the same sector. But there is a concurrency here for error_bitmap. Both read and write endio functions need to touch them, and we can have multiple write bios touching the same error bitmap if they all hit some errors. Here we fix the unprotected bitmap operation by going set_bit() in a loop. Since we have a very small ceiling of the sectors (at most 16 sectors), such set_bit() in a loop should be very acceptable. Fixes: 2942a50dea74 ("btrfs: raid56: introduce btrfs_raid_bio::error_bitmap") Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f35898a78b16..ff4b1d583788 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1426,12 +1426,20 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi u32 bio_size = 0; struct bio_vec *bvec; struct bvec_iter_all iter_all; + int i; bio_for_each_segment_all(bvec, bio, iter_all) bio_size += bvec->bv_len; - bitmap_set(rbio->error_bitmap, total_sector_nr, - bio_size >> rbio->bioc->fs_info->sectorsize_bits); + /* + * Since we can have multiple bios touching the error_bitmap, we cannot + * call bitmap_set() without protection. + * + * Instead use set_bit() for each bit, as set_bit() itself is atomic. + */ + for (i = total_sector_nr; i < total_sector_nr + + (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++) + set_bit(i, rbio->error_bitmap); } /* Verify the data sectors at read time. */ -- cgit v1.2.3