From 72e71bf0298c7ed985bcd0d3c7ff4ca19de60373 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 10:14:13 -0400 Subject: bcachefs: Fix a scheduler splat in __bch2_next_write_buffer_flush_journal_buf() We're using mutex_lock() inside a wait_event() conditional - prepare_to_wait() has already flipped task state, so potentially blocking ops need annotation. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 9c9a25dbd613..9c2af544251a 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -870,6 +870,8 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou { struct journal_buf *ret = NULL; + /* We're inside wait_event(), but using mutex_lock(: */ + sched_annotate_sleep(); mutex_lock(&j->buf_lock); spin_lock(&j->lock); max_seq = min(max_seq, journal_cur_seq(j)); -- cgit v1.2.3 From 7ffec9ccdc6ad8356792f9a7823b1fe9c8a10cbf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 10:55:17 -0400 Subject: bcachefs: don't free error pointers Reported-by: syzbot+3333603f569fc2ef258c@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index be5b47619327..8091d0686029 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -902,7 +902,8 @@ out: bch2_journal_keys_put_initial(c); bch2_find_btree_nodes_exit(&c->found_btree_nodes); } - kfree(clean); + if (!IS_ERR(clean)) + kfree(clean); if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && -- cgit v1.2.3 From a2ddaf965f6a15c316f483e7446fbe3d81fba27c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:06:54 -0400 Subject: bcachefs: bucket_pos_to_bp_noerror() We don't want the assert when we're checking if the backpointer is valid. Reported-by: syzbot+bf7215c0525098e7747a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 2 +- fs/bcachefs/backpointers.h | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index a20044201002..af7a71de1bdf 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -54,7 +54,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, int ret = 0; bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || - !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + !bpos_eq(bp.k->p, bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset)), c, err, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 85949b9fd880..c1b274eadda1 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -45,6 +45,15 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c, return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); } +static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca, + struct bpos bucket, + u64 bucket_offset) +{ + return POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); +} + /* * Convert from pos in alloc btree + bucket offset to pos in backpointer btree: */ @@ -53,10 +62,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, u64 bucket_offset) { struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); - struct bpos ret = POS(bucket.inode, - (bucket_to_sector(ca, bucket.offset) << - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); - + struct bpos ret = bucket_pos_to_bp_noerror(ca, bucket, bucket_offset); EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); return ret; } -- cgit v1.2.3 From b30b70ad8bffcde513d34d525820ec411f48e3d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:39:53 -0400 Subject: bcachefs: Fix early error path in bch2_fs_btree_key_cache_exit() Reported-by: syzbot+a35cdb62ec34d44fb062@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index e8c1c530cd95..7dafa1accec2 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -956,13 +956,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) } #ifdef __KERNEL__ - for_each_possible_cpu(cpu) { - struct btree_key_cache_freelist *f = - per_cpu_ptr(bc->pcpu_freed, cpu); - - for (i = 0; i < f->nr; i++) { - ck = f->objs[i]; - list_add(&ck->list, &items); + if (bc->pcpu_freed) { + for_each_possible_cpu(cpu) { + struct btree_key_cache_freelist *f = + per_cpu_ptr(bc->pcpu_freed, cpu); + + for (i = 0; i < f->nr; i++) { + ck = f->objs[i]; + list_add(&ck->list, &items); + } } } #endif -- cgit v1.2.3 From 4a8521b6bb81abba9d80d60b80908c77c9236ced Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:31:22 -0400 Subject: bcachefs: Inodes need extra padding for varint_decode_fast() Reported-by: syzbot+66b9b74f6520068596a9@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index f137252bccc5..b72cf31f7274 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -199,9 +199,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, u64 new_i_size, s64 i_sectors_delta) { - struct btree_iter iter; - struct bkey_i *k; - struct bkey_i_inode_v3 *inode; /* * Crazy performance optimization: * Every extent update needs to also update the inode: the inode trigger @@ -214,25 +211,36 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, * lost, but that's fine. */ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; - int ret; - k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes, + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, SPOS(0, extent_iter->pos.inode, extent_iter->snapshot), BTREE_ITER_CACHED); - ret = PTR_ERR_OR_ZERO(k); + int ret = bkey_err(k); if (unlikely(ret)) return ret; - if (unlikely(k->k.type != KEY_TYPE_inode_v3)) { - k = bch2_inode_to_v3(trans, k); - ret = PTR_ERR_OR_ZERO(k); + /* + * varint_decode_fast(), in the inode .invalid method, reads up to 7 + * bytes past the end of the buffer: + */ + struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8); + ret = PTR_ERR_OR_ZERO(k_mut); + if (unlikely(ret)) + goto err; + + bkey_reassemble(k_mut, k); + + if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) { + k_mut = bch2_inode_to_v3(trans, k_mut); + ret = PTR_ERR_OR_ZERO(k_mut); if (unlikely(ret)) goto err; } - inode = bkey_i_to_inode_v3(k); + struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut); if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && new_i_size > le64_to_cpu(inode->v.bi_size)) { -- cgit v1.2.3 From feb077c1774e559cddfc41ef26c864780d158fe2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 17:13:21 -0400 Subject: bcachefs: Fix refcount put in sb_field_resize error path Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 08ea3dbbbe97..e7527d551e3c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -232,7 +232,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, struct bch_sb_handle *dev_sb = &ca->disk_sb; if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); return NULL; } } -- cgit v1.2.3 From 1267df40acb2da62b1641abae26132411d093fb3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:29:46 -0400 Subject: bcachefs: Initialize bch_write_op->failed in inline data path Normally this is initialized in __bch2_write(), which is executed in a loop, but the inline data path skips this. Reported-by: syzbot+fd3ccb331eb21f05d13b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index b72cf31f7274..40d7df7607df 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1513,6 +1513,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) unsigned sectors; int ret; + memset(&op->failed, 0, sizeof(op->failed)); + op->flags |= BCH_WRITE_WROTE_DATA_INLINE; op->flags |= BCH_WRITE_DONE; -- cgit v1.2.3 From 3a2d0259274202432e5119463dd4cf5f9fabed98 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:51:49 -0400 Subject: bcachefs: Fix bch2_dev_lookup() refcounting bch2_dev_lookup() is supposed to take a ref on the device it returns, but for_each_member_device() takes refs as it iterates, for_each_member_device_rcu() does not. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 88e214c609bb..c2c80e6890ae 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -2004,13 +2004,9 @@ err: /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { - rcu_read_lock(); - for_each_member_device_rcu(c, ca, NULL) - if (!strcmp(name, ca->name)) { - rcu_read_unlock(); + for_each_member_device(c, ca) + if (!strcmp(name, ca->name)) return ca; - } - rcu_read_unlock(); return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); } -- cgit v1.2.3 From 18b4abcead744322feb90ba31450e7d770e928bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:55:44 -0400 Subject: bcachefs: Fix lifetime issue in device iterator helpers bch2_get_next_dev() and bch2_get_next_online_dev() iterate over devices, dropping and taking refs as they go; we can't access the previous device (for ca->dev_idx) after we've dropped our ref to it, unless we take rcu_read_lock() first. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 5efa64eca5f8..5bf27d30ca29 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -107,10 +107,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev * static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca) { + rcu_read_lock(); if (ca) percpu_ref_put(&ca->ref); - rcu_read_lock(); if ((ca = __bch2_next_dev(c, ca, NULL))) percpu_ref_get(&ca->ref); rcu_read_unlock(); @@ -132,10 +132,10 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, unsigned state_mask) { + rcu_read_lock(); if (ca) percpu_ref_put(&ca->io_ref); - rcu_read_lock(); while ((ca = __bch2_next_dev(c, ca, NULL)) && (!((1 << ca->mi.state) & state_mask) || !percpu_ref_tryget(&ca->io_ref))) -- cgit v1.2.3 From db42549d402cb44fe67c95d08f1a9ea902d32e7e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 13:26:37 -0400 Subject: bcachefs: Add a better limit for maximum number of buckets The bucket_gens array is a single array allocation (one byte per bucket), and kernel allocations are still limited to INT_MAX. Check this limit to avoid failing the bucket_gens array allocation. Reported-by: syzbot+b29f436493184ea42e2b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 6 ++++++ fs/bcachefs/errcode.h | 1 + fs/bcachefs/sb-members.c | 6 +++--- fs/bcachefs/super.c | 7 +++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index f7fbfccd2b1e..8345a2b2d05b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -591,6 +591,12 @@ struct bch_member { __le64 btree_allocated_bitmap; }; +/* + * This limit comes from the bucket_gens array - it's a single allocation, and + * kernel allocation are limited to INT_MAX + */ +#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64) + #define BCH_MEMBER_V1_BYTES 56 LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 01a79fa3eacb..dbe35b80bc0b 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -175,6 +175,7 @@ x(EINVAL, block_size_too_small) \ x(EINVAL, bucket_size_too_small) \ x(EINVAL, device_size_too_small) \ + x(EINVAL, device_size_too_big) \ x(EINVAL, device_not_a_member_of_filesystem) \ x(EINVAL, device_has_been_removed) \ x(EINVAL, device_splitbrain) \ diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 5b8e621ac5eb..44b3f0cb7b49 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -124,9 +124,9 @@ static int validate_member(struct printbuf *err, struct bch_sb *sb, int i) { - if (le64_to_cpu(m.nbuckets) > LONG_MAX) { - prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", - i, le64_to_cpu(m.nbuckets), LONG_MAX); + if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) { + prt_printf(err, "device %u: too many buckets (got %llu, max %u)", + i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX); return -BCH_ERR_invalid_sb_members; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index c2c80e6890ae..dddf57ec4511 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1959,6 +1959,13 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) goto err; } + if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { + bch_err(ca, "New device size too big (%llu greater than max %u)", + nbuckets, BCH_MEMBER_NBUCKETS_MAX); + ret = -BCH_ERR_device_size_too_big; + goto err; + } + if (bch2_dev_is_online(ca) && get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { -- cgit v1.2.3 From 9a0ec045110dbaad4b8d609142b534f913354101 Mon Sep 17 00:00:00 2001 From: Reed Riley Date: Sat, 4 May 2024 22:12:23 +0000 Subject: bcachefs: fix overflow in fiemap filefrag (and potentially other utilities that call fiemap) sometimes pass ULONG_MAX as the length. fiemap_prep clamps excessively large lengths - but the calculation of end can overflow if it occurs before calling fiemap_prep. When this happens, filefrag assumes it has read to the end and exits. Signed-off-by: Reed Riley Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fce690007edf..6f114803c6f2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -964,7 +964,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; u32 snapshot; @@ -974,6 +973,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (ret) return ret; + struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); if (start + len < start) return -EINVAL; -- cgit v1.2.3 From 6b8cbfc3db7582d6f26c6b757d8e949174641709 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:02:28 -0400 Subject: bcachefs: Fix assert in bch2_alloc_v4_invalid() Reported-by: syzbot+10827fa6b176e1acf1d0@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 4 ++-- fs/bcachefs/alloc_background.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 4ff56fa4d539..534ba2b02bd6 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -244,10 +244,10 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 052b2fac25d6..2790e516383d 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -126,13 +126,17 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_ return pos; } -static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) +static inline unsigned alloc_v4_u64s_noerror(const struct bch_alloc_v4 *a) { - unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + return (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: BCH_ALLOC_V4_U64s_V0) + BCH_ALLOC_V4_NR_BACKPOINTERS(a) * (sizeof(struct bch_backpointer) / sizeof(u64)); +} +static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) +{ + unsigned ret = alloc_v4_u64s_noerror(a); BUG_ON(ret > U8_MAX - BKEY_U64s); return ret; } -- cgit v1.2.3 From f39055220f6f98a180e3503fe05bbf9921c425c8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:28:00 -0400 Subject: bcachefs: Add missing validation for superblock section clean We were forgetting to check for jset entries that overrun the end of the section - both in validate and to_text(); to_text() needs to be safe for types that fail to validate. Reported-by: syzbot+c48865e11e7e893ec4ab@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-clean.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 35ca3f138de6..194e55b11137 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -278,6 +278,17 @@ static int bch2_sb_clean_validate(struct bch_sb *sb, return -BCH_ERR_invalid_sb_clean; } + for (struct jset_entry *entry = clean->start; + entry != vstruct_end(&clean->field); + entry = vstruct_next(entry)) { + if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) { + prt_str(err, "entry type "); + bch2_prt_jset_entry_type(err, le16_to_cpu(entry->type)); + prt_str(err, " overruns end of section"); + return -BCH_ERR_invalid_sb_clean; + } + } + return 0; } @@ -295,6 +306,9 @@ static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb, for (entry = clean->start; entry != vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) + break; + if (entry->type == BCH_JSET_ENTRY_btree_keys && !entry->u64s) continue; -- cgit v1.2.3 From 2bb9600d5d4735953c47dd1ee99382c68dd04caa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:33:05 -0400 Subject: bcachefs: Guard against unknown k.k->type in __bkey_invalid() For forwards compatibility we have to allow unknown key types, and only run the checks that make sense against them. Fix a missing guard on k.k->type being known. Reported-by: syzbot+ae4dc916da3ce51f284f@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index db336a43fc08..a275a9e8e341 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -171,8 +171,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on((type == BKEY_TYPE_btree || - (flags & BKEY_INVALID_COMMIT)) && + bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && + (type == BKEY_TYPE_btree || (flags & BKEY_INVALID_COMMIT)) && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", -- cgit v1.2.3 From 0ec5b3b7ccfcdca02ab322abf86455d0050ae98f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:44:27 -0400 Subject: bcachefs: Fix shift-by-64 in bformat_needs_redo() Ancient versions of bcachefs produced packed formats that could represent keys that our in memory format cannot represent; bformat_needs_redo() has some tricky shifts to check for this sort of overflow. Reported-by: syzbot+594427aebfefeebe91c6@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index bf68ea49447b..4d94b7742dbb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -968,24 +968,30 @@ static bool migrate_btree_pred(struct bch_fs *c, void *arg, return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); } +/* + * Ancient versions of bcachefs produced packed formats which could represent + * keys that the in memory format cannot represent; this checks for those + * formats so we can get rid of them. + */ static bool bformat_needs_redo(struct bkey_format *f) { - unsigned i; - - for (i = 0; i < f->nr_fields; i++) { + for (unsigned i = 0; i < f->nr_fields; i++) { + unsigned f_bits = f->bits_per_field[i]; unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1)); u64 field_offset = le64_to_cpu(f->field_offset[i]); - if (f->bits_per_field[i] > unpacked_bits) + if (f_bits > unpacked_bits) return true; - if ((f->bits_per_field[i] == unpacked_bits) && field_offset) + if ((f_bits == unpacked_bits) && field_offset) return true; - if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) & - unpacked_mask) < - field_offset) + u64 f_mask = f_bits + ? ~((~0ULL << (f_bits - 1)) << 1) + : 0; + + if (((field_offset + f_mask) & unpacked_mask) < field_offset) return true; } -- cgit v1.2.3 From 8060bf1d83f7d404bacb0e5a38f2d4d8f4c9dfb7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:56:54 -0400 Subject: bcachefs: Fix snapshot_t() usage in bch2_fs_quota_read_inode() bch2_fs_quota_read_inode() wasn't entirely updated to the bch2_snapshot_tree() helper, which takes rcu lock. Reported-by: syzbot+a3a9a61224ed3b7f0010@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/quota.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index e68b34eab90a..556da0738106 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -560,13 +560,11 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_inode_unpacked u; struct bch_snapshot_tree s_t; - int ret; + u32 tree = bch2_snapshot_tree(c, k.k->p.snapshot); - ret = bch2_snapshot_tree_lookup(trans, - bch2_snapshot_tree(c, k.k->p.snapshot), &s_t); + int ret = bch2_snapshot_tree_lookup(trans, tree, &s_t); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "%s: snapshot tree %u not found", __func__, - snapshot_t(c, k.k->p.snapshot)->tree); + "%s: snapshot tree %u not found", __func__, tree); if (ret) return ret; -- cgit v1.2.3 From 88ab10186c44d0a8b90842beab8648b5fd14432d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 08:40:46 -0400 Subject: bcachefs: Add missing skcipher_request_set_callback() call Signed-off-by: Kent Overstreet --- fs/bcachefs/checksum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 7ed779b411f6..088fd2e7bdf1 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -102,6 +102,7 @@ static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, int ret; skcipher_request_set_sync_tfm(req, tfm); + skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, nonce.d); ret = crypto_skcipher_encrypt(req); -- cgit v1.2.3 From 71dac2482ad3c8d4a8b8998a96751f009bad895f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 09:10:29 -0400 Subject: bcachefs: BCH_SB_LAYOUT_SIZE_BITS_MAX Define a constant for the max superblock size, to avoid a too-large shift. Reported-by: syzbot+a8b0fb419355c91dda7f@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 ++ fs/bcachefs/super-io.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 8345a2b2d05b..2e8b1a489c20 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -903,6 +903,8 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re #define BCH_SB_SECTOR 8 #define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ +#define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */ + struct bch_sb_layout { __uuid_t magic; /* bcachefs superblock UUID */ __u8 layout_type; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e7527d551e3c..989d16bba8f0 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -649,7 +649,7 @@ reread: bytes = vstruct_bytes(sb->sb); - if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { + if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) { prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", bytes, 512UL << sb->sb->layout.sb_max_size_bits); return -BCH_ERR_invalid_sb_too_big; -- cgit v1.2.3 From 54541c1f78e12a78487ae63e2e199a7d4f6dbd26 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 20:49:24 -0400 Subject: bcachefs: Fix race in bch2_write_super() bch2_write_super() was looping over online devices multiple times - dropping and retaking io_ref each time. This meant it could race with device removal; it could increment the sequence number on a device but fail to write it - and then if the device was re-added, it would get confused the next time around thinking a superblock write was silently dropped. Fix this by taking io_ref once, and stashing pointers to online devices in a darray. Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 989d16bba8f0..bfdb15e7d778 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -923,6 +923,7 @@ int bch2_write_super(struct bch_fs *c) struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; + DARRAY(struct bch_dev *) online_devices = {}; int ret = 0; trace_and_count(c, write_super, c, _RET_IP_); @@ -935,6 +936,15 @@ int bch2_write_super(struct bch_fs *c) closure_init_stack(cl); memset(&sb_written, 0, sizeof(sb_written)); + for_each_online_member(c, ca) { + ret = darray_push(&online_devices, ca); + if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) { + percpu_ref_put(&ca->io_ref); + goto out; + } + percpu_ref_get(&ca->io_ref); + } + /* Make sure we're using the new magic numbers: */ c->disk_sb.sb->magic = BCHFS_MAGIC; c->disk_sb.sb->layout.magic = BCHFS_MAGIC; @@ -942,8 +952,8 @@ int bch2_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb.sb->seq, 1); struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); - for_each_online_member(c, ca) - __bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq; + darray_for_each(online_devices, ca) + __bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq; c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds()); if (test_bit(BCH_FS_error, &c->flags)) @@ -959,16 +969,15 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_errors_from_cpu(c); bch2_sb_downgrade_update(c); - for_each_online_member(c, ca) - bch2_sb_from_fs(c, ca); + darray_for_each(online_devices, ca) + bch2_sb_from_fs(c, (*ca)); - for_each_online_member(c, ca) { + darray_for_each(online_devices, ca) { printbuf_reset(&err); - ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE); + ret = bch2_sb_validate(&(*ca)->disk_sb, &err, WRITE); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); - percpu_ref_put(&ca->io_ref); goto out; } } @@ -995,16 +1004,18 @@ int bch2_write_super(struct bch_fs *c) return -BCH_ERR_sb_not_downgraded; } - for_each_online_member(c, ca) { - __set_bit(ca->dev_idx, sb_written.d); - ca->sb_write_error = 0; + darray_for_each(online_devices, ca) { + __set_bit((*ca)->dev_idx, sb_written.d); + (*ca)->sb_write_error = 0; } - for_each_online_member(c, ca) - read_back_super(c, ca); + darray_for_each(online_devices, ca) + read_back_super(c, *ca); closure_sync(cl); - for_each_online_member(c, ca) { + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; + if (ca->sb_write_error) continue; @@ -1031,17 +1042,20 @@ int bch2_write_super(struct bch_fs *c) do { wrote = false; - for_each_online_member(c, ca) + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; if (!ca->sb_write_error && sb < ca->disk_sb.sb->layout.nr_superblocks) { write_one_super(c, ca, sb); wrote = true; } + } closure_sync(cl); sb++; } while (wrote); - for_each_online_member(c, ca) { + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; if (ca->sb_write_error) __clear_bit(ca->dev_idx, sb_written.d); else @@ -1077,6 +1091,9 @@ int bch2_write_super(struct bch_fs *c) out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + darray_for_each(online_devices, ca) + percpu_ref_put(&(*ca)->io_ref); + darray_exit(&online_devices); printbuf_exit(&err); return ret; } -- cgit v1.2.3 From 6e297a73bccf852e7716207caa8eb868737c7155 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 23:11:43 -0400 Subject: bcachefs: Add missing sched_annotate_sleep() in bch2_journal_flush_seq_async() Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 9c2af544251a..a8b08e76d0d0 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -706,6 +706,12 @@ recheck_need_open: spin_unlock(&j->lock); + /* + * We're called from bch2_journal_flush_seq() -> wait_event(); + * but this might block. We won't usually block, so we won't + * livelock: + */ + sched_annotate_sleep(); ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); if (ret) return ret; -- cgit v1.2.3