summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2024-08-16 12:31:53 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2024-08-22 10:04:41 -0400
commit 436ae1908e00ffb6b0b876bb4c4e59b54af71a48 (patch)
tree 2c4eb22153247923e71d83906eafcd4ab6b8cec5
parent 8b5bcc96651e2ab1fd73de1b22a630d333cd5aef (diff)
bcachefs: Fix deadlock in __wait_on_freeing_inode()
We can't call __wait_on_freeing_inode() with btree locks held; we're waiting on another thread that's in evict(), and before it clears that bit it needs to write that inode to flush timestamps - deadlock.

Fixing this involves a fair amount of re-jiggering to plumb a new transaction restart.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/fs.c 70
1 files changed, 48 insertions, 22 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 85b697041f32..eca0b073a1cf 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -185,12 +185,12 @@ static void __wait_on_freeing_inode(struct inode *inode)
finish_wait(wq, &wait.wq_entry);
}
-static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
+ subvol_inum inum)
{
struct bch_inode_info *inode;
repeat:
- inode = rhashtable_lookup_fast(&c->vfs_inodes_table, &inum,
- bch2_vfs_inodes_params);
+ inode = rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
if (inode) {
spin_lock(&inode->v.i_lock);
if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
@@ -198,7 +198,15 @@ repeat:
return NULL;
}
if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
- __wait_on_freeing_inode(&inode->v);
+ if (!trans) {
+ __wait_on_freeing_inode(&inode->v);
+ } else {
+ bch2_trans_unlock(trans);
+ __wait_on_freeing_inode(&inode->v);
+ int ret = bch2_trans_relock(trans);
+ if (ret)
+ return ERR_PTR(ret);
+ }
goto repeat;
}
__iget(&inode->v);
@@ -222,7 +230,9 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod
}
}
-static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode)
+static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
+ struct btree_trans *trans,
+ struct bch_inode_info *inode)
{
struct bch_inode_info *old = inode;
@@ -231,7 +241,7 @@ retry:
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
&inode->hash,
bch2_vfs_inodes_params))) {
- old = bch2_inode_hash_find(c, inode->ei_inum);
+ old = bch2_inode_hash_find(c, trans, inode->ei_inum);
if (!old)
goto retry;
@@ -250,7 +260,7 @@ retry:
*/
set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
- inode = old;
+ return old;
} else {
inode_fake_hash(&inode->v);
@@ -259,9 +269,8 @@ retry:
mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
+ return inode;
}
-
- return inode;
}
#define memalloc_flags_do(_flags, _do) \
@@ -321,9 +330,24 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
return inode;
}
+static struct bch_inode_info *bch2_inode_hash_init_insert(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *bi,
+ struct bch_subvolume *subvol)
+{
+ struct bch_inode_info *inode = bch2_new_inode(trans);
+ if (IS_ERR(inode))
+ return inode;
+
+ bch2_vfs_inode_init(trans, inum, inode, bi, subvol);
+
+ return bch2_inode_hash_insert(trans->c, trans, inode);
+
+}
+
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
- struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
+ struct bch_inode_info *inode = bch2_inode_hash_find(c, NULL, inum);
if (inode)
return &inode->v;
@@ -334,11 +358,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
int ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
- PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
- if (!ret) {
- bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
- inode = bch2_inode_hash_insert(c, inode);
- }
+ PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
bch2_trans_put(trans);
return ret ? ERR_PTR(ret) : &inode->v;
@@ -429,8 +449,16 @@ err_before_quota:
* we must insert the new inode into the inode cache before calling
* bch2_trans_exit() and dropping locks, else we could race with another
* thread pulling the inode in and modifying it:
+ *
+ * also, calling bch2_inode_hash_insert() without passing in the
+ * transaction object is sketchy - if we could ever end up in
+ * __wait_on_freeing_inode(), we'd risk deadlock.
+ *
+ * But that shouldn't be possible, since we still have the inode locked
+ * that we just created, and we _really_ can't take a transaction
+ * restart here.
*/
- inode = bch2_inode_hash_insert(c, inode);
+ inode = bch2_inode_hash_insert(c, NULL, inode);
bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
@@ -470,7 +498,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
if (ret)
goto err;
- struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
+ struct bch_inode_info *inode = bch2_inode_hash_find(c, trans, inum);
if (inode)
goto out;
@@ -478,7 +506,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
struct bch_inode_unpacked inode_u;
ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
- PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
+ PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
c, "dirent to missing inode:\n %s",
@@ -498,9 +526,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
ret = -ENOENT;
goto err;
}
-
- bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
- inode = bch2_inode_hash_insert(c, inode);
out:
bch2_trans_iter_exit(trans, &dirent_iter);
printbuf_exit(&buf);
@@ -1541,7 +1566,8 @@ static const struct export_operations bch_export_ops = {
.get_name = bch2_get_name,
};
-static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
+static void bch2_vfs_inode_init(struct btree_trans *trans,
+ subvol_inum inum,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
struct bch_subvolume *subvol)