summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2024-12-24 06:24:11 -0500
committer: Kent Overstreet <kent.overstreet@linux.dev> 2025-04-29 13:44:29 -0400
commit: c8323bdc1ea0f2265977cd290ffe0c076b467556 (patch)
tree: b9a0f9045cf82193bae095b17439f3e4b0e81aec
parent: b9613dce3ed56a9c65d742ec245a495badee4989 (diff)
bcachefs: check_allocations_done_lock
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/bcachefs.h 2
-rw-r--r-- fs/bcachefs/btree_gc.c 26
-rw-r--r-- fs/bcachefs/btree_gc.h 2
-rw-r--r-- fs/bcachefs/btree_io.c 3
-rw-r--r-- fs/bcachefs/btree_trans_commit.c 16
-rw-r--r-- fs/bcachefs/btree_update.h 3
-rw-r--r-- fs/bcachefs/btree_update_interior.c 14
-rw-r--r-- fs/bcachefs/disk_accounting.c 4
-rw-r--r-- fs/bcachefs/reflink.c 4
-rw-r--r-- fs/bcachefs/super.c 1
10 files changed, 65 insertions, 10 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 494967290e72..7670a2302f9c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1023,6 +1023,8 @@ struct bch_fs {
*/
seqcount_t gc_pos_lock;
struct gc_pos gc_pos;
+ struct percpu_rw_semaphore
+ check_allocations_done_lock;
struct mutex gc_gens_lock;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 64259f3b1988..411fc28a862c 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -973,7 +973,9 @@ static int bch2_gc_alloc_done(struct bch_fs *c)
POS(ca->dev_idx, ca->mi.first_bucket),
POS(ca->dev_idx, ca->mi.nbuckets - 1),
BTREE_ITER_slots|BTREE_ITER_prefetch, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_check_allocations_lock_held,
bch2_alloc_write_key(trans, &iter, ca, k)));
if (ret) {
bch2_dev_put(ca);
@@ -1062,7 +1064,9 @@ static int bch2_gc_stripes_done(struct bch_fs *c)
for_each_btree_key_commit(trans, iter,
BTREE_ID_stripes, POS_MIN,
BTREE_ITER_prefetch, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_check_allocations_lock_held,
bch2_gc_write_stripes_key(trans, &iter, k)));
}
@@ -1114,10 +1118,14 @@ int bch2_check_allocations(struct bch_fs *c)
c->gc_count++;
+ percpu_down_write(&c->check_allocations_done_lock);
+ bch2_btree_interior_updates_flush(c);
+
ret = bch2_gc_alloc_done(c) ?:
bch2_gc_accounting_done(c) ?:
bch2_gc_stripes_done(c) ?:
bch2_gc_reflink_done(c);
+ percpu_up_write(&c->check_allocations_done_lock);
out:
percpu_down_write(&c->mark_lock);
/* Indicates that gc is no longer in progress: */
@@ -1306,10 +1314,24 @@ void bch2_gc_gens_async(struct bch_fs *c)
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
+void bch2_fs_btree_gc_exit(struct bch_fs *c)
+{
+ percpu_free_rwsem(&c->check_allocations_done_lock); /* pairs with percpu_init_rwsem() in bch2_fs_btree_gc_init() */
+}
+
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
+ /*
+  * check_allocations_done_lock is set up in bch2_fs_btree_gc_init():
+  * percpu_init_rwsem() can fail and this void function cannot report it.
+  */
mutex_init(&c->gc_gens_lock);
}
+
+int bch2_fs_btree_gc_init(struct bch_fs *c)
+{
+ return percpu_init_rwsem(&c->check_allocations_done_lock); /* result passed through unchanged; nonzero stops the ?: init chain in bch2_fs_alloc() */
+}
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 109ac5c67961..40b22052f844 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -84,6 +84,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);
+void bch2_fs_btree_gc_exit(struct bch_fs *);
void bch2_fs_btree_gc_init_early(struct bch_fs *);
+int bch2_fs_btree_gc_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_GC_H */
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 28b53310912c..d189daee6d64 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -2146,7 +2146,8 @@ static void btree_node_write_work(struct work_struct *work)
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_no_check_rw;
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_check_allocations_lock_held;
u64 start_time = wbio->start_time;
int ret = 0;
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index cdde769e7da3..885fb030c85e 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -990,9 +990,18 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
!trans->journal_entries_u64s)
goto out_reset;
+ if (likely(!(flags & BCH_TRANS_COMMIT_check_allocations_lock_held))) {
+ if (unlikely(!percpu_down_read_trylock(&c->check_allocations_done_lock))) {
+ ret = drop_locks_do(trans,
+ (percpu_down_read(&c->check_allocations_done_lock), 0));
+ if (ret)
+ goto out_reset_unlock;
+ }
+ }
+
ret = bch2_trans_commit_run_triggers(trans);
if (ret)
- goto out_reset;
+ goto out_reset_unlock;
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
@@ -1000,7 +1009,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
ret = do_bch2_trans_commit_to_journal_replay(trans);
else
ret = -BCH_ERR_erofs_trans_commit;
- goto out_reset;
+ goto out_reset_unlock;
}
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
@@ -1062,6 +1071,9 @@ retry:
out:
if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans);
+out_reset_unlock:
+ if (likely(!(flags & BCH_TRANS_COMMIT_check_allocations_lock_held)))
+ percpu_up_read(&c->check_allocations_done_lock);
out_reset:
if (!ret)
bch2_trans_downgrade(trans);
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 62d24b081e27..f440136b631b 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -28,7 +28,8 @@ void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
"pin journal entry referred to by trans->journal_res.seq") \
x(journal_reclaim, "operation required for journal reclaim; may return error" \
"instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
- x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")
+ x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")\
+ x(check_allocations_lock_held, "check_allocations_lock_held")
enum __bch_trans_commit_flags {
/* First bits for bch_watermark: */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index f8a2d0b6ded2..76d20e06d736 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -741,7 +741,8 @@ static void btree_update_nodes_written(struct btree_update *as)
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
- BCH_TRANS_COMMIT_journal_reclaim,
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_check_allocations_lock_held,
btree_update_nodes_written_trans(trans, as));
bch2_trans_unlock(trans);
@@ -2220,14 +2221,22 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree *parent;
btree_path_idx_t new_path = 0;
+ int ret = 0;
flags |= BCH_TRANS_COMMIT_no_enospc;
+ if (unlikely(!percpu_down_read_trylock(&c->check_allocations_done_lock))) {
+ ret = drop_locks_do(trans,
+ (percpu_down_read(&c->check_allocations_done_lock), 0));
+ if (ret)
+ goto out;
+ }
+
struct btree_path *path = btree_iter_path(trans, iter);
parent = btree_node_parent(path, b);
struct btree_update *as =
bch2_btree_update_start(trans, path, b->c.level, false, target, flags);
- int ret = PTR_ERR_OR_ZERO(as);
+ ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto out;
@@ -2272,6 +2281,7 @@ out:
if (new_path)
bch2_path_put(trans, new_path, true);
bch2_trans_downgrade(trans);
+ percpu_up_read(&c->check_allocations_done_lock);
return ret;
err_free_node:
bch2_btree_node_free_never_used(as, trans, n);
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 7be71952425c..784a9b298350 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -570,7 +570,9 @@ int bch2_gc_accounting_done(struct bch_fs *c)
if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) {
percpu_up_write(&c->mark_lock);
- ret = commit_do(trans, NULL, NULL, 0,
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_check_allocations_lock_held,
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));
percpu_down_write(&c->mark_lock);
if (ret)
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 3a13dbcab6ba..29179bd36b97 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -826,7 +826,9 @@ int bch2_gc_reflink_done(struct bch_fs *c)
for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_prefetch, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_check_allocations_lock_held,
bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
c->reflink_gc_nr = 0;
return ret;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 16a9632390dc..c8d40e88e850 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -971,6 +971,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
ret =
bch2_fs_async_obj_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
+ bch2_fs_btree_gc_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?: