summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2022-12-15 21:44:32 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2022-12-15 21:48:43 -0500
commitb9fe8a8f22e9771ba84a9958a46fdedc472aa0db (patch)
tree2b215bd614fa7d3bc49aee0db22f2d13372f6b20
parentca2b096ed0403f34418cd4bb3af42d19d1f21005 (diff)
bcachefs: Fix for long running btree transactions & key cache
While a btree transaction is running, we hold a SRCU read lock on the btree key cache that prevents btree key cache keys from being freed - this is so that relock() operations won't access freed memory. The downside of this is that long running btree transactions prevent memory from being freed from the key cache. This adds a check in bch2_trans_begin() - if the transaction has held the SRCU read lock for longer than 1 second, drop and retake the SRCU read lock and invalidate the node pointers of unlocked key cache paths, so that a later relock() cannot touch a key that was freed while the lock was dropped. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/btree_iter.c18
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/errcode.h1
3 files changed, 20 insertions, 0 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index fd4358eb0130..9c139a7b4306 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2781,6 +2781,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p;
}
+static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+
+ trans_for_each_path(trans, path)
+ if (path->cached && !btree_node_locked(path, 0))
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
+}
+
/**
* bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset
@@ -2836,6 +2850,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_relock(trans);
}
+ if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ)))
+ bch2_trans_reset_srcu_lock(trans);
+
trans->last_restarted_ip = _RET_IP_;
if (trans->restarted)
bch2_btree_path_traverse_all(trans);
@@ -2925,6 +2942,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
trans->nr_max_paths = s->nr_max_paths;
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
mutex_lock(&c->btree_trans_lock);
list_for_each_entry(pos, &c->btree_trans_list, list) {
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index c074e4e6da65..af86ba12e3a4 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -421,6 +421,7 @@ struct btree_trans {
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 543cdb553188..6217096494e6 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -52,6 +52,7 @@
x(BCH_ERR_no_btree_node, no_btree_node_down) \
x(BCH_ERR_no_btree_node, no_btree_node_init) \
x(BCH_ERR_no_btree_node, no_btree_node_cached) \
+ x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \
x(0, btree_insert_fail) \
x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \
x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \