summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-09-23 22:12:31 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2025-02-12 18:40:19 -0500
commit9cf6b84b71adb97f3c19476ebb5a42228fad89b5 (patch)
treed5cda508ff0836cc09bc44d13c17c5a01df8a82a
parent9f734cd076931fa4d7feb5728e5cd95cde0af114 (diff)
bcachefs: CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
Incorrectly handled transaction restarts can be a source of heisenbugs; add a mode where we randomly inject them to shake them out. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/Kconfig7
-rw-r--r--fs/bcachefs/btree_iter.c33
-rw-r--r--fs/bcachefs/btree_iter.h12
-rw-r--r--fs/bcachefs/btree_trans_commit.c4
-rw-r--r--fs/bcachefs/btree_types.h3
5 files changed, 58 insertions, 1 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 85eea7a4dea3..fc7efd0a7525 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -61,6 +61,13 @@ config BCACHEFS_DEBUG
The resulting code will be significantly slower than normal; you
probably shouldn't select this option unless you're a developer.
+config BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ bool "Randomly inject transaction restarts"
+ depends on BCACHEFS_DEBUG
+ help
+ Randomly inject transaction restarts in a few core paths - may have a
+ significant performance penalty
+
config BCACHEFS_TESTS
bool "bcachefs unit and performance tests"
depends on BCACHEFS_FS
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 5988219c6908..e32fce4fd258 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
if (iter->update_path) {
bch2_path_put_nokeep(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);
@@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ int ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
while (1) {
k = __bch2_btree_iter_peek_prev(iter, search_key);
if (unlikely(!k.k))
@@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (ret)
+ return ERR_PTR(ret);
+
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
@@ -3163,7 +3185,8 @@ out_new_mem:
if (old_bytes) {
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
p = trans->mem + trans->mem_top;
@@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->last_begin_ip = _RET_IP_;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (trans->restarted) {
+ trans->restart_count_this_trans++;
+ } else {
+ trans->restart_count_this_trans = 0;
+ }
+#endif
+
trans_set_locked(trans, false);
if (trans->restarted) {
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index b9538e6e6d65..8c16d9a3ec1d 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
return btree_trans_restart_ip(trans, err, _THIS_IP_);
}
+/*
+ * trans_maybe_inject_restart - debug hook that occasionally forces a
+ * transaction restart.
+ *
+ * @trans:	transaction to (possibly) restart
+ * @ip:	caller address, passed through to the tracepoint and to
+ *		btree_trans_restart_ip()
+ *
+ * With CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS enabled, a restart is
+ * injected whenever the low N bits of ktime_get_ns() are all zero, where
+ * N = min(63, 10 + trans->restart_count_this_trans).  N grows with
+ * restart_count_this_trans, so the more restarts have been counted the more
+ * bits must be zero and the less often a restart is injected.
+ *
+ * Returns 0 when no restart is injected (always the case when the config
+ * option is disabled); otherwise returns the result of
+ * btree_trans_restart_ip() for BCH_ERR_transaction_restart_fault_inject.
+ */
+static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+	/* Number of low clock bits that must all be zero; capped below 64 */
+	unsigned nr_bits = min(63, (10 + trans->restart_count_this_trans));
+
+	/* Any low bit set: no injection this time */
+	if (ktime_get_ns() & ~(~0ULL << nr_bits))
+		return 0;
+
+	trace_and_count(trans->c, trans_restart_injected, trans, ip);
+	return btree_trans_restart_ip(trans,
+				      BCH_ERR_transaction_restart_fault_inject, ip);
+#else
+	return 0;
+#endif
+}
+
bool bch2_btree_node_upgrade(struct btree_trans *,
struct btree_path *, unsigned);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 2760dd9569ed..c4f524b2ca9a 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
bch2_trans_verify_not_unlocked_or_in_restart(trans);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret))
+ goto out_reset;
+
if (!trans->nr_updates &&
!trans->journal_entries_u64s)
goto out_reset;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a6f251eb4164..a09cbe9cd94f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -509,6 +509,9 @@ struct btree_trans {
bool notrace_relock_fail:1;
enum bch_errcode restarted:16;
u32 restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ u32 restart_count_this_trans;
+#endif
u64 last_begin_time;
unsigned long last_begin_ip;