summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2025-01-21 17:42:25 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2025-01-21 18:32:05 -0500
commit2c5d8a8347a8ea8c243245e2ed8d87dfbf9f54fb (patch)
treee6f98cc208966943bf1ae10fcaa0e3e2f0399712
parentf917016f69ba5a0c42a3a3327792707315f2a937 (diff)
bcachefs: "Journal stuck" timeout now takes into account device latency
If a block device (e.g. your typical consumer SSD) is taking multiple seconds for IOs (typically flushes), we don't want to emit the "journal stuck" message prematurely. Also, make sure to drop the btree_trans srcu lock if we're blocking for more than a second. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/btree_trans_commit.c2
-rw-r--r--fs/bcachefs/journal.c32
-rw-r--r--fs/bcachefs/journal.h7
3 files changed, 33 insertions, 8 deletions
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 6b79b672e0b1..2760dd9569ed 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
- trans->journal_u64s, flags);
+ trans->journal_u64s, flags, trans);
}
#define JSET_ENTRY_LOG_U64s 4
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 2cd20114b74b..46d53d3ba018 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -601,6 +601,16 @@ out:
: -BCH_ERR_journal_res_get_blocked;
}
+static unsigned max_dev_latency(struct bch_fs *c)
+{
+ u64 nsecs = 0;
+
+ for_each_rw_member(c, ca)
+ nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
+
+ return nsecs_to_jiffies(nsecs);
+}
+
/*
* Essentially the entry function to the journaling code. When bcachefs is doing
* a btree insert, it calls this function to get the current journal write.
@@ -612,17 +622,31 @@ out:
* btree node write locks.
*/
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
- unsigned flags)
+ unsigned flags,
+ struct btree_trans *trans)
{
int ret;
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
- HZ * 10))
+ HZ))
return ret;
+ if (trans)
+ bch2_trans_unlock_long(trans);
+
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);
+
+ remaining_wait = max(0, remaining_wait - HZ);
+
+ if (closure_wait_event_timeout(&j->async_wait,
+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+ (flags & JOURNAL_RES_GET_NONBLOCK),
+ remaining_wait))
+ return ret;
+
struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
@@ -727,7 +751,7 @@ recheck_need_open:
* livelock:
*/
sched_annotate_sleep();
- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
@@ -848,7 +872,7 @@ out:
static int __bch2_journal_meta(struct journal *j)
{
struct journal_res res = {};
- int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index cb0df0663946..a01dae1a57e3 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j,
}
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
- unsigned);
+ unsigned, struct btree_trans *);
/* First bits for BCH_WATERMARK: */
enum journal_res_flags {
@@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j,
}
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
- unsigned u64s, unsigned flags)
+ unsigned u64s, unsigned flags,
+ struct btree_trans *trans)
{
int ret;
@@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
if (journal_res_get_fast(j, res, flags))
goto out;
- ret = bch2_journal_res_get_slowpath(j, res, flags);
+ ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
if (ret)
return ret;
out: