diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-11-18 13:21:59 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2020-12-07 11:47:04 -0500 |
commit | 035f993b08d096a0af767089622f4d047f55c672 (patch) | |
tree | abef2d3ab0b6cf8520b6f5fc87a9f94e7448972d | |
parent | bc084ee6bc7b1dc9aa054ee18beb350598d1f087 (diff) |
bcachefs: Be more precise with journal error reporting
We were incorrectly detecting a journal deadlock (the journal itself
filling up) when only the journal pin fifo had filled up. A full journal
pin fifo just means we need to wait on reclaim.
This plumbs through more precise error reporting so that the
journal_res_get path can distinguish between these cases.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/journal.c | 76 | ||||
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/journal_types.h | 8 |
3 files changed, 49 insertions, 41 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 519e68b3c855..a4cc98f86a88 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -235,7 +235,7 @@ static int journal_entry_open(struct journal *j) BUG_ON(journal_entry_is_open(j)); if (j->blocked) - return -EAGAIN; + return cur_entry_blocked; if (j->cur_entry_error) return j->cur_entry_error; @@ -251,7 +251,7 @@ static int journal_entry_open(struct journal *j) u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); if (u64s <= le32_to_cpu(buf->data->u64s)) - return -ENOSPC; + return cur_entry_journal_full; /* * Must be set before marking the journal entry as open: @@ -263,7 +263,7 @@ static int journal_entry_open(struct journal *j) old.v = new.v = v; if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) - return -EROFS; + return cur_entry_insufficient_devices; /* Handle any already added entries */ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); @@ -376,7 +376,7 @@ retry: * Don't want to close current journal entry, just need to * invoke reclaim: */ - ret = -ENOSPC; + ret = cur_entry_journal_full; goto unlock; } @@ -399,14 +399,16 @@ retry: * there's still a previous one in flight: */ trace_journal_entry_full(c); - ret = -EAGAIN; + ret = cur_entry_blocked; } else { ret = journal_entry_open(j); } unlock: - if ((ret == -EAGAIN || ret == -ENOSPC) && - !j->res_get_blocked_start) + if ((ret && ret != cur_entry_insufficient_devices) && + !j->res_get_blocked_start) { j->res_get_blocked_start = local_clock() ?: 1; + trace_journal_full(c); + } can_discard = j->can_discard; spin_unlock(&j->lock); @@ -414,41 +416,39 @@ unlock: if (!ret) goto retry; - if (ret == -ENOSPC) { - if (WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), - "JOURNAL_RES_GET_RESERVED set but journal full")) { - char *buf; - - buf = kmalloc(4096, GFP_NOFS); - if (buf) { - bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); - pr_err("\n%s", buf); - kfree(buf); - } + if (WARN_ONCE(ret == cur_entry_journal_full && + 
!can_discard && + (flags & JOURNAL_RES_GET_RESERVED), + "JOURNAL_RES_GET_RESERVED set but journal full")) { + char *buf; + + buf = kmalloc(4096, GFP_NOFS); + if (buf) { + bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); + pr_err("\n%s", buf); + kfree(buf); } + } - /* - * Journal is full - can't rely on reclaim from work item due to - * freezing: - */ - trace_journal_full(c); - - if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { - if (can_discard) { - bch2_journal_do_discards(j); - goto retry; - } - - if (mutex_trylock(&j->reclaim_lock)) { - bch2_journal_reclaim(j); - mutex_unlock(&j->reclaim_lock); - } + /* + * Journal is full - can't rely on reclaim from work item due to + * freezing: + */ + if ((ret == cur_entry_journal_full || + ret == cur_entry_journal_pin_full) && + !(flags & JOURNAL_RES_GET_NONBLOCK)) { + if (can_discard) { + bch2_journal_do_discards(j); + goto retry; } - ret = -EAGAIN; + if (mutex_trylock(&j->reclaim_lock)) { + bch2_journal_reclaim(j); + mutex_unlock(&j->reclaim_lock); + } } - return ret; + return ret == cur_entry_insufficient_devices ? 
-EROFS : -EAGAIN; } /* @@ -1070,6 +1070,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "last_seq_ondisk:\t%llu\n" "prereserved:\t\t%u/%u\n" "current entry sectors:\t%u\n" + "current entry error:\t%u\n" "current entry:\t\t", fifo_used(&j->pin), journal_cur_seq(j), @@ -1077,7 +1078,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) j->last_seq_ondisk, j->prereserved.reserved, j->prereserved.remaining, - j->cur_entry_sectors); + j->cur_entry_sectors, + j->cur_entry_error); switch (s.cur_entry_offset) { case JOURNAL_ENTRY_ERROR_VAL: diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 7a04d06bb342..62eda89b1047 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -164,12 +164,12 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < c->opts.metadata_replicas_required) { - ret = -EROFS; + ret = cur_entry_insufficient_devices; goto out; } if (!fifo_free(&j->pin)) { - ret = -ENOSPC; + ret = cur_entry_journal_pin_full; goto out; } @@ -180,7 +180,7 @@ void bch2_journal_space_available(struct journal *j) clean = __journal_space_available(j, nr_devs_want, journal_space_clean); if (!discarded.next_entry) - ret = -ENOSPC; + ret = cur_entry_journal_full; overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * journal_entry_overhead(j); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 9757e3d55991..8a05bb991c2f 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -146,7 +146,13 @@ struct journal { * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if * insufficient devices: */ - int cur_entry_error; + enum { + cur_entry_ok, + cur_entry_blocked, + cur_entry_journal_full, + cur_entry_journal_pin_full, + cur_entry_insufficient_devices, + } cur_entry_error; union journal_preres_state prereserved; |