Diffstat (limited to 'fs/bcachefs/journal_reclaim.c'):
 fs/bcachefs/journal_reclaim.c | 226 ++++++------
 1 file changed, 114 insertions(+), 112 deletions(-)
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index cd6201741c59..f23e5ee9ad75 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
return (struct journal_space) { 0, 0 };
/*
+ * It's possible for bucket size to be misaligned w.r.t. the filesystem
+ * block size:
+ */
+ min_bucket_size = round_down(min_bucket_size, block_sectors(c));
+
+ /*
* We sorted largest to smallest, and we want the smallest out of the
* @nr_devs_want largest devices:
*/
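For reference, the kernel's round_down() masks off the low bits, so the alignment must be a power of two; block_sectors(c) qualifies, since the filesystem block size is a power-of-two number of sectors. A minimal standalone sketch with made-up sizes:

    #include <stdio.h>

    /* Same shape as the kernel macro; valid only for power-of-two align */
    #define round_down(x, align)  ((x) & ~((__typeof__(x))(align) - 1))

    int main(void)
    {
            unsigned min_bucket_size = 1000; /* sectors, made up */
            unsigned block_sectors   = 16;   /* fs block size in sectors */

            /* 1000 is not a multiple of 16: prints 992 */
            printf("%u\n", round_down(min_bucket_size, block_sectors));
            return 0;
    }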
@@ -215,8 +221,8 @@ void bch2_journal_space_available(struct journal *j)
if (nr_online < metadata_replicas_required(c)) {
if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
+ CLASS(printbuf, buf)();
+ guard(printbuf_atomic)(&buf);
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
@@ -224,7 +230,6 @@ void bch2_journal_space_available(struct journal *j)
prt_printf(&buf, " %s", ca->name);
bch_err(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
ret = bch_err_throw(c, insufficient_journal_devices);
goto out;
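The CLASS(printbuf, buf)() form above comes from linux/cleanup.h: it declares buf with a paired constructor and destructor, so printbuf_exit() runs automatically on every path out of the scope, which is why the explicit call is dropped in the next hunk. The class is plausibly declared along these lines; a sketch, not the verbatim bcachefs definition:

    /* DEFINE_CLASS(name, type, exit-expr, init-expr, init-args...) is the
     * real cleanup.h helper; the exact printbuf declaration may differ. */
    DEFINE_CLASS(printbuf, struct printbuf,
                 printbuf_exit(&_T),    /* destructor, runs at scope exit */
                 PRINTBUF,              /* constructor: an empty printbuf */
                 void)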
@@ -274,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct journal_device *ja
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
{
- spin_lock(&j->lock);
- bool ret = __should_discard_bucket(j, ja);
- spin_unlock(&j->lock);
-
- return ret;
+ guard(spinlock)(&j->lock);
+ return __should_discard_bucket(j, ja);
}
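guard(spinlock)(&j->lock) above takes the lock immediately and releases it when the guard variable leaves scope, so early returns need no explicit unlock. The machinery underneath is the compiler's cleanup attribute; a simplified sketch of what linux/cleanup.h generates for a lock class (the real macros are more general):

    struct spinlock_guard { spinlock_t *lock; };

    static inline struct spinlock_guard spinlock_guard_ctor(spinlock_t *l)
    {
            spin_lock(l);                   /* acquire at construction */
            return (struct spinlock_guard){ .lock = l };
    }

    static inline void spinlock_guard_dtor(struct spinlock_guard *g)
    {
            spin_unlock(g->lock);           /* release at scope exit */
    }

    #define guard_spinlock(l)                                           \
            struct spinlock_guard __guard                               \
            __attribute__((cleanup(spinlock_guard_dtor))) =             \
                    spinlock_guard_ctor(l)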
/*
@@ -289,7 +291,7 @@ void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- mutex_lock(&j->discard_lock);
+ guard(mutex)(&j->discard_lock);
for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) {
struct journal_device *ja = &ca->journal;
@@ -303,15 +305,12 @@ void bch2_journal_do_discards(struct journal *j)
ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOFS);
- spin_lock(&j->lock);
- ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
-
- bch2_journal_space_available(j);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+ bch2_journal_space_available(j);
+ }
}
}
-
- mutex_unlock(&j->discard_lock);
}
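Where guard() holds the lock for the rest of the enclosing scope, scoped_guard() confines it to an explicit block, as in the hunk above. The original linux/cleanup.h definition was essentially a one-iteration for-loop owning the guard object:

    /* The guard lives in the for-init, so its destructor fires when the
     * loop scope ends, and the body runs exactly once: */
    #define scoped_guard(_name, args...)                                \
            for (CLASS(_name, scope)(args),                             \
                 *done = NULL; !done; done = (void *)1)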
/*
@@ -352,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq)
void bch2_journal_pin_put(struct journal *j, u64 seq)
{
if (__bch2_journal_pin_put(j, seq)) {
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
bch2_journal_reclaim_fast(j);
- spin_unlock(&j->lock);
}
}
@@ -387,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j,
void bch2_journal_pin_drop(struct journal *j,
struct journal_entry_pin *pin)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
if (__journal_pin_drop(j, pin))
bch2_journal_reclaim_fast(j);
- spin_unlock(&j->lock);
}
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
@@ -437,7 +434,7 @@ void bch2_journal_pin_copy(struct journal *j,
struct journal_entry_pin *src,
journal_pin_flush_fn flush_fn)
{
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
u64 seq = READ_ONCE(src->seq);
@@ -448,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j,
* longer to exist, but that means there's no longer anything to
* copy and we can bail out here:
*/
- spin_unlock(&j->lock);
return;
}
@@ -465,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j,
*/
if (seq == journal_last_seq(j))
journal_wake(j);
- spin_unlock(&j->lock);
}
void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
{
- spin_lock(&j->lock);
+ bool wake;
- BUG_ON(seq < journal_last_seq(j));
+ scoped_guard(spinlock, &j->lock) {
+ BUG_ON(seq < journal_last_seq(j));
- bool reclaim = __journal_pin_drop(j, pin);
+ bool reclaim = __journal_pin_drop(j, pin);
- bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
- if (reclaim)
- bch2_journal_reclaim_fast(j);
- /*
- * If the journal is currently full, we might want to call flush_fn
- * immediately:
- */
- if (seq == journal_last_seq(j))
- journal_wake(j);
+ if (reclaim)
+ bch2_journal_reclaim_fast(j);
+ /*
+ * If the journal is currently full, we might want to call flush_fn
+ * immediately:
+ */
+ wake = seq == journal_last_seq(j);
+ }
- spin_unlock(&j->lock);
+ if (wake)
+ journal_wake(j);
}
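One behavioral tweak in this hunk: journal_wake() used to be called with j->lock still held; now the condition is sampled into 'wake' under the lock and the wakeup is issued after the scoped_guard releases it, a common pattern that avoids waking a task only for it to contend on the same lock. The generic shape (names here are illustrative, not bcachefs API):

    bool wake;

    scoped_guard(spinlock, &dev->lock) {
            update_state(dev);                    /* hypothetical helper */
            wake = waiters_should_run(dev);       /* sampled under lock */
    }

    if (wake)
            wake_up(&dev->waitq);                 /* no spinlock held */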
/**
@@ -574,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j,
j->last_flushed = jiffies;
- spin_lock(&j->lock);
- pin = journal_get_next_pin(j, seq_to_flush,
- allowed_below,
- allowed_above, &seq);
- if (pin) {
- BUG_ON(j->flush_in_progress);
- j->flush_in_progress = pin;
- j->flush_in_progress_dropped = false;
- flush_fn = pin->flush;
+ scoped_guard(spinlock, &j->lock) {
+ pin = journal_get_next_pin(j, seq_to_flush,
+ allowed_below,
+ allowed_above, &seq);
+ if (pin) {
+ BUG_ON(j->flush_in_progress);
+ j->flush_in_progress = pin;
+ j->flush_in_progress_dropped = false;
+ flush_fn = pin->flush;
+ }
}
- spin_unlock(&j->lock);
if (!pin)
break;
@@ -597,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j,
err = flush_fn(j, pin, seq);
- spin_lock(&j->lock);
- /* Pin might have been dropped or rearmed: */
- if (likely(!err && !j->flush_in_progress_dropped))
- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
- j->flush_in_progress = NULL;
- j->flush_in_progress_dropped = false;
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock) {
+ /* Pin might have been dropped or rearmed: */
+ if (likely(!err && !j->flush_in_progress_dropped))
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
+ j->flush_in_progress = NULL;
+ j->flush_in_progress_dropped = false;
+ }
wake_up(&j->pin_flush_wait);
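The two scoped_guard blocks in the preceding hunks bracket a classic pick-under-lock, work-unlocked, reconcile-under-lock sequence: a pin is claimed and marked in-progress under j->lock, flush_fn (which may do I/O) runs with no spinlock held, then the outcome is reconciled under the lock, since the pin may have been dropped or rearmed in the meantime. Schematically (helpers are hypothetical):

    struct work_item *item = NULL;
    int err;

    scoped_guard(spinlock, &lock)
            item = claim_next(&queue);      /* mark in-progress */

    if (item) {
            err = do_slow_work(item);       /* no spinlock across I/O */

            scoped_guard(spinlock, &lock)
                    finish(item, err);      /* may have been rearmed */
    }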
@@ -764,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg)
j->reclaim_kicked = false;
- mutex_lock(&j->reclaim_lock);
- ret = __bch2_journal_reclaim(j, false, kicked);
- mutex_unlock(&j->reclaim_lock);
+ scoped_guard(mutex, &j->reclaim_lock)
+ ret = __bch2_journal_reclaim(j, false, kicked);
now = jiffies;
delay = msecs_to_jiffies(c->opts.journal_reclaim_delay);
@@ -782,9 +778,8 @@ static int bch2_journal_reclaim_thread(void *arg)
if (j->reclaim_kicked)
break;
- spin_lock(&j->lock);
- journal_empty = fifo_empty(&j->pin);
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ journal_empty = fifo_empty(&j->pin);
long timeout = j->next_reclaim - jiffies;
@@ -838,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j)
static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
unsigned types)
{
+ guard(spinlock)(&j->lock);
+
struct journal_entry_pin_list *pin_list;
u64 seq;
-
- spin_lock(&j->lock);
fifo_for_each_entry_ptr(pin_list, &j->pin, seq) {
if (seq > seq_to_flush)
break;
@@ -849,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
if ((BIT(i) & types) &&
(!list_empty(&pin_list->unflushed[i]) ||
- !list_empty(&pin_list->flushed[i]))) {
- spin_unlock(&j->lock);
+ !list_empty(&pin_list->flushed[i])))
return true;
- }
}
- spin_unlock(&j->lock);
return false;
}
@@ -875,32 +867,54 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
if (ret)
return ret;
- mutex_lock(&j->reclaim_lock);
+ guard(mutex)(&j->reclaim_lock);
for (int type = JOURNAL_PIN_TYPE_NR - 1;
type >= 0;
--type)
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
*did_work = true;
- goto unlock;
+
+ /*
+ * Question from Dan Carpenter, on the early return:
+ *
+ * If journal_flush_pins_or_still_flushing() returns
+ * true, then the flush hasn't completed and we must
+ * return 0; we want the outer closure_wait_event() in
+ * journal_flush_pins() to continue.
+ *
+ * The early return is there because we don't want to
+ * call journal_entry_close() until we've finished
+ * flushing all outstanding journal pins - otherwise
+ * seq_to_flush can be U64_MAX, and we'll close a bunch
+ * of journal entries and write tiny ones completely
+ * unnecessarily.
+ *
+ * Having the early return be in the loop where we loop
+ * over types is important, because flushing one journal
+ * pin can cause new journal pins to be added (even of
+ * the same type: btree node writes may generate more
+ * btree node writes when updating the parent pointer
+ * hits a full node and has to trigger a split/compact).
+ *
+ * This is part of our shutdown sequence, where order of
+ * flushing is important in order to make sure that it
+ * terminates...
+ */
+ return 0;
}
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
- spin_lock(&j->lock);
/*
* If journal replay hasn't completed, the unreplayed journal entries
* hold refs on their corresponding sequence numbers
*/
+ guard(spinlock)(&j->lock);
ret = !test_bit(JOURNAL_replay_done, &j->flags) ||
journal_last_seq(j) > seq_to_flush ||
!fifo_used(&j->pin);
-
- spin_unlock(&j->lock);
-unlock:
- mutex_unlock(&j->reclaim_lock);
-
return ret;
}
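For orientation: journal_flush_done() is the condition of a closure_wait_event() loop in bch2_journal_flush_pins(), so returning 0 means "not finished, sleep and re-evaluate", which is what the comment above is describing. A sketch of the caller's shape (the waitlist field name is illustrative, not checked against the source):

    static void flush_pins_sketch(struct journal *j, u64 seq_to_flush)
    {
            bool did_work = false;

            /* sleeps until the condition returns nonzero */
            closure_wait_event(&j->flush_wait,      /* illustrative name */
                    journal_flush_done(j, seq_to_flush, &did_work));
    }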
@@ -925,13 +939,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
u64 iter, seq = 0;
int ret = 0;
- spin_lock(&j->lock);
- fifo_for_each_entry_ptr(p, &j->pin, iter)
- if (dev_idx >= 0
- ? bch2_dev_list_has_dev(p->devs, dev_idx)
- : p->devs.nr < c->opts.metadata_replicas)
- seq = iter;
- spin_unlock(&j->lock);
+ scoped_guard(spinlock, &j->lock)
+ fifo_for_each_entry_ptr(p, &j->pin, iter)
+ if (dev_idx >= 0
+ ? bch2_dev_list_has_dev(p->devs, dev_idx)
+ : p->devs.nr < c->opts.metadata_replicas)
+ seq = iter;
bch2_journal_flush_pins(j, seq);
@@ -939,7 +952,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
if (ret)
return ret;
- mutex_lock(&c->replicas_gc_lock);
+ guard(mutex)(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
/*
@@ -954,29 +967,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
goto err;
seq = 0;
- spin_lock(&j->lock);
- while (!ret) {
- union bch_replicas_padded replicas;
+ scoped_guard(spinlock, &j->lock)
+ while (!ret) {
+ union bch_replicas_padded replicas;
- seq = max(seq, journal_last_seq(j));
- if (seq >= j->pin.back)
- break;
- bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
- journal_seq_pin(j, seq)->devs);
- seq++;
+ seq = max(seq, journal_last_seq(j));
+ if (seq >= j->pin.back)
+ break;
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ journal_seq_pin(j, seq)->devs);
+ seq++;
- if (replicas.e.nr_devs) {
- spin_unlock(&j->lock);
- ret = bch2_mark_replicas(c, &replicas.e);
- spin_lock(&j->lock);
+ if (replicas.e.nr_devs) {
+ spin_unlock(&j->lock);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ spin_lock(&j->lock);
+ }
}
- }
- spin_unlock(&j->lock);
err:
- ret = bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
-
- return ret;
+ return bch2_replicas_gc_end(c, ret);
}
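One subtlety above: inside the scoped_guard, the loop still drops and retakes j->lock by hand around bch2_mark_replicas(), which can sleep. That is safe with scoped_guard as long as the lock is held again at every point where the scope can exit, because the guard's destructor unconditionally unlocks. Compressed illustration (helpers are hypothetical):

    scoped_guard(spinlock, &j->lock)
            while (have_work()) {
                    spin_unlock(&j->lock);
                    do_sleepable_work();    /* may allocate or sleep */
                    spin_lock(&j->lock);    /* re-acquire before exit */
            }
    /* guard destructor runs spin_unlock(&j->lock) here */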
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
@@ -984,20 +993,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
- spin_lock(&j->lock);
- if (!test_bit(JOURNAL_running, &j->flags)) {
- spin_unlock(&j->lock);
+ guard(spinlock)(&j->lock);
+ guard(printbuf_atomic)(out);
+
+ if (!test_bit(JOURNAL_running, &j->flags))
return true;
- }
*seq = max(*seq, j->pin.front);
- if (*seq >= j->pin.back) {
- spin_unlock(&j->lock);
+ if (*seq >= j->pin.back)
return true;
- }
-
- out->atomic++;
pin_list = journal_seq_pin(j, *seq);
@@ -1016,9 +1021,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
printbuf_indent_sub(out, 2);
- --out->atomic;
- spin_unlock(&j->lock);
-
return false;
}
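Finally, guard(printbuf_atomic)(out) pairs the out->atomic increment and decrement that the removed lines did by hand; the counter marks the printbuf as non-allocating while the spinlock is held. Such a guard class is plausibly declared with DEFINE_GUARD() from linux/cleanup.h; a sketch, not the verbatim bcachefs declaration:

    /* DEFINE_GUARD(name, type, lock-expr, unlock-expr) is the real
     * cleanup.h helper; the exact declaration may differ. */
    DEFINE_GUARD(printbuf_atomic, struct printbuf *,
                 _T->atomic++,      /* entry: no allocating from here on */
                 _T->atomic--)      /* exit */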