diff options
Diffstat (limited to 'fs/bcachefs/journal_reclaim.c')
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 226 |
1 files changed, 114 insertions, 112 deletions
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index cd6201741c59..f23e5ee9ad75 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne return (struct journal_space) { 0, 0 }; /* + * It's possible for bucket size to be misaligned w.r.t. the filesystem + * block size: + */ + min_bucket_size = round_down(min_bucket_size, block_sectors(c)); + + /* * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ @@ -215,8 +221,8 @@ void bch2_journal_space_available(struct journal *j) if (nr_online < metadata_replicas_required(c)) { if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" "rw journal devs:", nr_online, metadata_replicas_required(c)); @@ -224,7 +230,6 @@ void bch2_journal_space_available(struct journal *j) prt_printf(&buf, " %s", ca->name); bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); } ret = bch_err_throw(c, insufficient_journal_devices); goto out; @@ -274,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct journal_device *ja static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - spin_lock(&j->lock); - bool ret = __should_discard_bucket(j, ja); - spin_unlock(&j->lock); - - return ret; + guard(spinlock)(&j->lock); + return __should_discard_bucket(j, ja); } /* @@ -289,7 +291,7 @@ void bch2_journal_do_discards(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - mutex_lock(&j->discard_lock); + guard(mutex)(&j->discard_lock); for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) { struct journal_device *ja = &ca->journal; @@ -303,15 +305,12 @@ void 
bch2_journal_do_discards(struct journal *j) ja->buckets[ja->discard_idx]), ca->mi.bucket_size, GFP_NOFS); - spin_lock(&j->lock); - ja->discard_idx = (ja->discard_idx + 1) % ja->nr; - - bch2_journal_space_available(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + ja->discard_idx = (ja->discard_idx + 1) % ja->nr; + bch2_journal_space_available(j); + } } } - - mutex_unlock(&j->discard_lock); } /* @@ -352,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq) void bch2_journal_pin_put(struct journal *j, u64 seq) { if (__bch2_journal_pin_put(j, seq)) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } } @@ -387,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j, void bch2_journal_pin_drop(struct journal *j, struct journal_entry_pin *pin) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); if (__journal_pin_drop(j, pin)) bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, @@ -437,7 +434,7 @@ void bch2_journal_pin_copy(struct journal *j, struct journal_entry_pin *src, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); u64 seq = READ_ONCE(src->seq); @@ -448,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j, * longer to exist, but that means there's no longer anything to * copy and we can bail out here: */ - spin_unlock(&j->lock); return; } @@ -465,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j, */ if (seq == journal_last_seq(j)) journal_wake(j); - spin_unlock(&j->lock); } void bch2_journal_pin_set(struct journal *j, u64 seq, struct journal_entry_pin *pin, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + bool wake; - BUG_ON(seq < journal_last_seq(j)); + scoped_guard(spinlock, &j->lock) { + BUG_ON(seq < journal_last_seq(j)); - bool reclaim = __journal_pin_drop(j, pin); + bool reclaim = __journal_pin_drop(j, pin); - 
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); - if (reclaim) - bch2_journal_reclaim_fast(j); - /* - * If the journal is currently full, we might want to call flush_fn - * immediately: - */ - if (seq == journal_last_seq(j)) - journal_wake(j); + if (reclaim) + bch2_journal_reclaim_fast(j); + /* + * If the journal is currently full, we might want to call flush_fn + * immediately: + */ + wake = seq == journal_last_seq(j); + } - spin_unlock(&j->lock); + if (wake) + journal_wake(j); } /** @@ -574,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j, j->last_flushed = jiffies; - spin_lock(&j->lock); - pin = journal_get_next_pin(j, seq_to_flush, - allowed_below, - allowed_above, &seq); - if (pin) { - BUG_ON(j->flush_in_progress); - j->flush_in_progress = pin; - j->flush_in_progress_dropped = false; - flush_fn = pin->flush; + scoped_guard(spinlock, &j->lock) { + pin = journal_get_next_pin(j, seq_to_flush, + allowed_below, + allowed_above, &seq); + if (pin) { + BUG_ON(j->flush_in_progress); + j->flush_in_progress = pin; + j->flush_in_progress_dropped = false; + flush_fn = pin->flush; + } } - spin_unlock(&j->lock); if (!pin) break; @@ -597,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j, err = flush_fn(j, pin, seq); - spin_lock(&j->lock); - /* Pin might have been dropped or rearmed: */ - if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); - j->flush_in_progress = NULL; - j->flush_in_progress_dropped = false; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + /* Pin might have been dropped or rearmed: */ + if (likely(!err && !j->flush_in_progress_dropped)) + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); + j->flush_in_progress = NULL; + j->flush_in_progress_dropped = false; + } 
wake_up(&j->pin_flush_wait); @@ -764,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg) j->reclaim_kicked = false; - mutex_lock(&j->reclaim_lock); - ret = __bch2_journal_reclaim(j, false, kicked); - mutex_unlock(&j->reclaim_lock); + scoped_guard(mutex, &j->reclaim_lock) + ret = __bch2_journal_reclaim(j, false, kicked); now = jiffies; delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); @@ -782,9 +778,8 @@ static int bch2_journal_reclaim_thread(void *arg) if (j->reclaim_kicked) break; - spin_lock(&j->lock); - journal_empty = fifo_empty(&j->pin); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + journal_empty = fifo_empty(&j->pin); long timeout = j->next_reclaim - jiffies; @@ -838,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j) static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, unsigned types) { + guard(spinlock)(&j->lock); + struct journal_entry_pin_list *pin_list; u64 seq; - - spin_lock(&j->lock); fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { if (seq > seq_to_flush) break; @@ -849,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) if ((BIT(i) & types) && (!list_empty(&pin_list->unflushed[i]) || - !list_empty(&pin_list->flushed[i]))) { - spin_unlock(&j->lock); + !list_empty(&pin_list->flushed[i]))) return true; - } } - spin_unlock(&j->lock); return false; } @@ -875,32 +867,54 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, if (ret) return ret; - mutex_lock(&j->reclaim_lock); + guard(mutex)(&j->reclaim_lock); for (int type = JOURNAL_PIN_TYPE_NR - 1; type >= 0; --type) if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { *did_work = true; - goto unlock; + + /* + * Question from Dan Carpenter, on the early return: + * + * If journal_flush_pins_or_still_flushing() returns + * true, then the flush hasn't completed and we must + * return 0; we want the outer 
closure_wait_event() in + * journal_flush_pins() to continue. + * + * The early return is there because we don't want to + * call journal_entry_close() until we've finished + * flushing all outstanding journal pins - otherwise + * seq_to_flush can be U64_MAX, and we'll close a bunch + * of journal entries and write tiny ones completely + * unnecessarily. + * + * Having the early return be in the loop where we loop + * over types is important, because flushing one journal + * pin can cause new journal pins to be added (even of + * the same type, btree node writes may generate more + * btree node writes, when updating the parent pointer + * has a full node and has to trigger a split/compact). + * + * This is part of our shutdown sequence, where order of + * flushing is important in order to make sure that it + * terminates... + */ + return 0; } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); - spin_lock(&j->lock); /* * If journal replay hasn't completed, the unreplayed journal entries * hold refs on their corresponding sequence numbers */ + guard(spinlock)(&j->lock); ret = !test_bit(JOURNAL_replay_done, &j->flags) || journal_last_seq(j) > seq_to_flush || !fifo_used(&j->pin); - - spin_unlock(&j->lock); -unlock: - mutex_unlock(&j->reclaim_lock); - return ret; } @@ -925,13 +939,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) u64 iter, seq = 0; int ret = 0; - spin_lock(&j->lock); - fifo_for_each_entry_ptr(p, &j->pin, iter) - if (dev_idx >= 0 - ? bch2_dev_list_has_dev(p->devs, dev_idx) - : p->devs.nr < c->opts.metadata_replicas) - seq = iter; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + fifo_for_each_entry_ptr(p, &j->pin, iter) + if (dev_idx >= 0 + ? 
bch2_dev_list_has_dev(p->devs, dev_idx) + : p->devs.nr < c->opts.metadata_replicas) + seq = iter; bch2_journal_flush_pins(j, seq); @@ -939,7 +952,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) if (ret) return ret; - mutex_lock(&c->replicas_gc_lock); + guard(mutex)(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); /* @@ -954,29 +967,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) goto err; seq = 0; - spin_lock(&j->lock); - while (!ret) { - union bch_replicas_padded replicas; + scoped_guard(spinlock, &j->lock) + while (!ret) { + union bch_replicas_padded replicas; - seq = max(seq, journal_last_seq(j)); - if (seq >= j->pin.back) - break; - bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, - journal_seq_pin(j, seq)->devs); - seq++; + seq = max(seq, journal_last_seq(j)); + if (seq >= j->pin.back) + break; + bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, + journal_seq_pin(j, seq)->devs); + seq++; - if (replicas.e.nr_devs) { - spin_unlock(&j->lock); - ret = bch2_mark_replicas(c, &replicas.e); - spin_lock(&j->lock); + if (replicas.e.nr_devs) { + spin_unlock(&j->lock); + ret = bch2_mark_replicas(c, &replicas.e); + spin_lock(&j->lock); + } } - } - spin_unlock(&j->lock); err: - ret = bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); - - return ret; + return bch2_replicas_gc_end(c, ret); } bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) @@ -984,20 +993,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - spin_lock(&j->lock); - if (!test_bit(JOURNAL_running, &j->flags)) { - spin_unlock(&j->lock); + guard(spinlock)(&j->lock); + guard(printbuf_atomic)(out); + + if (!test_bit(JOURNAL_running, &j->flags)) return true; - } *seq = max(*seq, j->pin.front); - if (*seq >= j->pin.back) { - spin_unlock(&j->lock); + if (*seq >= j->pin.back) 
return true; - } - - out->atomic++; pin_list = journal_seq_pin(j, *seq); @@ -1016,9 +1021,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 printbuf_indent_sub(out, 2); - --out->atomic; - spin_unlock(&j->lock); - return false; } |