author    Kent Overstreet <kent.overstreet@linux.dev>	2023-12-20 00:08:11 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>	2023-12-23 23:40:46 -0500
commit    339c51397e9f6b3c377e8559a81ca74e9bbfd493 (patch)
tree      dfa7a6d1b607be5102e0d792ca72e5d129ad3bcd
parent    5be20aa87df1bc2723f5f41e37c6c4aa4e0a6093 (diff)
bcachefs: Zone support for journal code
For zoned devices, we need to ensure we close buckets we're no longer writing to, since some (flash) devices have a limit on the number of active zones. Also, on startup, if we're going to continue appending to a partially-written bucket, we need to query the zone's write pointer.

This patch updates the journal code so that:

- On startup, we query the write pointer for the bucket cur_idx points to
- On startup, we ensure all journal buckets except the one cur_idx points to are closed
- In the journal write path, we factor out journal_close_buckets(), which now increments cur_idx when a bucket fills up so that we can start allocating from the next one - it also issues the appropriate zone command to close the previous bucket

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--	fs/bcachefs/journal_io.c	106
1 file changed, 72 insertions, 34 deletions
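Neither bch2_zone_report() nor the other zone helpers used below are defined in this file; presumably they are added elsewhere in the series. As a rough sketch only, bch2_zone_report() might wrap the kernel's blkdev_report_zones(), normalizing the write pointer to a zone-relative offset (an assumption, based on the caller comparing it directly against zone.capacity):

#include <linux/blkdev.h>

/*
 * Hypothetical sketch of bch2_zone_report(), not part of this patch:
 * report the zone containing @sector, with wp rewritten as a
 * zone-relative sector offset.
 */
static int bch2_zone_report_cb(struct blk_zone *zone, unsigned int idx,
			       void *data)
{
	memcpy(data, zone, sizeof(*zone));
	return 0;
}

int bch2_zone_report(struct block_device *bdev, sector_t sector,
		     struct blk_zone *zone)
{
	int ret;

	if (!bdev_is_zoned(bdev))
		return -EOPNOTSUPP;

	ret = blkdev_report_zones(bdev, sector, 1, bch2_zone_report_cb, zone);
	if (ret < 0)
		return ret;
	if (!ret)
		return -ENOENT;

	/* report wp relative to the start of the zone: */
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
		zone->wp -= zone->start;
	return 0;
}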
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index b0ce77655596..824b91e9bf3d 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1110,6 +1110,38 @@ found:
BUG_ON(!wrote);
ja->sectors_free = cur_bucket_capacity - min(wrote, cur_bucket_capacity);
+ if (ca->zoned) {
+ struct blk_zone zone;
+ unsigned i;
+
+ if (!bch2_zone_report(ca->disk_sb.bdev,
+ bucket_to_sector(ca, ja->buckets[ja->cur_idx]),
+ &zone) &&
+ zone.type != BLK_ZONE_TYPE_CONVENTIONAL &&
+ blk_zone_writeable(zone)) {
+ if (bch2_fs_inconsistent_on(zone.capacity - zone.wp > ja->sectors_free, c,
+ "device claims %llu sectors written to current journal bucket but found %u",
+ zone.wp, ca->mi.bucket_size - ja->sectors_free)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ja->sectors_free = zone.capacity - zone.wp;
+ }
+
+ /* Make sure all other nonempty journal buckets are closed: */
+ for (i = 0; i < ja->nr; i++) {
+ if (i == ja->cur_idx)
+ continue;
+
+ if (!bch2_zone_report(ca->disk_sb.bdev,
+ bucket_to_sector(ca, ja->buckets[i]),
+ &zone) &&
+ zone.cond != BLK_ZONE_COND_EMPTY)
+ bch2_bucket_finish(ca, ja->buckets[i]);
+ }
+ }
+
/*
* Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't
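bch2_bucket_finish(), used above and again in journal_close_buckets() below, is likewise not shown in this patch. A minimal sketch, assuming it issues a zone-finish command via blkdev_zone_mgmt() (the five-argument form with a GFP mask, as in kernels of this era) and that journal buckets are zone-sized:

/*
 * Hypothetical sketch of bch2_bucket_finish(), not part of this patch:
 * transition the zone backing @bucket to the FINISHED state so it no
 * longer counts against the device's active-zone limit.  Assumes bucket
 * size == zone size.
 */
static int bch2_bucket_finish(struct bch_dev *ca, u64 bucket)
{
	struct block_device *bdev = ca->disk_sb.bdev;

	if (!bdev_is_zoned(bdev))
		return 0;

	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH,
				bucket_to_sector(ca, bucket),
				ca->mi.bucket_size, GFP_KERNEL);
}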
@@ -1374,6 +1406,34 @@ fsck_err:
/* journal write: */
+static void journal_close_buckets(struct journal *j, unsigned sectors)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ for_each_rw_member(c, ca) {
+ struct journal_device *ja = &ca->journal;
+
+ if (sectors > ja->sectors_free &&
+ sectors <= ca->mi.bucket_size &&
+ bch2_journal_dev_buckets_available(j, ja,
+ journal_space_discarded)) {
+ spin_unlock(&j->lock);
+ bch2_bucket_finish(ca, ja->buckets[ja->cur_idx]);
+ spin_lock(&j->lock);
+
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+ ja->sectors_free = bucket_capacity(ca, ja->buckets[ja->cur_idx]);
+
+ /*
+ * ja->bucket_seq[ja->cur_idx] must always have
+ * something sensible:
+ */
+ ja->bucket_seq[ja->cur_idx] = journal_last_unwritten_seq(j);
+ }
+ }
+}
+
static void __journal_write_alloc(struct journal *j,
struct journal_buf *w,
struct dev_alloc_list *devs_sorted,
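bucket_capacity(), used when advancing cur_idx in journal_close_buckets() above and again in journal_write_alloc() below, is also assumed to come from elsewhere in the series. Because its callers hold j->lock, it can't issue a blocking zone report; the sketch below assumes the zone capacity was cached at device-open time in a hypothetical ca->zone_capacity field (ca->zoned is likewise assumed to be set there):

/*
 * Hypothetical sketch of bucket_capacity(), not part of this patch:
 * usable sectors in a journal bucket.  ca->zoned and ca->zone_capacity
 * are assumed fields filled in at device open; on ZNS devices the zone
 * capacity may be smaller than the zone (bucket) size.
 */
static unsigned bucket_capacity(struct bch_dev *ca, u64 bucket)
{
	if (ca->zoned && ca->zone_capacity)
		return min_t(unsigned, ca->zone_capacity, ca->mi.bucket_size);

	return ca->mi.bucket_size;
}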
@@ -1441,59 +1501,37 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_devs_mask devs;
- struct journal_device *ja;
- struct bch_dev *ca;
struct dev_alloc_list devs_sorted;
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
unsigned target = c->opts.metadata_target ?:
c->opts.foreground_target;
- unsigned i, replicas = 0, replicas_want =
+ unsigned replicas = 0, replicas_want =
READ_ONCE(c->opts.metadata_replicas);
+ bool did_close = false;
- rcu_read_lock();
retry:
+ rcu_read_lock();
devs = target_rw_devs(c, BCH_DATA_journal, target);
-
devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
__journal_write_alloc(j, w, &devs_sorted,
sectors, &replicas, replicas_want);
+ rcu_read_unlock();
- if (replicas >= replicas_want)
- goto done;
-
- for (i = 0; i < devs_sorted.nr; i++) {
- ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
- if (!ca)
- continue;
-
- ja = &ca->journal;
-
- if (sectors > ja->sectors_free &&
- sectors <= ca->mi.bucket_size &&
- bch2_journal_dev_buckets_available(j, ja,
- journal_space_discarded)) {
- ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
- ja->sectors_free = bucket_capacity(ca, ja->buckets[ja->cur_idx]);
-
- /*
- * ja->bucket_seq[ja->cur_idx] must always have
- * something sensible:
- */
- ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
- }
+ if (replicas < replicas_want && !did_close) {
+ journal_close_buckets(j, sectors);
+ did_close = true;
+ goto retry;
}
- __journal_write_alloc(j, w, &devs_sorted,
- sectors, &replicas, replicas_want);
-
if (replicas < replicas_want && target) {
- /* Retry from all devices: */
+ /*
+ * Retry from all devices
+ * XXX: this should be configurable
+ */
target = 0;
goto retry;
}
-done:
- rcu_read_unlock();
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);