author    Kent Overstreet <kent.overstreet@linux.dev>  2023-12-20 00:07:29 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-12-23 23:40:46 -0500
commit    5be20aa87df1bc2723f5f41e37c6c4aa4e0a6093 (patch)
tree      b3ae63fec839851887fb7dc12faae8812ee8bfdc
parent    1f383e88038c2038fd972e9da70190c602dd6840 (diff)
bcachefs: New superblock layout for zoned devices XXX todo
On zoned devices that don't have any random-write capable zones, we need a
different strategy for writing the superblock. This patch implements a path
that uses the first two zones for sequentially logging superblock writes.

XXX: We still need to do something with the sb_layout struct and make sure it
points to the right place, so that superblock buckets get marked correctly.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
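The recovery side of that scheme amounts to: scan both log zones for valid
superblocks and keep the newest one. A minimal sketch of the idea, not the
patch's implementation; read_sb_at() and sb_valid_at() are hypothetical
helpers standing in for the real read/validation paths:

    /*
     * Sketch only: walk the first two zones and remember the offset of the
     * valid superblock with the highest seq.
     */
    static u64 newest_sb_offset(struct block_device *bdev, u64 zone_sectors)
    {
            u64 best_seq = 0, best_offset = 0;

            for (unsigned bucket = 0; bucket < 2; bucket++) {
                    u64 offset      = zone_sectors * bucket;
                    u64 end         = zone_sectors * (bucket + 1);

                    for (; offset < end; offset++) {
                            struct bch_sb *sb = read_sb_at(bdev, offset);   /* hypothetical helper */

                            if (sb &&
                                sb_valid_at(sb, offset) &&                  /* magic, offset, csum */
                                le64_to_cpu(sb->seq) > best_seq) {
                                    best_seq    = le64_to_cpu(sb->seq);
                                    best_offset = offset;
                            }
                    }
            }

            return best_offset;     /* 0 if no valid superblock was found */
    }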
-rw-r--r--  fs/bcachefs/super-io.c    | 224
-rw-r--r--  fs/bcachefs/super_types.h |   9
-rw-r--r--  fs/bcachefs/zone.c        |   2
-rw-r--r--  fs/bcachefs/zone.h        |   2
4 files changed, 202 insertions, 35 deletions
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 270863663595..aaed51279245 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "buckets.h"
#include "checksum.h"
#include "counters.h"
#include "disk_groups.h"
@@ -171,9 +172,8 @@ void bch2_free_super(struct bch_sb_handle *sb)
memset(sb, 0, sizeof(*sb));
}
-int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
+static int __bch2_sb_realloc(struct bch_sb_handle *sb, size_t new_bytes)
{
- size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
size_t new_buffer_size;
struct bch_sb *new_sb;
struct bio *bio;
@@ -226,6 +226,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
return 0;
}
+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
+{
+ return __bch2_sb_realloc(sb,
+ __vstruct_bytes(struct bch_sb, u64s));
+}
+
struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
enum bch_sb_field_type type,
unsigned u64s)
@@ -593,17 +599,23 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
/* read superblock: */
+static int submit_read_sync(struct bio *bio, struct block_device *bdev,
+ size_t count, u64 offset, void *buf)
+{
+ bio_reset(bio, bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, buf, count);
+
+ return submit_bio_wait(bio);
+}
+
static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{
struct bch_csum csum;
size_t bytes;
int ret;
reread:
- bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
- sb->bio->bi_iter.bi_sector = offset;
- bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);
-
- ret = submit_bio_wait(sb->bio);
+ ret = submit_read_sync(sb->bio, sb->bdev, sb->buffer_size, offset, sb->sb);
if (ret) {
prt_printf(err, "IO error: %i", ret);
return ret;
@@ -619,6 +631,12 @@ reread:
if (ret)
return ret;
+ if (offset != le64_to_cpu(sb->sb->offset)) {
+ prt_printf(err, "Invalid superblock: not at correct offset (read from %llu , sb offset %llu)",
+ offset, le64_to_cpu(sb->sb->offset));
+ return -EINVAL;
+ }
+
bytes = vstruct_bytes(sb->sb);
if (bytes > 512 << sb->sb->layout.sb_max_size_bits) {
@@ -648,8 +666,105 @@ reread:
return -BCH_ERR_invalid_sb_csum;
}
- sb->seq = le64_to_cpu(sb->sb->seq);
+ return 0;
+}
+
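+/* Returns 1 if zone 0 is randomly writable (non-zoned device or conventional zone), 0 if not, or a negative error: */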
+static int zone_0_is_normal(struct block_device *bdev)
+{
+ struct blk_zone zone;
+ int ret;
+
+ if (!bdev_nr_zones(bdev))
+ return 1;
+
+ ret = bch2_zone_report(bdev, 0, &zone);
+ if (ret)
+ return ret;
+
+ return zone.type == BLK_ZONE_TYPE_CONVENTIONAL;
+}
+
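+/*
+ * Scan the first two zones, 512 bytes at a time, for valid superblocks
+ * (magic, offset and checksum must all check out) and remember the one
+ * with the highest sequence number; it is then re-read via read_one_super():
+ */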
+static int ringbuffer_read_super(struct bch_sb_handle *sb, struct printbuf *err)
+{
+ struct blk_zone zone;
+ u64 best_seq = 0;
+ unsigned bucket;
+ int ret;
+
+ ret = __bch2_sb_realloc(sb, 1U << 20);
+ if (ret)
+ return ret;
+
+ ret = bch2_zone_report(sb->bdev, 0, &zone);
+ if (ret) {
+ pr_err("error getting zone %u: %s", 0, bch2_err_str(ret));
+ return ret;
+ }
+
+ for (bucket = 0; bucket < 2; bucket++) {
+ u64 offset = zone.len * bucket;
+ u64 end_offset = zone.len * (bucket + 1);
+ u64 next_offset;
+ unsigned buf_offset, bytes;
+
+ while (offset < end_offset) {
+reread:
+ ret = submit_read_sync(sb->bio, sb->bdev, sb->buffer_size,
+ offset, sb->sb);
+ if (ret) {
+ prt_printf(err, "IO error: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ next_offset = min(offset + (sb->buffer_size >> 9), end_offset);
+ buf_offset = 0;
+
+ while (offset < next_offset) {
+ struct bch_sb *i = (void *) sb->sb + buf_offset;
+ struct bch_csum csum;
+
+ if (!uuid_equal(&i->magic, &BCHFS_MAGIC))
+ goto next_offset;
+
+ if (offset != le64_to_cpu(i->offset))
+ goto next_offset;
+
+ if (BCH_SB_CSUM_TYPE(i) >= BCH_CSUM_NR)
+ goto next_offset;
+
+ bytes = vstruct_bytes(i);
+ if (buf_offset + bytes > sb->buffer_size) {
+ if (bch2_sb_realloc(sb, le32_to_cpu(i->u64s)))
+ return -ENOMEM;
+ goto reread;
+ }
+
+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(i), null_nonce(), i);
+ if (bch2_crc_cmp(csum, i->csum))
+ goto next_offset;
+
+ if (le64_to_cpu(i->seq) > best_seq) {
+ best_seq = le64_to_cpu(i->seq);
+ sb->ringbuffer_last_offset = offset;
+ }
+next_offset:
+ offset++;
+ buf_offset += 512;
+ }
+ }
+ }
+
+ if (!best_seq)
+ return ret ?: -EINVAL;
+
+ ret = read_one_super(sb, sb->ringbuffer_last_offset, err);
+ if (ret)
+ return ret;
+
+ sb->ringbuffer_last_sectors =
+ roundup((size_t) vstruct_bytes(sb->sb),
+ bdev_logical_block_size(sb->bdev)) >> 9;
return 0;
}
@@ -715,13 +830,29 @@ retry:
goto err;
}
+ if (opt_defined(*opts, sb)) {
+ ret = read_one_super(sb, offset, &err);
+ if (!ret)
+ goto got_super;
+ goto err;
+ }
+
+ ret = zone_0_is_normal(sb->bdev);
+ if (ret < 0)
+ goto err;
+
+ sb->sb_ringbuffer = !ret;
+ if (sb->sb_ringbuffer) {
+ ret = ringbuffer_read_super(sb, &err);
+ if (ret)
+ goto err;
+ goto got_super;
+ }
+
ret = read_one_super(sb, offset, &err);
if (!ret)
goto got_super;
- if (opt_defined(*opts, sb))
- goto err;
-
prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
path, err.buf);
if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
@@ -735,16 +866,12 @@ retry:
/*
* Error reading primary superblock - read location of backup
* superblocks:
- */
- bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
- sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
- /*
+ *
* use sb buffer to read layout, since sb buffer is page aligned but
* layout won't be:
*/
- bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
-
- ret = submit_bio_wait(sb->bio);
+ ret = submit_read_sync(sb->bio, sb->bdev, sizeof(struct bch_sb_layout),
+ BCH_SB_LAYOUT_SECTOR, sb->sb);
if (ret) {
prt_printf(&err, "IO error: %i", ret);
goto err;
@@ -785,15 +912,15 @@ got_super:
goto err;
}
- ret = 0;
- sb->have_layout = true;
-
ret = bch2_sb_validate(sb, &err, READ);
if (ret) {
printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
path, err.buf);
goto err_no_print;
}
+
+ sb->have_layout = true;
+ sb->seq = le64_to_cpu(sb->sb->seq);
out:
printbuf_exit(&err);
return ret;
@@ -846,7 +973,9 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
struct bio *bio = ca->disk_sb.bio;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
- bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
+ bio->bi_iter.bi_sector = !ca->disk_sb.sb_ringbuffer
+ ? le64_to_cpu(sb->layout.sb_offset[0])
+ : ca->disk_sb.ringbuffer_last_offset;
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
@@ -858,12 +987,49 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
closure_bio_submit(bio, &c->sb_write);
}
-static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
+/* Returns true if we did work: */
+static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{
struct bch_sb *sb = ca->disk_sb.sb;
struct bio *bio = ca->disk_sb.bio;
+ unsigned sectors = roundup((size_t) vstruct_bytes(sb),
+ bdev_logical_block_size(ca->disk_sb.bdev)) >> 9;
+
+ if (ca->sb_write_error)
+ return false;
+
+ if (!ca->disk_sb.sb_ringbuffer) {
+ if (idx >= ca->disk_sb.sb->layout.nr_superblocks)
+ return false;
- sb->offset = sb->layout.sb_offset[idx];
+ sb->offset = sb->layout.sb_offset[idx];
+ } else {
+ unsigned bucket;
+ u64 offset, end;
+
+ if (idx)
+ return false;
+
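+ /* Append after the superblock most recently read or written: */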
+ offset = ca->disk_sb.ringbuffer_last_offset +
+ ca->disk_sb.ringbuffer_last_sectors;
+ end = offset + sectors;
+
+ bucket = sector_to_bucket(ca, offset);
+
+ if (end > bucket_to_sector(ca, bucket + 1)) {
+ /* Switch to writing to the other bucket: */
+ bch2_bucket_finish(ca, bucket);
+
+ bucket ^= 1;
+ offset = bucket_to_sector(ca, bucket);
+
+ bch2_bucket_discard(ca, bucket);
+ }
+
+ ca->disk_sb.ringbuffer_last_offset = offset;
+ ca->disk_sb.ringbuffer_last_sectors = sectors;
+ sb->offset = cpu_to_le64(offset);
+ }
SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
@@ -873,15 +1039,14 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
- bch2_bio_map(bio, sb,
- roundup((size_t) vstruct_bytes(sb),
- bdev_logical_block_size(ca->disk_sb.bdev)));
+ bch2_bio_map(bio, sb, sectors << 9);
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
bio_sectors(bio));
percpu_ref_get(&ca->io_ref);
closure_bio_submit(bio, &c->sb_write);
+ return true;
}
int bch2_write_super(struct bch_fs *c)
@@ -1000,11 +1165,8 @@ int bch2_write_super(struct bch_fs *c)
do {
wrote = false;
for_each_online_member(c, ca)
- if (!ca->sb_write_error &&
- sb < ca->disk_sb.sb->layout.nr_superblocks) {
- write_one_super(c, ca, sb);
+ if (write_one_super(c, ca, sb))
wrote = true;
- }
closure_sync(cl);
sb++;
} while (wrote);
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index 9c1fd4ca2b10..021519c97e2b 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -10,10 +10,13 @@ struct bch_sb_handle {
void *holder;
size_t buffer_size;
blk_mode_t mode;
- unsigned have_layout:1;
- unsigned have_bio:1;
- unsigned fs_sb:1;
+ bool have_layout:1;
+ bool have_bio:1;
+ bool fs_sb:1;
+ bool sb_ringbuffer:1;
u64 seq;
+ u64 ringbuffer_last_offset;
+ unsigned ringbuffer_last_sectors;
};
struct bch_devs_mask {
diff --git a/fs/bcachefs/zone.c b/fs/bcachefs/zone.c
index 6f48f58a6c50..41a026ad5120 100644
--- a/fs/bcachefs/zone.c
+++ b/fs/bcachefs/zone.c
@@ -15,7 +15,7 @@ static int zone_report_cb(struct blk_zone *src, unsigned idx, void *data)
return 0;
}
-static int bch2_zone_report(struct block_device *bdev, sector_t sector, struct blk_zone *zone)
+int bch2_zone_report(struct block_device *bdev, sector_t sector, struct blk_zone *zone)
{
int ret = blkdev_report_zones(bdev, sector, 1, zone_report_cb, zone);
diff --git a/fs/bcachefs/zone.h b/fs/bcachefs/zone.h
index 620efc4fcbdc..8f62610aa357 100644
--- a/fs/bcachefs/zone.h
+++ b/fs/bcachefs/zone.h
@@ -4,6 +4,8 @@
#include "eytzinger.h"
+int bch2_zone_report(struct block_device *, sector_t, struct blk_zone *);
+
static inline bool blk_zone_writeable(struct blk_zone zone)
{
return (zone.cond == BLK_ZONE_COND_EMPTY ||