diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-20 00:07:29 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-23 23:40:46 -0500 |
commit | 5be20aa87df1bc2723f5f41e37c6c4aa4e0a6093 (patch) | |
tree | b3ae63fec839851887fb7dc12faae8812ee8bfdc | |
parent | 1f383e88038c2038fd972e9da70190c602dd6840 (diff) |
bcachefs: New superblock layout for zoned devices XXX todo
On zoned devices that don't have any random-write capable zones, we need
a different strategy for writing the superblock.
This patch implements a path that uses the first two zones for
sequentially logging superblock writes.
XXX: We still need to do something with the sb_layout struct and make
sure it points to the right place, so that superblock buckets get marked
correctly
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/super-io.c | 224 | ||||
-rw-r--r-- | fs/bcachefs/super_types.h | 9 | ||||
-rw-r--r-- | fs/bcachefs/zone.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/zone.h | 2 |
4 files changed, 202 insertions, 35 deletions
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 270863663595..aaed51279245 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "buckets.h" #include "checksum.h" #include "counters.h" #include "disk_groups.h" @@ -171,9 +172,8 @@ void bch2_free_super(struct bch_sb_handle *sb) memset(sb, 0, sizeof(*sb)); } -int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) +static int __bch2_sb_realloc(struct bch_sb_handle *sb, size_t new_bytes) { - size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); size_t new_buffer_size; struct bch_sb *new_sb; struct bio *bio; @@ -226,6 +226,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) return 0; } +int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) +{ + return __bch2_sb_realloc(sb, + __vstruct_bytes(struct bch_sb, u64s)); +} + struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, enum bch_sb_field_type type, unsigned u64s) @@ -593,17 +599,23 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) /* read superblock: */ +static int submit_read_sync(struct bio *bio, struct block_device *bdev, + size_t count, u64 offset, void *buf) +{ + bio_reset(bio, bdev, REQ_OP_READ|REQ_SYNC|REQ_META); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, buf, count); + + return submit_bio_wait(bio); +} + static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) { struct bch_csum csum; size_t bytes; int ret; reread: - bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); - sb->bio->bi_iter.bi_sector = offset; - bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); - - ret = submit_bio_wait(sb->bio); + ret = submit_read_sync(sb->bio, sb->bdev, sb->buffer_size, offset, sb->sb); if (ret) { prt_printf(err, "IO error: %i", ret); return ret; @@ -619,6 +631,12 @@ reread: if (ret) return ret; + if (offset != le64_to_cpu(sb->sb->offset)) { + prt_printf(err, "Invalid superblock: not at correct offset (read from %llu , sb offset %llu)", + offset, le64_to_cpu(sb->sb->offset)); + return -EINVAL; + } + bytes = vstruct_bytes(sb->sb); if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { @@ -648,8 +666,105 @@ reread: return -BCH_ERR_invalid_sb_csum; } - sb->seq = le64_to_cpu(sb->sb->seq); + return 0; +} + +static int zone_0_is_normal(struct block_device *bdev) +{ + struct blk_zone zone; + int ret; + + if (!bdev_nr_zones(bdev)) + return 1; + + ret = bch2_zone_report(bdev, 0, &zone); + if (ret) + return ret; + + return zone.type == BLK_ZONE_TYPE_CONVENTIONAL; +} + +static int ringbuffer_read_super(struct bch_sb_handle *sb, struct printbuf *err) +{ + struct blk_zone zone; + u64 best_seq = 0; + unsigned bucket; + int ret; + + ret = __bch2_sb_realloc(sb, 1U << 20); + if (ret) + return ret; + + ret = bch2_zone_report(sb->bdev, 0, &zone); + if (ret) { + pr_err("error getting zone %u: %s", 0, bch2_err_str(ret)); + return ret; + } + + for (bucket = 0; bucket < 2; bucket++) { + u64 offset = zone.len * bucket; + u64 end_offset = zone.len * (bucket + 1); + u64 next_offset; + unsigned buf_offset, bytes; + + while (offset < end_offset) { +reread: + ret = submit_read_sync(sb->bio, sb->bdev, sb->buffer_size, + offset, sb->sb); + if (ret) { + prt_printf(err, "IO error: %s", bch2_err_str(ret)); + return ret; + } + + next_offset = min(offset + (sb->buffer_size >> 9), end_offset); + buf_offset = 0; + + while (offset < next_offset) { + struct bch_sb *i = (void *) sb->sb + buf_offset; + struct bch_csum csum; + + if (!uuid_equal(&i->magic, &BCHFS_MAGIC)) + goto next_offset; + + if (offset != le64_to_cpu(i->offset)) + goto next_offset; + + if (BCH_SB_CSUM_TYPE(i) >= BCH_CSUM_NR) + goto next_offset; + + bytes = vstruct_bytes(i); + if (buf_offset + bytes > sb->buffer_size) { + if (bch2_sb_realloc(sb, le32_to_cpu(i->u64s))) + return -ENOMEM; + goto reread; + } + + csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(i), null_nonce(), i); + if (bch2_crc_cmp(csum, i->csum)) + goto next_offset; + + if (le64_to_cpu(i->seq) > best_seq) { + best_seq = le64_to_cpu(i->seq); + sb->ringbuffer_last_offset = offset; + } +next_offset: + offset++; + buf_offset += 512; + } + } + } + + if (!best_seq) + return ret ?: -EINVAL; + + ret = read_one_super(sb, sb->ringbuffer_last_offset, err); + if (ret) + return ret; + + sb->ringbuffer_last_sectors = + roundup((size_t) vstruct_bytes(sb->sb), + bdev_logical_block_size(sb->bdev)) >> 9; return 0; } @@ -715,13 +830,29 @@ retry: goto err; } + if (opt_defined(*opts, sb)) { + ret = read_one_super(sb, offset, &err); + if (!ret) + goto got_super; + goto err; + } + + ret = zone_0_is_normal(sb->bdev); + if (ret < 0) + goto err; + + sb->sb_ringbuffer = !ret; + if (sb->sb_ringbuffer) { + ret = ringbuffer_read_super(sb, &err); + if (ret) + goto err; + goto got_super; + } + ret = read_one_super(sb, offset, &err); if (!ret) goto got_super; - if (opt_defined(*opts, sb)) - goto err; - prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n", path, err.buf); if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg) @@ -735,16 +866,12 @@ retry: /* * Error reading primary superblock - read location of backup * superblocks: - */ - bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); - sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; - /* + * * use sb buffer to read layout, since sb buffer is page aligned but * layout won't be: */ - bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); - - ret = submit_bio_wait(sb->bio); + ret = submit_read_sync(sb->bio, sb->bdev, sizeof(struct bch_sb_layout), + BCH_SB_LAYOUT_SECTOR, sb->sb); if (ret) { prt_printf(&err, "IO error: %i", ret); goto err; @@ -785,15 +912,15 @@ got_super: goto err; } - ret = 0; - sb->have_layout = true; - ret = bch2_sb_validate(sb, &err, READ); if (ret) { printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); goto err_no_print; } + + sb->have_layout = true; + sb->seq = le64_to_cpu(sb->sb->seq); out: printbuf_exit(&err); return ret; @@ -846,7 +973,9 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) struct bio *bio = ca->disk_sb.bio; bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); - bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); + bio->bi_iter.bi_sector = !ca->disk_sb.sb_ringbuffer + ? le64_to_cpu(sb->layout.sb_offset[0]) + : ca->disk_sb.ringbuffer_last_offset; bio->bi_end_io = write_super_endio; bio->bi_private = ca; bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); @@ -858,12 +987,49 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) closure_bio_submit(bio, &c->sb_write); } -static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) +/* Returns true if we did work: */ +static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) { struct bch_sb *sb = ca->disk_sb.sb; struct bio *bio = ca->disk_sb.bio; + unsigned sectors = roundup((size_t) vstruct_bytes(sb), + bdev_logical_block_size(ca->disk_sb.bdev)) >> 9; + + if (ca->sb_write_error) + return false; + + if (!ca->disk_sb.sb_ringbuffer) { + if (idx >= ca->disk_sb.sb->layout.nr_superblocks) + return false; - sb->offset = sb->layout.sb_offset[idx]; + sb->offset = sb->layout.sb_offset[idx]; + } else { + unsigned bucket, sectors; + u64 offset, end; + + if (idx) + return false; + + offset = ca->disk_sb.ringbuffer_last_offset + + ca->disk_sb.ringbuffer_last_sectors; + end = offset + sectors; + + bucket = sector_to_bucket(ca, offset); + + if (end > bucket_to_sector(ca, bucket + 1)) { + /* Switch to writing to the other bucket: */ + bch2_bucket_finish(ca, bucket); + + bucket ^= 1; + offset = bucket_to_sector(ca, bucket); + + bch2_bucket_discard(ca, bucket); + } + + ca->disk_sb.ringbuffer_last_offset = offset; + ca->disk_sb.ringbuffer_last_sectors = sectors; + sb->offset = cpu_to_le64(offset); + } SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false)); sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), @@ -873,15 +1039,14 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); bio->bi_end_io = write_super_endio; bio->bi_private = ca; - bch2_bio_map(bio, sb, - roundup((size_t) vstruct_bytes(sb), - bdev_logical_block_size(ca->disk_sb.bdev))); + bch2_bio_map(bio, sb, sectors << 9); this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); percpu_ref_get(&ca->io_ref); closure_bio_submit(bio, &c->sb_write); + return true; } int bch2_write_super(struct bch_fs *c) @@ -1000,11 +1165,8 @@ int bch2_write_super(struct bch_fs *c) do { wrote = false; for_each_online_member(c, ca) - if (!ca->sb_write_error && - sb < ca->disk_sb.sb->layout.nr_superblocks) { - write_one_super(c, ca, sb); + if (write_one_super(c, ca, sb)) wrote = true; - } closure_sync(cl); sb++; } while (wrote); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 9c1fd4ca2b10..021519c97e2b 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -10,10 +10,13 @@ struct bch_sb_handle { void *holder; size_t buffer_size; blk_mode_t mode; - unsigned have_layout:1; - unsigned have_bio:1; - unsigned fs_sb:1; + bool have_layout:1; + bool have_bio:1; + bool fs_sb:1; + bool sb_ringbuffer:1; u64 seq; + u64 ringbuffer_last_offset; + unsigned ringbuffer_last_sectors; }; struct bch_devs_mask { diff --git a/fs/bcachefs/zone.c b/fs/bcachefs/zone.c index 6f48f58a6c50..41a026ad5120 100644 --- a/fs/bcachefs/zone.c +++ b/fs/bcachefs/zone.c @@ -15,7 +15,7 @@ static int zone_report_cb(struct blk_zone *src, unsigned idx, void *data) return 0; } -static int bch2_zone_report(struct block_device *bdev, sector_t sector, struct blk_zone *zone) +int bch2_zone_report(struct block_device *bdev, sector_t sector, struct blk_zone *zone) { int ret = blkdev_report_zones(bdev, sector, 1, zone_report_cb, zone); diff --git a/fs/bcachefs/zone.h b/fs/bcachefs/zone.h index 620efc4fcbdc..8f62610aa357 100644 --- a/fs/bcachefs/zone.h +++ b/fs/bcachefs/zone.h @@ -4,6 +4,8 @@ #include "eytzinger.h" +int bch2_zone_report(struct block_device *, sector_t, struct blk_zone *); + static inline bool blk_zone_writeable(struct blk_zone zone) { return (zone.cond == BLK_ZONE_COND_EMPTY || |