diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2024-11-29 21:08:00 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2024-11-29 21:27:09 -0500 |
commit | de51418b60d7bf7d783d0ed112de00a63928c337 (patch) | |
tree | 077e848a35906d272a78676389312af7589de97a /libbcachefs/extents.c | |
parent | 6829fb201072c495ce9e97850664540a0f8294f1 (diff) |
Update bcachefs sources to bc01863fb6ef bcachefs: bcachefs_metadata_version_disk_accounting_big_endian
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r-- | libbcachefs/extents.c | 367 |
1 file changed, 160 insertions, 207 deletions
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index b7b5ea9c..2fc9ace5 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -21,6 +21,7 @@ #include "extents.h" #include "inode.h" #include "journal.h" +#include "rebalance.h" #include "replicas.h" #include "super.h" #include "super-io.h" @@ -88,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c, u64 l1 = dev_latency(c, p1.ptr.dev); u64 l2 = dev_latency(c, p2.ptr.dev); + /* + * Square the latencies, to bias more in favor of the faster + * device - we never want to stop issuing reads to the slower + * device altogether, so that we can update our latency numbers: + */ + l1 *= l1; + l2 *= l2; + /* Pick at random, biased in favor of the faster device: */ return bch2_rand_range(l1 + l2) > l1; @@ -169,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { int ret = 0; @@ -177,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_validate(c, k, flags); + ret = bch2_bkey_ptrs_validate(c, k, from); fsck_err: return ret; } @@ -189,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, } int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; @@ -203,13 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, c, btree_ptr_v2_min_key_bad, "min_key > key"); - if ((flags & BCH_VALIDATE_write) && + if ((from.flags & BCH_VALIDATE_write) && c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) bkey_fsck_err_on(!bp.v->sectors_written, c, 
btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_validate(c, k, flags); + ret = bch2_bkey_ptrs_validate(c, k, from); fsck_err: return ret; } @@ -396,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; @@ -979,31 +988,54 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke return NULL; } -void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr) +static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, + struct bch_extent_ptr *ptr) +{ + if (!opts->promote_target || + !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + return false; + + struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); + + return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr); +} + +void bch2_extent_ptr_set_cached(struct bch_fs *c, + struct bch_io_opts *opts, + struct bkey_s k, + struct bch_extent_ptr *ptr) { struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *entry; - union bch_extent_entry *ec = NULL; + struct extent_ptr_decoded p; - bkey_extent_entry_for_each(ptrs, entry) { + rcu_read_lock(); + if (!want_cached_ptr(c, opts, ptr)) { + bch2_bkey_drop_ptr_noerror(k, ptr); + goto out; + } + + /* + * Stripes can't contain cached data, for - reasons. + * + * Possibly something we can fix in the future? 
+ */ + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (&entry->ptr == ptr) { - ptr->cached = true; - if (ec) - extent_entry_drop(k, ec); - return; + if (p.has_ec) + bch2_bkey_drop_ptr_noerror(k, ptr); + else + ptr->cached = true; + goto out; } - if (extent_entry_is_stripe_ptr(entry)) - ec = entry; - else if (extent_entry_is_ptr(entry)) - ec = NULL; - } - BUG(); +out: + rcu_read_unlock(); } /* - * bch_extent_normalize - clean up an extent, dropping stale pointers etc. + * bch2_extent_normalize - clean up an extent, dropping stale pointers etc. * * Returns true if @k should be dropped entirely * @@ -1017,8 +1049,39 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) rcu_read_lock(); bch2_bkey_drop_ptrs(k, ptr, ptr->cached && - (ca = bch2_dev_rcu(c, ptr->dev)) && - dev_ptr_stale_rcu(ca, ptr) > 0); + (!(ca = bch2_dev_rcu(c, ptr->dev)) || + dev_ptr_stale_rcu(ca, ptr) > 0)); + rcu_read_unlock(); + + return bkey_deleted(k.k); +} + +/* + * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc. + * + * Like bch2_extent_normalize(), but also only keeps a single cached pointer on + * the promote target. 
+ */ +bool bch2_extent_normalize_by_opts(struct bch_fs *c, + struct bch_io_opts *opts, + struct bkey_s k) +{ + struct bkey_ptrs ptrs; + bool have_cached_ptr; + + rcu_read_lock(); +restart_drop_ptrs: + ptrs = bch2_bkey_ptrs(k); + have_cached_ptr = false; + + bkey_for_each_ptr(ptrs, ptr) + if (ptr->cached) { + if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) { + bch2_bkey_drop_ptr(k, ptr); + goto restart_drop_ptrs; + } + have_cached_ptr = true; + } rcu_read_unlock(); return bkey_deleted(k.k); @@ -1067,6 +1130,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr bch2_prt_compression_type(out, crc->compression_type); } +static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, + const struct bch_extent_rebalance *r) +{ + prt_str(out, "rebalance:"); + + prt_printf(out, " replicas=%u", r->data_replicas); + if (r->data_replicas_from_inode) + prt_str(out, " (inode)"); + + prt_str(out, " checksum="); + bch2_prt_csum_opt(out, r->data_checksum); + if (r->data_checksum_from_inode) + prt_str(out, " (inode)"); + + if (r->background_compression || r->background_compression_from_inode) { + prt_str(out, " background_compression="); + bch2_compression_opt_to_text(out, r->background_compression); + + if (r->background_compression_from_inode) + prt_str(out, " (inode)"); + } + + if (r->background_target || r->background_target_from_inode) { + prt_str(out, " background_target="); + if (c) + bch2_target_to_text(out, c, r->background_target); + else + prt_printf(out, "%u", r->background_target); + + if (r->background_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->promote_target || r->promote_target_from_inode) { + prt_str(out, " promote_target="); + if (c) + bch2_target_to_text(out, c, r->promote_target); + else + prt_printf(out, "%u", r->promote_target); + + if (r->promote_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->erasure_code || r->erasure_code_from_inode) { + prt_printf(out, " ec=%u", 
r->erasure_code); + if (r->erasure_code_from_inode) + prt_str(out, " (inode)"); + } +} + void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -1102,18 +1216,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, (u64) ec->idx, ec->block); break; } - case BCH_EXTENT_ENTRY_rebalance: { - const struct bch_extent_rebalance *r = &entry->rebalance; - - prt_str(out, "rebalance: target "); - if (c) - bch2_target_to_text(out, c, r->target); - else - prt_printf(out, "%u", r->target); - prt_str(out, " compression "); - bch2_compression_opt_to_text(out, r->compression); + case BCH_EXTENT_ENTRY_rebalance: + bch2_extent_rebalance_to_text(out, c, &entry->rebalance); break; - } + default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; @@ -1125,7 +1231,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, static int extent_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, + struct bkey_validate_context from, const struct bch_extent_ptr *ptr, unsigned size_ondisk, bool metadata) @@ -1168,7 +1274,7 @@ fsck_err: } int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1195,7 +1301,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); + ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false); if (ret) return ret; @@ -1217,9 +1323,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, - c, ptr_crc_uncompressed_size_too_small, - "checksum offset + 
key size > uncompressed size"); bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, ptr_crc_csum_type_unknown, "invalid checksum type"); @@ -1227,6 +1330,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, c, ptr_crc_compression_type_unknown, "invalid compression type"); + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + bkey_fsck_err_on(!crc_is_compressed(crc) && + crc.compressed_size != crc.uncompressed_size, + c, ptr_crc_uncompressed_size_mismatch, + "not compressed but compressed != uncompressed size"); + if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; @@ -1240,12 +1356,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, "redundant crc entry"); crc_since_last_ptr = true; - bkey_fsck_err_on(crc_is_encoded(crc) && - (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), - c, ptr_crc_uncompressed_size_too_big, - "too large encoded extent"); - size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: @@ -1311,7 +1421,7 @@ void bch2_ptr_swab(struct bkey_s k) for (entry = ptrs.start; entry < ptrs.end; entry = extent_entry_next(entry)) { - switch (extent_entry_type(entry)) { + switch (__extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: break; case BCH_EXTENT_ENTRY_crc32: @@ -1331,170 +1441,13 @@ void bch2_ptr_swab(struct bkey_s k) break; case BCH_EXTENT_ENTRY_rebalance: break; + default: + /* Bad entry type: will be caught by validate() */ + return; } } } -const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) 
-{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - - bkey_extent_entry_for_each(ptrs, entry) - if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) - return &entry->rebalance; - - return NULL; -} - -unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned target, unsigned compression) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned rewrite_ptrs = 0; - - if (compression) { - unsigned compression_type = bch2_compression_opt_to_type(compression); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - unsigned i = 0; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || - p.ptr.unwritten) { - rewrite_ptrs = 0; - goto incompressible; - } - - if (!p.ptr.cached && p.crc.compression_type != compression_type) - rewrite_ptrs |= 1U << i; - i++; - } - } -incompressible: - if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { - unsigned i = 0; - - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) - rewrite_ptrs |= 1U << i; - i++; - } - } - - return rewrite_ptrs; -} - -bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) -{ - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - - /* - * If it's an indirect extent, we don't delete the rebalance entry when - * done so that we know what options were applied - check if it still - * needs work done: - */ - if (r && - k.k->type == KEY_TYPE_reflink_v && - !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) - r = NULL; - - return r != NULL; -} - -static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned target, unsigned compression) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - u64 sectors = 0; - - if (compression) { - unsigned compression_type 
= bch2_compression_opt_to_type(compression); - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || - p.ptr.unwritten) { - sectors = 0; - goto incompressible; - } - - if (!p.ptr.cached && p.crc.compression_type != compression_type) - sectors += p.crc.compressed_size; - } - } -incompressible: - if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) - sectors += p.crc.compressed_size; - } - - return sectors; -} - -u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) -{ - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - - return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; -} - -int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - struct bch_io_opts *opts) -{ - struct bkey_s k = bkey_i_to_s(_k); - struct bch_extent_rebalance *r; - unsigned target = opts->background_target; - unsigned compression = background_compression(*opts); - bool needs_rebalance; - - if (!bkey_extent_is_direct_data(k.k)) - return 0; - - /* get existing rebalance entry: */ - r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); - if (r) { - if (k.k->type == KEY_TYPE_reflink_v) { - /* - * indirect extents: existing options take precedence, - * so that we don't move extents back and forth if - * they're referenced by different inodes with different - * options: - */ - if (r->target) - target = r->target; - if (r->compression) - compression = r->compression; - } - - r->target = target; - r->compression = compression; - } - - needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); - - if (needs_rebalance && !r) { - union bch_extent_entry *new = bkey_val_end(k); - - new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; - new->rebalance.compression = compression; - 
new->rebalance.target = target; - new->rebalance.unused = 0; - k.k->u64s += extent_entry_u64s(new); - } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { - /* - * For indirect extents, don't delete the rebalance entry when - * we're finished so that we know we specifically moved it or - * compressed it to its current location/compression type - */ - extent_entry_drop(k, (union bch_extent_entry *) r); - } - - return 0; -} - /* Generic extent code: */ int bch2_cut_front_s(struct bpos where, struct bkey_s k) @@ -1554,7 +1507,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) case KEY_TYPE_reflink_p: { struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); - le64_add_cpu(&p.v->idx, sub); + SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); break; } case KEY_TYPE_inline_data: |