diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-20 00:48:07 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-23 23:40:46 -0500 |
commit | d78d34661415d81357c229a8628ed1fc8a2c5ba4 (patch) | |
tree | da5cd2b746a4e35b871fad69a72164629265db3c | |
parent | 1b4fba9837e14617ca187963135ebdea742fcce4 (diff) |
delalloc btree nodes, wip
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 1 |
-rw-r--r-- | fs/bcachefs/btree_cache.h | 2 |
-rw-r--r-- | fs/bcachefs/btree_io.c | 114 |
-rw-r--r-- | fs/bcachefs/btree_io.h | 7 |
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 48 |
-rw-r--r-- | fs/bcachefs/extents.c | 4 |
-rw-r--r-- | fs/bcachefs/opts.h | 5 |
7 files changed, 134 insertions, 47 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 2105198daf3b..9443e8de8806 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,6 +675,7 @@ struct bch_btree_ptr_v2 { } __packed __aligned(8); LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); +LE16_BITMASK(BTREE_PTR_NONCE, struct bch_btree_ptr_v2, flags, 1, 16); struct bch_extent { struct bch_val v; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 4e1af5882052..4629f90a1c56 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -51,7 +51,7 @@ static inline u64 btree_ptr_hash_val(const struct bkey_i *k) */ return *((u64 *) &bkey_i_to_btree_ptr_v2_c(k)->v.seq); default: - return 0; + BUG(); } } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 33115744d8f3..3363e2394466 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_foreground.h" #include "bkey_methods.h" #include "bkey_sort.h" #include "btree_cache.h" @@ -524,7 +525,9 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_printf(out, "at btree "); bch2_btree_pos_to_text(out, c, b); - prt_printf(out, "\n node offset %u", b->written); + prt_printf(out, "\n node offset %u/%zu", b->written, + btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) ?: + btree_sectors(c)); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); prt_str(out, ": "); @@ -939,10 +942,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; - unsigned ptr_written = btree_ptr_sectors_written(&b->key); + unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; + 
BUG_ON(!ptr_written); + b->version_ondisk = U16_MAX; /* We might get called multiple times on read retry: */ b->written = 0; @@ -1017,7 +1022,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bset_bad_csum, "invalid checksum"); - ret = bset_encrypt(c, i, b->written << 9); + unsigned nonce_offset = BTREE_PTR_NONCE(&bkey_i_to_btree_ptr_v2(&b->key)->v) * btree_bytes(c); + ret = bset_encrypt(c, i, nonce_offset); if (bch2_fs_fatal_err_on(ret, c, "error decrypting btree node: %i", ret)) goto fsck_err; @@ -1805,17 +1811,20 @@ static void btree_node_write_work(struct work_struct *work) } } else { + BUG_ON(wbio->key.k.type != KEY_TYPE_btree_ptr_v2); + ret = bch2_trans_do(c, NULL, NULL, 0, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_reclaim| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw, - !wbio->wbio.failed.nr)); + !wbio->wbio.failed.nr && !c->opts.delalloc_btree_nodes)); if (ret) goto err; } out: + bch2_open_buckets_put(c, &wbio->obs); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; @@ -1867,21 +1876,22 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; - bool saw_error; int ret; - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) - return ret; + if (!c->opts.delalloc_btree_nodes) { + struct printbuf buf = PRINTBUF; + ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree, WRITE, &buf); + if (ret) + bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); + printbuf_exit(&buf); + if (ret) + return ret; + } + bool saw_error; + unsigned offset = !c->opts.delalloc_btree_nodes ? 
b->written : 0; ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); + validate_bset(c, NULL, b, i, offset, sectors, WRITE, false, &saw_error); if (ret) { bch2_inconsistent_error(c); dump_stack(); @@ -1896,6 +1906,38 @@ static void btree_write_submit(struct work_struct *work) struct bch_fs *c = wbio->wbio.c; struct btree *b = wbio->b; + if (c->opts.delalloc_btree_nodes) { + struct closure cl; + int ret; + + closure_init_stack(&cl); + + BUG_ON(wbio->key.k.type != KEY_TYPE_btree_ptr_v2); + set_bkey_val_bytes(&wbio->key.k, sizeof(struct bch_btree_ptr_v2)); + + do { + ret = bch2_trans_do(c, NULL, NULL, 0, + bch2_alloc_sectors_trans(trans, + c->opts.metadata_target ?: + c->opts.foreground_target, + false, + writepoint_ptr(&c->btree_write_point), + c->opts.metadata_replicas, + c->opts.metadata_replicas_required, + BCH_WATERMARK_reclaim, 0, + bio_sectors(&wbio->wbio.bio), + &cl, &wbio->obs, &wbio->key)); + closure_sync(&cl); + } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); + + if (unlikely(ret)) { + bch2_fs_fatal_error(c, "error allocating btree node: %s", + bch2_err_str(ret)); + /* how do we unwind this? */ + BUG(); + } + } + struct btree_update *as = (void *) (b->will_make_reachable & ~1UL); if (as) { spin_lock(&as->journal_entries_lock); @@ -1999,12 +2041,19 @@ do_write: BUG_ON(bset_written(b, btree_bset_last(b))); BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); + BUG_ON(BTREE_PTR_NONCE(&bkey_i_to_btree_ptr_v2(&b->key)->v) == BTREE_PTR_NONCE_MAX); + + unsigned nonce_counter = BTREE_PTR_NONCE(&bkey_i_to_btree_ptr_v2(&b->key)->v); + BUG_ON(nonce_counter == BTREE_PTR_NONCE_MAX); + nonce_counter++; bch2_sort_whiteouts(c, b); sort_iter_stack_init(&sort_iter, b); - bytes = !b->written + bool btree_node_header = !b->written || c->opts.delalloc_btree_nodes; + + bytes = btree_node_header ? 
sizeof(struct btree_node) : sizeof(struct btree_node_entry); @@ -2013,7 +2062,7 @@ do_write: for_each_bset(b, t) { i = bset(b, t); - if (bset_written(b, i)) + if (bset_written(b, i) && !c->opts.delalloc_btree_nodes) continue; bytes += le16_to_cpu(i->u64s) * sizeof(u64); @@ -2033,7 +2082,7 @@ do_write: data = btree_bounce_alloc(c, bytes, &used_mempool); - if (!b->written) { + if (btree_node_header) { bn = data; *bn = *b->data; i = &bn->keys; @@ -2069,17 +2118,21 @@ do_write: if (!b->written && b->key.k.type == KEY_TYPE_btree_ptr_v2) - BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write); + BUG_ON(btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) != sectors_to_write); memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); - BUG_ON(b->written + sectors_to_write > btree_sectors(c)); + unsigned new_written = sectors_to_write + + (c->opts.delalloc_btree_nodes ? 0 : b->written); + + BUG_ON(new_written > btree_sectors(c)); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); i->version = cpu_to_le16(c->sb.version); - SET_BSET_OFFSET(i, b->written); + if (!c->opts.delalloc_btree_nodes) + SET_BSET_OFFSET(i, b->written); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) @@ -2094,7 +2147,11 @@ do_write: validate_bset_for_write(c, b, i, sectors_to_write)) goto err; - ret = bset_encrypt(c, i, b->written << 9); + unsigned nonce_offset = !c->opts.delalloc_btree_nodes + ? b->written << 9 + : btree_bytes(c) * nonce_counter; + + ret = bset_encrypt(c, i, nonce_offset); if (bch2_fs_fatal_err_on(ret, c, "error encrypting btree node: %i\n", ret)) goto err; @@ -2143,9 +2200,10 @@ do_write: struct btree_write_bio, wbio.bio); wbio_init(&wbio->wbio.bio); wbio->b = b; + wbio->obs.nr = 0; wbio->data = data; wbio->data_bytes = bytes; - wbio->sector_offset = b->written; + wbio->sector_offset = !c->opts.delalloc_btree_nodes ? 
b->written : 0; wbio->wbio.c = c; wbio->wbio.used_mempool = used_mempool; wbio->wbio.first_btree_write = !b->written; @@ -2155,7 +2213,13 @@ do_write: bkey_copy(&wbio->key, &b->key); - b->written += sectors_to_write; + if (!c->opts.delalloc_btree_nodes && bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) { + SET_BTREE_PTR_NONCE(&bkey_i_to_btree_ptr_v2(&wbio->key)->v, nonce_counter); + if (nonce_counter == BTREE_PTR_NONCE_MAX) + set_btree_node_need_rewrite(b); + } + + b->written = new_written; if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2) bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written = @@ -2169,7 +2233,7 @@ do_write: return; err: set_btree_node_noevict(b); - b->written += sectors_to_write; + b->written = new_written; nowrite: btree_bounce_free(c, bytes, used_mempool, data); __btree_node_write_done(c, b); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 2fd6926a71c7..0aec54b18ce9 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -27,10 +27,10 @@ static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b atomic_dec(&c->btree_cache.dirty); } -static inline unsigned btree_ptr_sectors_written(struct bkey_i *k) +static inline unsigned btree_ptr_sectors_written(struct bkey_s_c k) { - return k->k.type == KEY_TYPE_btree_ptr_v2 - ? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written) + return k.k->type == KEY_TYPE_btree_ptr_v2 + ? 
le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors_written) : 0; } @@ -53,6 +53,7 @@ struct btree_write_bio { void *data; unsigned data_bytes; unsigned sector_offset; + struct open_buckets obs; struct bch_write_bio wbio; }; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index de796dfb3df8..d375747d19c2 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -258,6 +258,11 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, : 0; int ret; + bkey_btree_ptr_v2_init(&tmp.k); + + if (c->opts.delalloc_btree_nodes) + goto mem_alloc; + mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { struct btree_alloc *a = @@ -270,7 +275,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, } mutex_unlock(&c->btree_reserve_cache_lock); - bkey_btree_ptr_v2_init(&tmp.k); ret = bch2_alloc_sectors_trans(trans, c->opts.metadata_target ?: c->opts.foreground_target, @@ -1236,11 +1240,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_trans *trans, unsigned long old, new, v; BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && - !btree_ptr_sectors_written(insert)); + !btree_ptr_sectors_written(bkey_i_to_s_c(insert))); if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - +#if 0 if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf) ?: bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { @@ -1255,7 +1259,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_trans *trans, bch2_fs_inconsistent(c, "%s", buf.buf); dump_stack(); } - +#endif while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) bch2_btree_node_iter_advance(node_iter, b); @@ -2116,20 +2120,25 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = 
trans->c; - struct btree_iter iter2 = { NULL }; + struct btree_iter iter2 = {}; struct btree *parent; + struct disk_reservation disk_res = {}; int ret; if (!skip_triggers) { - ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s_c(&b->key), 0); - if (ret) - return ret; + int ptrs_delta = bch2_bkey_nr_ptrs(bkey_i_to_s_c(new_key)) - + bch2_bkey_nr_ptrs(bkey_i_to_s_c(&b->key)); - ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, - new_key, 0); + bch2_disk_reservation_add(c, &disk_res, + max(ptrs_delta, 0) * btree_sectors(c), + BCH_DISK_RESERVATION_NOFAIL); + + ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s_c(&b->key), 0) ?: + bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, + new_key, 0); if (ret) - return ret; + goto err; } if (new_hash) { @@ -2174,7 +2183,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, new_key, new_key->k.u64s); } - ret = bch2_trans_commit(trans, NULL, NULL, commit_flags); + ret = bch2_trans_commit(trans, &disk_res, NULL, commit_flags); if (ret) goto err; @@ -2196,6 +2205,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); out: bch2_trans_iter_exit(trans, &iter2); + bch2_disk_reservation_put(c, &disk_res); return ret; err: if (new_hash) { @@ -2216,6 +2226,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite struct closure cl; int ret = 0; + BUG_ON(new_key->k.type != KEY_TYPE_btree_ptr_v2); + ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1); if (ret) return ret; @@ -2262,6 +2274,8 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, struct btree_iter iter; int ret; + BUG_ON(new_key->k.type != KEY_TYPE_btree_ptr_v2); + bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, BTREE_MAX_DEPTH, b->c.level, BTREE_ITER_INTENT); @@ -2278,9 +2292,11 @@ int bch2_btree_node_update_key_get_iter(struct 
btree_trans *trans, BUG_ON(!btree_node_hashed(b)); - struct bch_extent_ptr *ptr; - bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, - !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); + if (!trans->c->opts.delalloc_btree_nodes) { + struct bch_extent_ptr *ptr; + bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, + !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); + } ret = bch2_btree_node_update_key(trans, &iter, b, new_key, commit_flags, skip_triggers); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 82ec056f4cdb..648f1daffb3b 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1117,7 +1117,7 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, int ret = 0; if (bkey_is_btree_ptr(k.k)) - size_ondisk = btree_sectors(c); + size_ondisk = btree_ptr_sectors_written(k) ?: btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, @@ -1206,7 +1206,7 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, } } - bkey_fsck_err_on(!nr_ptrs, c, err, + bkey_fsck_err_on(k.k->type != KEY_TYPE_btree_ptr_v2 && !nr_ptrs, c, err, extent_ptrs_no_ptrs, "no ptrs"); bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 42cad83efb48..67d39eab413f 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -419,6 +419,11 @@ enum fsck_err_opts { OPT_UINT(0, S64_MAX), \ BCH2_NO_SB_OPT, false, \ NULL, "Pointer to a struct log_output") \ + x(delalloc_btree_nodes, u8, \ + 0, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, NULL) \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ |