Diffstat (limited to 'libbcachefs/btree_io.c')
-rw-r--r--	libbcachefs/btree_io.c	117
1 file changed, 47 insertions, 70 deletions
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index bb3aecce..2f509721 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -57,25 +57,25 @@ static void set_needs_whiteout(struct bset *i, int v)
 		k->needs_whiteout = v;
 }
 
-static void btree_bounce_free(struct bch_fs *c, unsigned order,
+static void btree_bounce_free(struct bch_fs *c, size_t size,
 			      bool used_mempool, void *p)
 {
 	if (used_mempool)
 		mempool_free(p, &c->btree_bounce_pool);
 	else
-		vpfree(p, PAGE_SIZE << order);
+		vpfree(p, size);
 }
 
-static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
+static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
 				bool *used_mempool)
 {
 	unsigned flags = memalloc_nofs_save();
 	void *p;
 
-	BUG_ON(order > btree_page_order(c));
+	BUG_ON(size > btree_bytes(c));
 
 	*used_mempool = false;
-	p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
+	p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
 	if (!p) {
 		*used_mempool = true;
 		p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
@@ -125,16 +125,14 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 {
 	struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
 	bool used_mempool = false;
-	unsigned order;
+	size_t bytes = b->whiteout_u64s * sizeof(u64);
 
 	if (!b->whiteout_u64s)
 		return;
 
-	order = get_order(b->whiteout_u64s * sizeof(u64));
+	new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
 
-	new_whiteouts = btree_bounce_alloc(c, order, &used_mempool);
-
-	ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order));
+	ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
 
 	for (k = unwritten_whiteouts_start(c, b);
 	     k != unwritten_whiteouts_end(c, b);
@@ -158,7 +156,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 	memcpy_u64s(unwritten_whiteouts_start(c, b),
 		    new_whiteouts, b->whiteout_u64s);
 
-	btree_bounce_free(c, order, used_mempool, new_whiteouts);
+	btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
 }
 
 static bool should_compact_bset(struct btree *b, struct bset_tree *t,
@@ -187,7 +185,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
 	struct bkey_packed *whiteouts = NULL;
 	struct bkey_packed *u_start, *u_pos;
 	struct sort_iter sort_iter;
-	unsigned order, whiteout_u64s = 0, u64s;
+	unsigned bytes, whiteout_u64s = 0, u64s;
 	bool used_mempool, compacting = false;
 
 	BUG_ON(!btree_node_is_extents(b));
@@ -204,9 +202,9 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
 	sort_iter_init(&sort_iter, b);
 
 	whiteout_u64s += b->whiteout_u64s;
-	order = get_order(whiteout_u64s * sizeof(u64));
+	bytes = whiteout_u64s * sizeof(u64);
 
-	whiteouts = btree_bounce_alloc(c, order, &used_mempool);
+	whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
 	u_start = u_pos = whiteouts;
 
 	memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b),
@@ -306,7 +304,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
 				    unwritten_whiteouts_end(c, b),
 				    true);
 
-	btree_bounce_free(c, order, used_mempool, whiteouts);
+	btree_bounce_free(c, bytes, used_mempool, whiteouts);
 
 	bch2_btree_build_aux_trees(b);
 
@@ -401,7 +399,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	struct bset *start_bset = bset(b, &b->set[start_idx]);
 	bool used_mempool = false;
 	u64 start_time, seq = 0;
-	unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1;
+	unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
 	bool sorting_entire_node = start_idx == 0 &&
 		end_idx == b->nsets;
 
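The hunks above all make the same substitution: a page-order argument (unsigned order) becomes an exact byte count (size_t size / bytes). The point is that get_order() rounds an allocation up to a power-of-two number of pages, so PAGE_SIZE << order can be much larger than what was actually requested. A standalone sketch of the difference, with get_order() reimplemented locally just for the demo (the kernel's version lives in include/asm-generic/getorder.h):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/*
 * Local reimplementation of the kernel's get_order() for illustration:
 * smallest order such that (PAGE_SIZE << order) >= size.
 */
static unsigned get_order(size_t size)
{
	unsigned order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	/* e.g. whiteouts occupying 9000 u64s */
	size_t bytes = 9000 * sizeof(unsigned long long);
	unsigned order = get_order(bytes);

	printf("exact size:         %zu bytes\n", bytes);
	printf("PAGE_SIZE << order: %lu bytes (order %u)\n",
	       PAGE_SIZE << order, order);
	/*
	 * 72000 bytes round up to 131072: nearly half the buffer is waste,
	 * and a bounds check against (PAGE_SIZE << order) is correspondingly
	 * looser than one against the exact size.
	 */
	return 0;
}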
@@ -416,11 +414,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 			   btree_bkey_last(b, t));
 	}
 
-	order = sorting_entire_node
-		? btree_page_order(c)
-		: get_order(__vstruct_bytes(struct btree_node, u64s));
+	bytes = sorting_entire_node
+		? btree_bytes(c)
+		: __vstruct_bytes(struct btree_node, u64s);
 
-	out = btree_bounce_alloc(c, order, &used_mempool);
+	out = btree_bounce_alloc(c, bytes, &used_mempool);
 
 	start_time = local_clock();
@@ -435,7 +433,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 
 	out->keys.u64s = cpu_to_le16(u64s);
 
-	BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
+	BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
 
 	if (sorting_entire_node)
 		bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
@@ -449,7 +447,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	if (sorting_entire_node) {
 		unsigned u64s = le16_to_cpu(out->keys.u64s);
 
-		BUG_ON(order != btree_page_order(c));
+		BUG_ON(bytes != btree_bytes(c));
 
 		/*
 		 * Our temporary buffer is the same size as the btree node's
@@ -484,7 +482,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	set_btree_bset_end(b, &b->set[start_idx]);
 	bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
 
-	btree_bounce_free(c, order, used_mempool, out);
+	btree_bounce_free(c, bytes, used_mempool, out);
 
 	bch2_verify_btree_nr_keys(b);
 }
@@ -599,34 +597,6 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 		bch2_btree_iter_reinit_node(iter, b);
 }
 
-static struct nonce btree_nonce(struct bset *i, unsigned offset)
-{
-	return (struct nonce) {{
-		[0] = cpu_to_le32(offset),
-		[1] = ((__le32 *) &i->seq)[0],
-		[2] = ((__le32 *) &i->seq)[1],
-		[3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
-	}};
-}
-
-static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
-{
-	struct nonce nonce = btree_nonce(i, offset);
-
-	if (!offset) {
-		struct btree_node *bn = container_of(i, struct btree_node, keys);
-		unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
-
-		bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
-			     bytes);
-
-		nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
-	}
-
-	bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
-		     vstruct_end(i) - (void *) i->_data);
-}
-
 static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 			  struct btree *b, struct bset *i,
 			  unsigned offset, int write)
@@ -917,6 +887,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	struct sort_iter *iter;
 	struct btree_node *sorted;
 	struct bkey_packed *k;
+	struct bch_extent_ptr *ptr;
 	struct bset *i;
 	bool used_mempool, blacklisted;
 	unsigned u64s;
@@ -971,8 +942,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 		bset_encrypt(c, i, b->written << 9);
 
 		if (btree_node_is_extents(b) &&
-		    !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
+		    !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
 			set_btree_node_old_extent_overwrite(b);
+			set_btree_node_need_rewrite(b);
+		}
 
 		sectors = vstruct_sectors(b->data, c->block_bits);
 	} else {
@@ -1040,7 +1013,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 			 BTREE_ERR_WANT_RETRY, c, b, NULL,
 			 "found bset signature after last bset");
 
-	sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
+	sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
 	sorted->keys.u64s = 0;
 
 	set_btree_bset(b, b->set, &b->data->keys);
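Note that btree_bounce_alloc() keeps its two-step shape through the conversion: an opportunistic allocation that may fail (vpmalloc() with GFP_NOWAIT), then a fallback to the preallocated btree_bounce_pool, which is sized for a full btree node and therefore cannot come up short given the BUG_ON above it. A minimal userspace model of the pattern — malloc() stands in for vpmalloc(), a static buffer stands in for the mempool, and the size constant is illustrative only:

#include <stdlib.h>
#include <stdbool.h>

/* Assumed upper bound, standing in for btree_bytes(c) */
#define BTREE_NODE_BYTES (256 * 1024)

/* Static reserve standing in for c->btree_bounce_pool */
static char reserve[BTREE_NODE_BYTES];

static void *bounce_alloc(size_t size, bool *used_reserve)
{
	void *p;

	if (size > BTREE_NODE_BYTES)
		abort();		/* mirrors BUG_ON(size > btree_bytes(c)) */

	*used_reserve = false;
	p = malloc(size);		/* opportunistic attempt (GFP_NOWAIT in the kernel) */
	if (!p) {
		*used_reserve = true;	/* guaranteed fallback (mempool_alloc) */
		p = reserve;
	}
	return p;
}

static void bounce_free(void *p, bool used_reserve)
{
	if (!used_reserve)
		free(p);		/* the reserve is never freed, only handed back */
}

int main(void)
{
	bool used_reserve;
	void *p = bounce_alloc(4096, &used_reserve);

	bounce_free(p, used_reserve);
	return 0;
}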
@@ -1058,7 +1031,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
 	BUG_ON(b->nr.live_u64s != u64s);
 
-	btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
+	btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
 
 	i = &b->data->keys;
 	for (k = i->start; k != vstruct_last(i);) {
@@ -1098,6 +1071,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	set_needs_whiteout(btree_bset_first(b), true);
 
 	btree_node_reset_sib_u64s(b);
+
+	bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+		if (ca->mi.state != BCH_MEMBER_STATE_RW)
+			set_btree_node_need_rewrite(b);
+	}
 out:
 	mempool_free(iter, &c->fill_iter);
 	return retry_read;
@@ -1139,7 +1119,8 @@ static void btree_node_read_work(struct work_struct *work)
 			bio->bi_status = BLK_STS_REMOVED;
 		}
 start:
-		bch2_dev_io_err_on(bio->bi_status, ca, "btree read");
+		bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s",
+				   bch2_blk_status_to_str(bio->bi_status));
 		if (rb->have_ioref)
 			percpu_ref_put(&ca->io_ref);
 		rb->have_ioref = false;
@@ -1220,7 +1201,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 	set_btree_node_read_in_flight(b);
 
 	if (rb->have_ioref) {
-		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_BTREE],
+		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
 			     bio_sectors(bio));
 
 		bio_set_dev(bio, ca->disk_sb.bdev);
@@ -1392,7 +1373,7 @@ static void btree_node_write_work(struct work_struct *work)
 	struct btree *b = wbio->wbio.bio.bi_private;
 
 	btree_bounce_free(c,
-		wbio->wbio.order,
+		wbio->bytes,
 		wbio->wbio.used_mempool,
 		wbio->data);
 
@@ -1423,8 +1404,8 @@ static void btree_node_write_endio(struct bio *bio)
 	if (wbio->have_ioref)
 		bch2_latency_acct(ca, wbio->submit_time, WRITE);
 
-	if (bio->bi_status == BLK_STS_REMOVED ||
-	    bch2_dev_io_err_on(bio->bi_status, ca, "btree write") ||
+	if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s",
+			       bch2_blk_status_to_str(bio->bi_status)) ||
 	    bch2_meta_write_fault("btree")) {
 		spin_lock_irqsave(&c->btree_write_error_lock, flags);
 		bch2_dev_list_add_dev(&orig->failed, wbio->dev);
@@ -1475,7 +1456,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	struct bch_extent_ptr *ptr;
 	struct sort_iter sort_iter;
 	struct nonce nonce;
-	unsigned bytes_to_write, sectors_to_write, order, bytes, u64s;
+	unsigned bytes_to_write, sectors_to_write, bytes, u64s;
 	u64 seq = 0;
 	bool used_mempool;
 	unsigned long old, new;
@@ -1545,8 +1526,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		seq = max(seq, le64_to_cpu(i->journal_seq));
 	}
 
-	order = get_order(bytes);
-	data = btree_bounce_alloc(c, order, &used_mempool);
+	data = btree_bounce_alloc(c, bytes, &used_mempool);
 
 	if (!b->written) {
 		bn = data;
@@ -1658,7 +1638,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 			    struct btree_write_bio, wbio.bio);
 	wbio_init(&wbio->wbio.bio);
 	wbio->data = data;
-	wbio->wbio.order = order;
+	wbio->bytes = bytes;
 	wbio->wbio.used_mempool = used_mempool;
 	wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META;
 	wbio->wbio.bio.bi_end_io = btree_node_write_endio;
@@ -1689,13 +1669,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	b->written += sectors_to_write;
 
 	/* XXX: submitting IO with btree locks held: */
-	bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_BTREE, &k.key);
+	bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key);
 	return;
 err:
 	set_btree_node_noevict(b);
 	b->written += sectors_to_write;
 nowrite:
-	btree_bounce_free(c, order, used_mempool, data);
+	btree_bounce_free(c, bytes, used_mempool, data);
 	btree_node_write_done(c, b);
 }
 
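Two independent changes ride along in the hunks above: BCH_DATA_BTREE is renamed to BCH_DATA_btree, and the read/write completion paths now append a decoded status string to their error messages instead of logging a bare "btree read"/"btree write" (the explicit BLK_STS_REMOVED check in the write endio goes away once that status is reported through the common path). A rough stand-in for the mapping bch2_blk_status_to_str() is assumed to provide — the real codes and strings come from the block layer's error table:

#include <stdio.h>

/*
 * Illustrative status codes only; the kernel's blk_status_t values and
 * the strings behind bch2_blk_status_to_str() live in the block layer.
 */
enum blk_status {
	BLK_STS_OK = 0,
	BLK_STS_IOERR,
	BLK_STS_TIMEOUT,
	BLK_STS_REMOVED,
};

static const char *blk_status_to_str(enum blk_status s)
{
	switch (s) {
	case BLK_STS_OK:	return "ok";
	case BLK_STS_IOERR:	return "I/O error";
	case BLK_STS_TIMEOUT:	return "timeout";
	case BLK_STS_REMOVED:	return "device removed";
	}
	return "unknown";
}

/* Old message: "btree write"; new message carries the decoded status */
static void report_io_error(enum blk_status status)
{
	if (status)
		fprintf(stderr, "btree write: %s\n", blk_status_to_str(status));
}

int main(void)
{
	report_io_error(BLK_STS_REMOVED);
	return 0;
}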
@@ -1826,9 +1806,8 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
 	rcu_read_unlock();
 }
 
-ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
+void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
 {
-	struct printbuf out = _PBUF(buf, PAGE_SIZE);
 	struct bucket_table *tbl;
 	struct rhash_head *pos;
 	struct btree *b;
@@ -1841,7 +1820,7 @@
 		if (!(flags & (1 << BTREE_NODE_dirty)))
 			continue;
 
-		pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
+		pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
 		       b,
 		       (flags & (1 << BTREE_NODE_dirty)) != 0,
 		       (flags & (1 << BTREE_NODE_need_write)) != 0,
@@ -1852,6 +1831,4 @@
 		       b->will_make_reachable & 1);
 	}
 	rcu_read_unlock();
-
-	return out.pos - buf;
 }
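The final hunks convert bch2_dirty_btree_nodes_print(), which formatted into a raw char buffer and returned the byte count, into bch2_dirty_btree_nodes_to_text(), which appends to a caller-owned printbuf so buffer sizing and position tracking live in one place. A simplified userspace model of that interface change (this printbuf is a toy, not the kernel's):

#include <stdio.h>
#include <stdarg.h>

/* Toy printbuf: a write position inside a caller-owned buffer. */
struct printbuf {
	char *pos;
	char *end;
};

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(out->pos, out->end - out->pos, fmt, args);
	va_end(args);

	/* vsnprintf returns the would-be length; clamp on truncation */
	out->pos += n;
	if (out->pos > out->end)
		out->pos = out->end;
}

/*
 * New-style interface: append to the printbuf and return nothing; the
 * caller decides how big the buffer is and what to do with the result.
 */
static void dirty_btree_nodes_to_text(struct printbuf *out)
{
	/* the address below is just a stand-in for a btree node pointer */
	pr_buf(out, "%p d %u n %u\n", (void *) out, 1, 0);
}

int main(void)
{
	char buf[256];
	struct printbuf out = { buf, buf + sizeof(buf) };

	dirty_btree_nodes_to_text(&out);
	printf("%.*s", (int) (out.pos - buf), buf);
	return 0;
}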