diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-02-27 16:58:29 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2020-03-24 22:45:26 -0400 |
commit | dda0a08b57d290a6c065221d4e31a58741b6f115 (patch) | |
tree | 24d90deb067667a7a0c82fa385162d3e162e3771 | |
parent | 72800626777145820faf4758e35e385b8f62d943 (diff) |
bcachefs: Walk btree with keys from journal
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.c | 81 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 113 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 161 | ||||
-rw-r--r-- | fs/bcachefs/recovery.h | 21 |
6 files changed, 281 insertions, 100 deletions
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index c064cf468a9b..0aa3d3b9a281 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -134,7 +134,7 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) { - if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0) + if (bkey_cmp(k.k->p, b->data->min_key) < 0) return "key before start of btree node"; if (bkey_cmp(k.k->p, b->data->max_key) > 0) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index e9df7e82a766..5c3e7e165fcf 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -588,6 +588,7 @@ err: static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, struct btree_iter *iter, const struct bkey_i *k, + enum btree_id btree_id, unsigned level, enum six_lock_type lock_type, bool sync) @@ -600,7 +601,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (!bch2_btree_node_relock(iter, level + 1)) + if (iter && !bch2_btree_node_relock(iter, level + 1)) return ERR_PTR(-EINTR); b = bch2_btree_node_mem_alloc(c); @@ -608,7 +609,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, return b; bkey_copy(&b->key, k); - if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) { + if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ /* mark as unhashed... */ @@ -628,7 +629,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * * XXX: ideally should be dropping all btree node locks here */ - if (btree_node_read_locked(iter, level + 1)) + if (iter && btree_node_read_locked(iter, level + 1)) btree_node_unlock(iter, level + 1); bch2_btree_node_read(c, b, sync); @@ -676,7 +677,8 @@ retry: * else we could read in a btree node from disk that's been * freed: */ - b = bch2_btree_node_fill(c, iter, k, level, lock_type, true); + b = bch2_btree_node_fill(c, iter, k, iter->btree_id, + level, lock_type, true); /* We raced and found the btree node in the cache */ if (!b) @@ -762,6 +764,74 @@ lock_node: return b; } +struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, + const struct bkey_i *k, + enum btree_id btree_id, + unsigned level) +{ + struct btree_cache *bc = &c->btree_cache; + struct btree *b; + struct bset_tree *t; + + EBUG_ON(level >= BTREE_MAX_DEPTH); + + b = btree_node_mem_ptr(k); + if (b) + goto lock_node; +retry: + b = btree_cache_find(bc, k); + if (unlikely(!b)) { + b = bch2_btree_node_fill(c, NULL, k, btree_id, + level, SIX_LOCK_read, true); + + /* We raced and found the btree node in the cache */ + if (!b) + goto retry; + + if (IS_ERR(b)) + return b; + } else { +lock_node: + six_lock_read(&b->lock); + + if (unlikely(b->hash_val != btree_ptr_hash_val(k) || + b->btree_id != btree_id || + b->level != level)) { + six_unlock_read(&b->lock); + goto retry; + } + } + + /* XXX: waiting on IO with btree locks held: */ + wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, + TASK_UNINTERRUPTIBLE); + + prefetch(b->aux_data); + + for_each_bset(b, t) { + void *p = (u64 *) b->aux_data + t->aux_data_offset; + + prefetch(p + L1_CACHE_BYTES * 0); + prefetch(p + L1_CACHE_BYTES * 1); + prefetch(p + L1_CACHE_BYTES * 2); + } + + /* avoid atomic set bit if it's not needed: */ + if (!btree_node_accessed(b)) + set_btree_node_accessed(b); + + if (unlikely(btree_node_read_error(b))) { + six_unlock_read(&b->lock); + return ERR_PTR(-EIO); + } + + EBUG_ON(b->btree_id != btree_id || + BTREE_NODE_LEVEL(b->data) != level || + bkey_cmp(b->data->max_key, k->k.p)); + + return b; +} + struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree_iter *iter, struct btree *b, @@ -876,7 +946,8 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, if (b) return; - bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false); + bch2_btree_node_fill(c, iter, k, iter->btree_id, + level, SIX_LOCK_read, false); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index bc24d92678d3..132cc95a4c02 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -25,6 +25,9 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, const struct bkey_i *, unsigned, enum six_lock_type); +struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, + enum btree_id, unsigned); + struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, struct btree *, enum btree_node_sibling); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index c5a0c0ed22a0..7c89a6dd7f5a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -184,16 +184,8 @@ fsck_err: return ret; } -static bool pos_in_journal_keys(struct journal_keys *journal_keys, - enum btree_id id, struct bpos pos) -{ - struct journal_key *k = journal_key_search(journal_keys, id, pos); - - return k && k->btree_id == id && !bkey_cmp(k->k->k.p, pos); -} - static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, - struct journal_keys *journal_keys, bool initial) + bool initial) { struct btree_node_iter iter; struct bkey unpacked; @@ -207,10 +199,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { - if (!b->level && journal_keys && - pos_in_journal_keys(journal_keys, b->btree_id, k.k->p)) - continue; - bch2_bkey_debugcheck(c, b, k); ret = bch2_gc_mark_key(c, k, max_stale, initial); @@ -222,7 +210,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, } static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, - struct journal_keys *journal_keys, bool initial, bool metadata_only) { struct btree_trans trans; @@ -250,8 +237,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, gc_pos_set(c, gc_pos_btree_node(b)); - ret = btree_gc_mark_node(c, b, &max_stale, - journal_keys, initial); + ret = btree_gc_mark_node(c, b, &max_stale, initial); if (ret) break; @@ -287,6 +273,78 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, return ret; } +static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, + struct journal_keys *journal_keys, + unsigned target_depth) +{ + struct btree_and_journal_iter iter; + struct bkey_s_c k; + u8 max_stale = 0; + int ret = 0; + + bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + bch2_bkey_debugcheck(c, b, k); + + ret = bch2_gc_mark_key(c, k, &max_stale, true); + if (ret) + break; + + if (b->level > target_depth) { + struct btree *child; + BKEY_PADDED(k) tmp; + + bkey_reassemble(&tmp.k, k); + + child = bch2_btree_node_get_noiter(c, &tmp.k, + b->btree_id, b->level - 1); + ret = PTR_ERR_OR_ZERO(child); + if (ret) + break; + + bch2_gc_btree_init_recurse(c, child, + journal_keys, target_depth); + six_unlock_read(&child->lock); + } + + bch2_btree_and_journal_iter_advance(&iter); + } + + return ret; +} + +static int bch2_gc_btree_init(struct bch_fs *c, + struct journal_keys *journal_keys, + enum btree_id btree_id, + bool metadata_only) +{ + struct btree *b; + unsigned target_depth = metadata_only ? 1 + : expensive_debug_checks(c) ? 0 + : !btree_node_type_needs_gc(btree_id) ? 1 + : 0; + u8 max_stale = 0; + int ret = 0; + + b = c->btree_roots[btree_id].b; + + if (btree_node_fake(b)) + return 0; + + six_lock_read(&b->lock); + if (b->level >= target_depth) + ret = bch2_gc_btree_init_recurse(c, b, + journal_keys, target_depth); + + if (!ret) + ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), + &max_stale, true); + six_unlock_read(&b->lock); + + return ret; +} + static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) { return (int) btree_id_to_gc_phase(l) - @@ -305,27 +363,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; - enum btree_node_type type = __btree_node_type(0, id); - - int ret = bch2_gc_btree(c, id, journal_keys, - initial, metadata_only); + int ret = initial + ? bch2_gc_btree_init(c, journal_keys, + id, metadata_only) + : bch2_gc_btree(c, id, initial, metadata_only); if (ret) return ret; - - if (journal_keys && !metadata_only && - btree_node_type_needs_gc(type)) { - struct journal_key *j; - u8 max_stale; - int ret; - - for_each_journal_key(*journal_keys, j) - if (j->btree_id == id) { - ret = bch2_gc_mark_key(c, bkey_i_to_s_c(j->k), - &max_stale, initial); - if (ret) - return ret; - } - } } return 0; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 02b381cb567b..11083331fe65 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -27,30 +27,78 @@ /* iterate over keys read from the journal: */ -struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +static struct journal_key *journal_key_search(struct journal_keys *journal_keys, + enum btree_id id, unsigned level, + struct bpos pos) { - while (iter->k) { - if (iter->k->btree_id == iter->btree_id) - return bkey_i_to_s_c(iter->k->k); + size_t l = 0, r = journal_keys->nr, m; - iter->k++; - if (iter->k == iter->keys->d + iter->keys->nr) - iter->k = NULL; + while (l < r) { + m = l + ((r - l) >> 1); + if ((cmp_int(id, journal_keys->d[m].btree_id) ?: + cmp_int(level, journal_keys->d[m].level) ?: + bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) + l = m + 1; + else + r = m; } - return bkey_s_c_null; + BUG_ON(l < journal_keys->nr && + (cmp_int(id, journal_keys->d[l].btree_id) ?: + cmp_int(level, journal_keys->d[l].level) ?: + bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); + + BUG_ON(l && + (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: + cmp_int(level, journal_keys->d[l - 1].level) ?: + bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); + + return l < journal_keys->nr ? journal_keys->d + l : NULL; +} + +static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) +{ + if (iter->k && + iter->k < iter->keys->d + iter->keys->nr && + iter->k->btree_id == iter->btree_id && + iter->k->level == iter->level) + return iter->k->k; + + iter->k = NULL; + return NULL; +} + +static void bch2_journal_iter_advance(struct journal_iter *iter) +{ + if (iter->k) + iter->k++; } -struct bkey_s_c bch2_journal_iter_next(struct journal_iter *iter) +static void bch2_journal_iter_init(struct journal_iter *iter, + struct journal_keys *journal_keys, + enum btree_id id, unsigned level, + struct bpos pos) { - if (!iter->k) - return bkey_s_c_null; + iter->btree_id = id; + iter->level = level; + iter->keys = journal_keys; + iter->k = journal_key_search(journal_keys, id, level, pos); +} - iter->k++; - if (iter->k == iter->keys->d + iter->keys->nr) - iter->k = NULL; +static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) +{ + return iter->btree + ? bch2_btree_iter_peek(iter->btree) + : bch2_btree_node_iter_peek_unpack(&iter->node_iter, + iter->b, &iter->unpacked); +} - return bch2_journal_iter_peek(iter); +static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) +{ + if (iter->btree) + bch2_btree_iter_next(iter->btree); + else + bch2_btree_node_iter_advance(&iter->node_iter, iter->b); } void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) @@ -59,10 +107,10 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) case none: break; case btree: - bch2_btree_iter_next(iter->btree); + bch2_journal_iter_advance_btree(iter); break; case journal: - bch2_journal_iter_next(&iter->journal); + bch2_journal_iter_advance(&iter->journal); break; } @@ -74,14 +122,16 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * struct bkey_s_c ret; while (1) { - struct bkey_s_c btree_k = bch2_btree_iter_peek(iter->btree); - struct bkey_s_c journal_k = bch2_journal_iter_peek(&iter->journal); + struct bkey_s_c btree_k = + bch2_journal_iter_peek_btree(iter); + struct bkey_s_c journal_k = + bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal)); if (btree_k.k && journal_k.k) { int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); if (!cmp) - bch2_btree_iter_next(iter->btree); + bch2_journal_iter_advance_btree(iter); iter->last = cmp < 0 ? btree : journal; } else if (btree_k.k) { @@ -94,6 +144,14 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * } ret = iter->last == journal ? journal_k : btree_k; + + if (iter->b && + bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { + iter->journal.k = NULL; + iter->last = none; + return bkey_s_c_null; + } + if (!bkey_deleted(ret.k)) break; @@ -110,41 +168,32 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter * return bch2_btree_and_journal_iter_peek(iter); } -struct journal_key *journal_key_search(struct journal_keys *journal_keys, - enum btree_id id, struct bpos pos) -{ - size_t l = 0, r = journal_keys->nr, m; - - while (l < r) { - m = l + ((r - l) >> 1); - if ((cmp_int(id, journal_keys->d[m].btree_id) ?: - bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) - l = m + 1; - else - r = m; - } - - BUG_ON(l < journal_keys->nr && - (cmp_int(id, journal_keys->d[l].btree_id) ?: - bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); - - BUG_ON(l && - (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: - bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); - - return l < journal_keys->nr ? journal_keys->d + l : NULL; -} - void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, struct btree_trans *trans, struct journal_keys *journal_keys, enum btree_id id, struct bpos pos) { - iter->journal.keys = journal_keys; - iter->journal.k = journal_key_search(journal_keys, id, pos); - iter->journal.btree_id = id; + memset(iter, 0, sizeof(*iter)); iter->btree = bch2_trans_get_iter(trans, id, pos, 0); + bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); +} + +void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, + struct journal_keys *journal_keys, + struct btree *b) +{ + struct bpos start = b->data->min_key; + + if (btree_node_type_is_extents(b->btree_id)) + start = bkey_successor(start); + + memset(iter, 0, sizeof(*iter)); + + iter->b = b; + bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); + bch2_journal_iter_init(&iter->journal, journal_keys, + b->btree_id, b->level, start); } /* sort and dedup all keys in the journal: */ @@ -169,7 +218,8 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) const struct journal_key *l = _l; const struct journal_key *r = _r; - return cmp_int(l->btree_id, r->btree_id) ?: + return cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: bkey_cmp(l->k->k.p, r->k->k.p) ?: cmp_int(l->journal_seq, r->journal_seq) ?: cmp_int(l->journal_offset, r->journal_offset); @@ -180,9 +230,10 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = _l; const struct journal_key *r = _r; - return cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->btree_id, r->btree_id) ?: - bkey_cmp(l->k->k.p, r->k->k.p); + return cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: + bkey_cmp(l->k->k.p, r->k->k.p); } static void journal_keys_free(struct journal_keys *keys) @@ -218,6 +269,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) for_each_jset_key(k, _n, entry, &p->j) keys.d[keys.nr++] = (struct journal_key) { .btree_id = entry->btree_id, + .level = entry->level, .k = k, .journal_seq = le64_to_cpu(p->j.seq) - keys.journal_seq_base, @@ -229,7 +281,8 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) src = dst = keys.d; while (src < keys.d + keys.nr) { while (src + 1 < keys.d + keys.nr && - src[0].btree_id == src[1].btree_id && + src[0].btree_id == src[1].btree_id && + src[0].level == src[1].level && !bkey_cmp(src[0].k->k.p, src[1].k->k.p)) src++; @@ -864,7 +917,7 @@ int bch2_fs_recovery(struct bch_fs *c) */ bch_info(c, "starting metadata mark and sweep"); err = "error in mark and sweep"; - ret = bch2_gc(c, NULL, true, true); + ret = bch2_gc(c, &journal_keys, true, true); if (ret) goto err; bch_verbose(c, "mark and sweep done"); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index c91309301563..fa1f2818817d 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -5,6 +5,7 @@ struct journal_keys { struct journal_key { enum btree_id btree_id:8; + unsigned level:8; struct bkey_i *k; u32 journal_seq; u32 journal_offset; @@ -17,15 +18,23 @@ struct journal_keys { for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) struct journal_iter { + enum btree_id btree_id; + unsigned level; struct journal_keys *keys; struct journal_key *k; - enum btree_id btree_id; }; -struct btree_and_journal_iter { - enum btree_id btree_id; +/* + * Iterate over keys in the btree, with keys from the journal overlaid on top: + */ +struct btree_and_journal_iter { struct btree_iter *btree; + + struct btree *b; + struct btree_node_iter node_iter; + struct bkey unpacked; + struct journal_iter journal; enum last_key_returned { @@ -38,12 +47,14 @@ struct btree_and_journal_iter { void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -struct journal_key *journal_key_search(struct journal_keys *, - enum btree_id, struct bpos); + void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, struct btree_trans *, struct journal_keys *, enum btree_id, struct bpos); +void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, + struct journal_keys *, + struct btree *); int bch2_fs_recovery(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *); |