diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-03-04 22:29:25 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2021-09-26 19:48:43 -0400 |
commit | 948f0452b669ba1cfa926ba1c02e4e689262fe3b (patch) | |
tree | 37e2f10e87b00fb5d7adfe6582e09ddd61a4f105 | |
parent | fc58cfd0af87e4d42d97f7150f62ca824254745a (diff) |
bcachefs: BTREE_ITER_FILTER_SNAPSHOTS
For snapshots, we need to implement btree lookups that return the first
key whose snapshot is an ancestor of the snapshot ID the lookup is being
done in - and filter out keys in unrelated snapshots. This patch adds the
btree iterator flag BTREE_ITER_FILTER_SNAPSHOTS, which does that filtering.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/btree_iter.c | 151 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 9 | ||||
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 1 |
4 files changed, 156 insertions, 8 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index ef9719c1bff2..06daa95f2abf 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -13,6 +13,7 @@ #include "extents.h" #include "journal.h" #include "replicas.h" +#include "subvolume.h" #include <linux/prefetch.h> #include <trace/events/bcachefs.h> @@ -681,6 +682,55 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) bkey_cmp(iter->pos, iter->k.p) > 0); } +static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) +{ + struct btree_trans *trans = iter->trans; + struct btree_iter copy; + struct bkey_s_c prev; + int ret = 0; + + if (!bch2_debug_check_iterators) + return 0; + + if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) + return 0; + + if (bkey_err(k) || !k.k) + return 0; + + BUG_ON(!bch2_snapshot_is_ancestor(trans->c, + iter->snapshot, + k.k->p.snapshot)); + + bch2_trans_iter_init(trans, ©, iter->btree_id, iter->pos, + BTREE_ITER_ALL_SNAPSHOTS); + prev = bch2_btree_iter_prev(©); + if (!prev.k) + goto out; + + ret = bkey_err(prev); + if (ret) + goto out; + + if (!bkey_cmp(prev.k->p, k.k->p) && + bch2_snapshot_is_ancestor(trans->c, iter->snapshot, + prev.k->p.snapshot) > 0) { + char buf1[100], buf2[200]; + + bch2_bkey_to_text(&PBUF(buf1), k.k); + bch2_bkey_to_text(&PBUF(buf2), prev.k); + + panic("iter snap %u\n" + "k %s\n" + "prev %s\n", + iter->snapshot, + buf1, buf2); + } +out: + bch2_trans_iter_exit(trans, ©); + return ret; +} + #else static inline void bch2_btree_path_verify_level(struct btree_trans *trans, @@ -689,6 +739,7 @@ static inline void bch2_btree_path_verify(struct btree_trans *trans, struct btree_path *path) {} static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} +static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; } #endif @@ -1978,11 +2029,25 @@ struct bkey_s_c 
bch2_btree_iter_peek(struct btree_iter *iter) } if (likely(k.k)) { - if (likely(!bkey_deleted(k.k))) - break; + /* + * We can never have a key in a leaf node at POS_MAX, so + * we don't have to check these successor() calls: + */ + if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && + !bch2_snapshot_is_ancestor(trans->c, + iter->snapshot, + k.k->p.snapshot)) { + search_key = bpos_successor(k.k->p); + continue; + } + + if (bkey_whiteout(k.k) && + !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { + search_key = bkey_successor(iter, k.k->p); + continue; + } - /* Advance to next key: */ - search_key = bkey_successor(iter, k.k->p); + break; } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) { /* Advance to next leaf node: */ search_key = bpos_successor(iter->path->l[0].b->key.k.p); @@ -2003,6 +2068,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) iter->pos = bkey_start_pos(k.k); + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) + iter->pos.snapshot = iter->snapshot; + cmp = bpos_cmp(k.k->p, iter->path->pos); if (cmp) { iter->path = bch2_btree_path_make_mut(trans, iter->path, @@ -2015,6 +2083,10 @@ out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + ret = bch2_btree_iter_verify_ret(iter, k); + if (unlikely(ret)) + return bkey_s_c_err(ret); + return k; } @@ -2038,7 +2110,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; struct bpos search_key = iter->pos; + struct btree_path *saved_path = NULL; struct bkey_s_c k; + struct bkey saved_k; + const struct bch_val *saved_v; int ret; EBUG_ON(iter->path->cached || iter->path->level); @@ -2046,6 +2121,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) + search_key.snapshot = U32_MAX; + while (1) { iter->path = 
btree_path_set_pos(trans, iter->path, search_key, iter->flags & BTREE_ITER_INTENT); @@ -2062,14 +2140,57 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) &iter->path->l[0], &iter->k); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0 - : bkey_cmp(k.k->p, iter->pos) > 0)) + ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0 + : bpos_cmp(k.k->p, search_key) > 0)) k = btree_path_level_prev(trans->c, iter->path, &iter->path->l[0], &iter->k); btree_path_check_sort(trans, iter->path, 0); if (likely(k.k)) { + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { + if (k.k->p.snapshot == iter->snapshot) + goto got_key; + + /* + * If we have a saved candidate, and we're no + * longer at the same _key_ (not pos), return + * that candidate + */ + if (saved_path && bkey_cmp(k.k->p, saved_k.p)) { + bch2_path_put(trans, iter->path, + iter->flags & BTREE_ITER_INTENT); + iter->path = saved_path; + saved_path = NULL; + iter->k = saved_k; + k.v = saved_v; + goto got_key; + } + + if (bch2_snapshot_is_ancestor(iter->trans->c, + iter->snapshot, + k.k->p.snapshot)) { + if (saved_path) + bch2_path_put(trans, saved_path, + iter->flags & BTREE_ITER_INTENT); + saved_path = btree_path_clone(trans, iter->path, + iter->flags & BTREE_ITER_INTENT); + saved_k = *k.k; + saved_v = k.v; + } + + search_key = bpos_predecessor(k.k->p); + continue; + } +got_key: + if (bkey_whiteout(k.k) && + !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { + search_key = bkey_predecessor(iter, k.k->p); + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) + search_key.snapshot = U32_MAX; + continue; + } + break; } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) { /* Advance to previous leaf node: */ @@ -2087,7 +2208,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) /* Extents can straddle iter->pos: */ if (bkey_cmp(k.k->p, iter->pos) < 0) iter->pos = k.k->p; + + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) + 
iter->pos.snapshot = iter->snapshot; out: + if (saved_path) + bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT); iter->path->should_be_locked = true; bch2_btree_iter_verify_entry_exit(iter); @@ -2136,7 +2262,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (unlikely(ret)) return bkey_s_c_err(ret); - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) { + if ((iter->flags & BTREE_ITER_CACHED) || + !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { struct bkey_i *next_update; next_update = iter->flags & BTREE_ITER_WITH_UPDATES @@ -2195,6 +2322,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + ret = bch2_btree_iter_verify_ret(iter, k); + if (unlikely(ret)) + return bkey_s_c_err(ret); return k; } @@ -2348,6 +2478,13 @@ static void __bch2_trans_iter_init(struct btree_trans *trans, if (!btree_type_has_snapshots(btree_id) && !(flags & __BTREE_ITER_ALL_SNAPSHOTS)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; +#if 0 + /* let's have this be explicitly set: */ + if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && + btree_type_has_snapshots(btree_id) && + !(flags & BTREE_ITER_ALL_SNAPSHOTS)) + flags |= BTREE_ITER_FILTER_SNAPSHOTS; +#endif if (!(flags & BTREE_ITER_ALL_SNAPSHOTS)) pos.snapshot = btree_type_has_snapshots(btree_id) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index be1bb489f3d6..19ca73f5ea22 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -234,6 +234,15 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it iter->pos = bkey_start_pos(&iter->k); } +static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot) +{ + struct bpos pos = iter->pos; + + iter->snapshot = snapshot; + pos.snapshot = snapshot; + bch2_btree_iter_set_pos(iter, pos); +} + /* * Unlocks before scheduling * Note: does not revalidate iterator diff --git 
a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index c019200a6125..4f1bc1d165aa 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -371,7 +371,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, BTREE_ITER_SLOTS| - BTREE_ITER_INTENT); + BTREE_ITER_INTENT| + BTREE_ITER_ALL_SNAPSHOTS); bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 262ee2d53322..7fcd2ceb51e9 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -209,6 +209,7 @@ struct btree_node_iter { #define BTREE_ITER_WITH_UPDATES (1 << 10) #define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11) #define BTREE_ITER_ALL_SNAPSHOTS (1 << 12) +#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13) enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, |