diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2015-08-21 01:42:34 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2017-01-18 20:26:54 -0900 |
commit | 0789d0c893dcd0c28f2d72010d7056be70573397 (patch) | |
tree | cee09dcc4a4a557eacfb1894add7bcfd268823be | |
parent | 9df2276e331c9068feda1e20c5670f7c118a4575 (diff) |
bcache: Don't insert deleted keys with nonzero size
This is the other half of making it possible for the btree node iterator to not
compare against bkey_start_pos(): in order to still see keys in order by
start pos, we need to ensure that a 0 size extent sorts after a nonzero size
extent at the same position.
We could do this by comparing !k->size if the keys compare as equal, but part of
why we're making this change is for packed bkeys - once we have packed bkeys,
comparing !k->size will require unpacking the key (expensive).
So we'll use bkey_deleted() as a proxy for !k->size, and disallow keys for
which bkey_deleted(k) is true but k->size != 0.
Where previously we did discards by inserting KEY_TYPE_DELETED keys, now we'll
just use KEY_TYPE_DISCARD with version = 0 - and bch_extent_normalize() will drop
these keys later (the idea being that we can drop a KEY_TYPE_DISCARD key with
version = 0 without losing information).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | drivers/md/bcache/bkey_methods.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 11 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 13 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 12 | ||||
-rw-r--r-- | include/uapi/linux/bcache.h | 10 |
6 files changed, 23 insertions, 31 deletions
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c index 53d8bef0e1fa..cb5974e80f00 100644 --- a/drivers/md/bcache/bkey_methods.c +++ b/drivers/md/bcache/bkey_methods.c @@ -17,10 +17,11 @@ bool bkey_invalid(struct cache_set *c, { const struct bkey_ops *ops = bch_bkey_ops[type]; - if (k->size && !ops->is_extents) + if (k->u64s < BKEY_U64s) return true; - if (k->u64s < BKEY_U64s) + if (k->size && + (bkey_deleted(k) || !ops->is_extents)) return true; switch (k->type) { diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 220f4e6b70d8..cca3c1c68c45 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -818,7 +818,8 @@ void bch_bset_insert(struct btree_keys *b, BKEY_PADDED(k) tmp; BUG_ON(insert->u64s > bch_btree_keys_u64s_remaining(b)); - BUG_ON(b->ops->is_extents && !insert->size); + BUG_ON(b->ops->is_extents && + (!insert->size || bkey_deleted(insert))); BUG_ON(!b->last_set_unwritten); BUG_ON(where < i->start); BUG_ON(where > bset_bkey_last(i)); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index b80603bb3c53..887dc464552e 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -166,6 +166,8 @@ unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *c, return 0; case KEY_TYPE_DISCARD: + return k->version ? 
BKEY_U64s : 0; + case KEY_TYPE_ERROR: return BKEY_U64s; @@ -1101,6 +1103,7 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert, struct bpos orig_insert = insert->p; bool inserted = false; + BUG_ON(bkey_deleted(insert)); BUG_ON(!insert->size); /* @@ -1180,7 +1183,7 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert, &inserted, res)) continue; - if (k->size && !bkey_deleted(insert) && + if (k->size && insert->version < k->version) { handle_existing_key_newer(b, iter, insert, k, &inserted, res); @@ -1390,10 +1393,14 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k) switch (k->type) { case KEY_TYPE_DELETED: - case KEY_TYPE_DISCARD: case KEY_TYPE_ERROR: break; + case KEY_TYPE_DISCARD: + if (!k->version) + set_bkey_deleted(k); + break; + case KEY_TYPE_COOKIE: set_bkey_deleted(k); break; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 3a35fc4e300a..09441498bf6a 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -361,7 +361,7 @@ static void bch_write_error(struct closure *cl) struct bkey *n = bkey_next(src); set_bkey_val_u64s(src, 0); - src->type = KEY_TYPE_DELETED; + src->type = KEY_TYPE_DISCARD; memmove(dst, src, bkey_bytes(src)); dst = bkey_next(dst); @@ -691,7 +691,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, * allocate pointers */ op->insert_key.type = op->discard - ? KEY_TYPE_DELETED + ? KEY_TYPE_DISCARD : BCH_EXTENT; } @@ -737,16 +737,13 @@ int bch_discard(struct cache_set *c, struct bpos start, /* create the biggest key we can, to minimize writes */ bkey_init(&erase); - erase.p = bkey_start_pos(k); + erase.type = KEY_TYPE_DISCARD; + erase.version = version; + erase.p = bkey_start_pos(k); bch_key_resize(&erase, max_sectors); bch_cut_front(iter.pos, &erase); n = erase.p; - erase.version = version; - erase.type = version - ? 
KEY_TYPE_DISCARD - : KEY_TYPE_DELETED; - bch_cut_back(end, &erase); ret = bch_btree_insert_at(&iter, &keylist_single(&erase), diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 641f0b85b959..d5954802f53a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -438,17 +438,7 @@ retry: goto out; } - if (!ca && k->type == KEY_TYPE_DISCARD) { - /* The data is zeros. Instantiate them. */ - unsigned bytes = min_t(unsigned, sectors, - bio_sectors(bio)) << 9; - - swap(bio->bi_iter.bi_size, bytes); - zero_fill_bio(bio); - swap(bio->bi_iter.bi_size, bytes); - - bio_advance(bio, bytes); - } else if (!ca) { + if (!ca) { /* not present (hole), or stale cached data */ if (cached_dev_cache_miss(&iter, s, bio, sectors)) { k = bch_btree_iter_peek_with_holes(&iter); diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index c65da0ec268a..0500927f6be0 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -152,13 +152,9 @@ struct bkey_i_##name { \ * override keys in composition order. Their version number is ignored. * * - DISCARDED keys indicate that the data is all 0s because it has been - * discarded. Unlike DELETED keys, DISCARDED keys have version numbers so - * that discarded regions don't revert when a server that was offline during - * the discard comes back on line. Unlike DELETED keys, which can be - * eliminated by (node-) local GC, DISCARDED keys can only be eliminated by: - * - cluster-wide GC, when all the servers are online and the DISCARDED keys are - * no longer overriding any older keys, or - * - local GC when completely overridden by younger writes/discards. + * discarded. DISCARDs may have a version; if the version is nonzero the key + * will be persistent, otherwise the key will be dropped whenever the btree + * node is rewritten (like DELETED keys). * * - ERROR: any read of the data returns a read error, as the data was lost due * to a failing device. 
Like DISCARDED keys, they can be removed (overridden) |