summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2015-08-21 01:42:34 -0800
committer Kent Overstreet <kent.overstreet@gmail.com> 2017-01-18 20:26:54 -0900
commit 0789d0c893dcd0c28f2d72010d7056be70573397 (patch)
tree cee09dcc4a4a557eacfb1894add7bcfd268823be
parent 9df2276e331c9068feda1e20c5670f7c118a4575 (diff)
bcache: Don't insert deleted keys with nonzero size
This is the other half of making it possible for the btree node iterator to not compare against bkey_start_pos(): in order to still see keys in order by start pos, we need to ensure that a 0 size extent sorts after a nonzero size extent at the same position.

We could do this by comparing !k->size if the keys compare as equal, but part of why we're making this change is for packed bkeys - once we have packed bkeys, comparing !k->size will require unpacking the key (expensive).

So we'll use bkey_deleted() as a proxy for !k->size, and disallow bkey_deleted(k) == true and k->size != 0.

Where previously we did discards by inserting KEY_DELETED keys, now we'll just use KEY_DISCARD with version = 0 - and bch_extent_normalize() will drop these keys later (the idea being that we can drop a KEY_DISCARD key with version = 0 without losing information).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- drivers/md/bcache/bkey_methods.c 5
-rw-r--r-- drivers/md/bcache/bset.c 3
-rw-r--r-- drivers/md/bcache/extents.c 11
-rw-r--r-- drivers/md/bcache/io.c 13
-rw-r--r-- drivers/md/bcache/request.c 12
-rw-r--r-- include/uapi/linux/bcache.h 10
6 files changed, 23 insertions, 31 deletions
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index 53d8bef0e1fa..cb5974e80f00 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -17,10 +17,11 @@ bool bkey_invalid(struct cache_set *c,
{
const struct bkey_ops *ops = bch_bkey_ops[type];
- if (k->size && !ops->is_extents)
+ if (k->u64s < BKEY_U64s)
return true;
- if (k->u64s < BKEY_U64s)
+ if (k->size &&
+ (bkey_deleted(k) || !ops->is_extents))
return true;
switch (k->type) {
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 220f4e6b70d8..cca3c1c68c45 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -818,7 +818,8 @@ void bch_bset_insert(struct btree_keys *b,
BKEY_PADDED(k) tmp;
BUG_ON(insert->u64s > bch_btree_keys_u64s_remaining(b));
- BUG_ON(b->ops->is_extents && !insert->size);
+ BUG_ON(b->ops->is_extents &&
+ (!insert->size || bkey_deleted(insert)));
BUG_ON(!b->last_set_unwritten);
BUG_ON(where < i->start);
BUG_ON(where > bset_bkey_last(i));
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index b80603bb3c53..887dc464552e 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -166,6 +166,8 @@ unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *c,
return 0;
case KEY_TYPE_DISCARD:
+ return k->version ? BKEY_U64s : 0;
+
case KEY_TYPE_ERROR:
return BKEY_U64s;
@@ -1101,6 +1103,7 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
struct bpos orig_insert = insert->p;
bool inserted = false;
+ BUG_ON(bkey_deleted(insert));
BUG_ON(!insert->size);
/*
@@ -1180,7 +1183,7 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
&inserted, res))
continue;
- if (k->size && !bkey_deleted(insert) &&
+ if (k->size &&
insert->version < k->version) {
handle_existing_key_newer(b, iter, insert, k,
&inserted, res);
@@ -1390,10 +1393,14 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
switch (k->type) {
case KEY_TYPE_DELETED:
- case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
break;
+ case KEY_TYPE_DISCARD:
+ if (!k->version)
+ set_bkey_deleted(k);
+ break;
+
case KEY_TYPE_COOKIE:
set_bkey_deleted(k);
break;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 3a35fc4e300a..09441498bf6a 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -361,7 +361,7 @@ static void bch_write_error(struct closure *cl)
struct bkey *n = bkey_next(src);
set_bkey_val_u64s(src, 0);
- src->type = KEY_TYPE_DELETED;
+ src->type = KEY_TYPE_DISCARD;
memmove(dst, src, bkey_bytes(src));
dst = bkey_next(dst);
@@ -691,7 +691,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
* allocate pointers
*/
op->insert_key.type = op->discard
- ? KEY_TYPE_DELETED
+ ? KEY_TYPE_DISCARD
: BCH_EXTENT;
}
@@ -737,16 +737,13 @@ int bch_discard(struct cache_set *c, struct bpos start,
/* create the biggest key we can, to minimize writes */
bkey_init(&erase);
- erase.p = bkey_start_pos(k);
+ erase.type = KEY_TYPE_DISCARD;
+ erase.version = version;
+ erase.p = bkey_start_pos(k);
bch_key_resize(&erase, max_sectors);
bch_cut_front(iter.pos, &erase);
n = erase.p;
- erase.version = version;
- erase.type = version
- ? KEY_TYPE_DISCARD
- : KEY_TYPE_DELETED;
-
bch_cut_back(end, &erase);
ret = bch_btree_insert_at(&iter, &keylist_single(&erase),
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 641f0b85b959..d5954802f53a 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -438,17 +438,7 @@ retry:
goto out;
}
- if (!ca && k->type == KEY_TYPE_DISCARD) {
- /* The data is zeros. Instantiate them. */
- unsigned bytes = min_t(unsigned, sectors,
- bio_sectors(bio)) << 9;
-
- swap(bio->bi_iter.bi_size, bytes);
- zero_fill_bio(bio);
- swap(bio->bi_iter.bi_size, bytes);
-
- bio_advance(bio, bytes);
- } else if (!ca) {
+ if (!ca) {
/* not present (hole), or stale cached data */
if (cached_dev_cache_miss(&iter, s, bio, sectors)) {
k = bch_btree_iter_peek_with_holes(&iter);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index c65da0ec268a..0500927f6be0 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -152,13 +152,9 @@ struct bkey_i_##name { \
* override keys in composition order. Their version number is ignored.
*
* - DISCARDED keys indicate that the data is all 0s because it has been
- * discarded. Unlike DELETED keys, DISCARDED keys have version numbers so
- * that discarded regions don't revert when a server that was offline during
- * the discard comes back on line. Unlike DELETED keys, which can be
- * eliminated by (node-) local GC, DISCARDED keys can only be eliminated by:
- * - cluster-wide GC, when all the servers are online and the DISCARDED keys are
- * no longer overriding any older keys, or
- * - local GC when completely overridden by younger writes/discards.
+ * discarded. DISCARDs may have a version; if the version is nonzero the key
+ * will be persistent, otherwise the key will be dropped whenever the btree
+ * node is rewritten (like DELETED keys).
*
* - ERROR: any read of the data returns a read error, as the data was lost due
* to a failing device. Like DISCARDED keys, they can be removed (overridden)