summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-10-05 23:02:38 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2016-10-07 12:37:20 -0800
commit96627db39e119fb8e6ec4ee7da7d7ae388cb431a (patch)
tree911e7749f40684327e22d4af6191bdf26b3bb9ec
parented55d6e18a6699e8628c8a384d7f8e07eee96889 (diff)
bcache: Simpler keylists
With scan keylists gone, keylists don't have to hold more than a few keys anymore, and we can go back to a much simpler implementation. Yay for deleting code. This also means we can finally fix the definition of BKEY_EXTENT_VAL_U64s_MAX...
-rw-r--r--drivers/md/bcache/btree_gc.c4
-rw-r--r--drivers/md/bcache/btree_update.c25
-rw-r--r--drivers/md/bcache/fs-io.c2
-rw-r--r--drivers/md/bcache/io.c8
-rw-r--r--drivers/md/bcache/keylist.c119
-rw-r--r--drivers/md/bcache/keylist.h105
-rw-r--r--drivers/md/bcache/keylist_types.h40
-rw-r--r--drivers/md/bcache/move.c2
-rw-r--r--include/trace/events/bcache.h32
-rw-r--r--include/uapi/linux/bcache.h13
10 files changed, 80 insertions, 270 deletions
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 8bd6070cfee0..f658012c02bd 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -468,7 +468,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
if (IS_ERR(res))
return;
- if (bch_keylist_realloc(&keylist,
+ if (bch_keylist_realloc(&keylist, NULL, 0,
(BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) {
trace_bcache_btree_gc_coalesce_fail(c);
goto out;
@@ -636,7 +636,7 @@ next:
}
}
out:
- bch_keylist_free(&keylist);
+ bch_keylist_free(&keylist, NULL);
bch_btree_reserve_put(c, res);
}
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index bfbc2964394f..a8df0fab3e5f 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -733,10 +733,9 @@ static void verify_keys_sorted(struct keylist *l)
#ifdef CONFIG_BCACHE_DEBUG
struct bkey_i *k;
- for (k = l->bot;
- k < l->top && bkey_next(k) < l->top;
- k = bkey_next(k))
- BUG_ON(bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
+ for_each_keylist_key(l, k)
+ BUG_ON(bkey_next(k) != l->top &&
+ bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
#endif
}
@@ -1100,7 +1099,7 @@ bch_btree_insert_keys_interior(struct btree *b,
btree_node_lock_for_insert(b, iter);
- if (bch_keylist_nkeys(insert_keys) >
+ if (bch_keylist_u64s(insert_keys) >
bch_btree_keys_u64s_remaining(iter->c, b)) {
btree_node_unlock_write(b, iter);
return BTREE_INSERT_BTREE_NODE_FULL;
@@ -1123,7 +1122,7 @@ bch_btree_insert_keys_interior(struct btree *b,
bch_insert_fixup_btree_ptr(iter, b, insert,
&node_iter, &res->disk_res);
- bch_keylist_dequeue(insert_keys);
+ bch_keylist_pop_front(insert_keys);
}
btree_interior_update_updated_btree(iter->c, as, b);
@@ -1232,7 +1231,7 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b,
while (!bch_keylist_empty(keys)) {
k = bch_keylist_front(keys);
- BUG_ON(bch_keylist_nkeys(keys) >
+ BUG_ON(bch_keylist_u64s(keys) >
bch_btree_keys_u64s_remaining(iter->c, b));
BUG_ON(bkey_cmp(k->k.p, b->data->min_key) < 0);
@@ -1242,7 +1241,7 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b,
}
bch_insert_fixup_btree_ptr(iter, b, k, &node_iter, &res->disk_res);
- bch_keylist_dequeue(keys);
+ bch_keylist_pop_front(keys);
}
six_unlock_write(&b->lock);
@@ -1260,7 +1259,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
struct btree *n1, *n2 = NULL, *n3 = NULL;
uint64_t start_time = local_clock();
unsigned u64s_to_insert = b->level
- ? bch_keylist_nkeys(insert_keys) : 0;
+ ? bch_keylist_u64s(insert_keys) : 0;
BUG_ON(!parent && (b != btree_node_root(b)));
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level));
@@ -1745,7 +1744,7 @@ int bch_btree_insert_list_at(struct btree_iter *iter,
BUG_ON(bch_keylist_empty(keys));
verify_keys_sorted(keys);
- while (1) {
+ while (!bch_keylist_empty(keys)) {
/* need to traverse between each insert */
int ret = bch_btree_iter_traverse(iter);
if (ret)
@@ -1757,10 +1756,10 @@ int bch_btree_insert_list_at(struct btree_iter *iter,
if (ret)
return ret;
- bch_keylist_dequeue(keys);
- if (bch_keylist_empty(keys))
- return 0;
+ bch_keylist_pop_front(keys);
}
+
+ return 0;
}
/**
diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c
index ec460634aad1..05ce21bc9990 100644
--- a/drivers/md/bcache/fs-io.c
+++ b/drivers/md/bcache/fs-io.c
@@ -333,7 +333,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
if (ret)
break;
- bch_keylist_dequeue(keys);
+ bch_keylist_pop_front(keys);
} while (!bch_keylist_empty(keys));
bch_btree_iter_unlock(&extent_iter);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index c2829114f4f2..a984dc44cdd1 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -210,7 +210,7 @@ static void bch_write_done(struct closure *cl)
bch_disk_reservation_put(op->c, &op->res);
percpu_ref_put(&op->c->writes);
- bch_keylist_free(&op->insert_keys);
+ bch_keylist_free(&op->insert_keys, op->inline_keys);
closure_return(cl);
}
@@ -342,7 +342,7 @@ static void bch_write_io_error(struct closure *cl)
} else {
/* TODO: We could try to recover from this. */
while (!bch_keylist_empty(&op->insert_keys))
- bch_keylist_dequeue(&op->insert_keys);
+ bch_keylist_pop_front(&op->insert_keys);
op->error = -EIO;
op->flags |= BCH_WRITE_DONE;
@@ -560,6 +560,8 @@ static void __bch_write(struct closure *cl)
/* for the device pointers and 1 for the chksum */
if (bch_keylist_realloc(&op->insert_keys,
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
BKEY_EXTENT_U64s_MAX))
continue_at(cl, bch_write_index, op->c->wq);
@@ -623,7 +625,7 @@ static void __bch_write(struct closure *cl)
if (!(op->flags & BCH_WRITE_CACHED))
bch_check_mark_super(op->c, k, false);
- bch_keylist_enqueue(&op->insert_keys);
+ bch_keylist_push(&op->insert_keys);
trace_bcache_cache_insert(&k->k);
} while (ret);
diff --git a/drivers/md/bcache/keylist.c b/drivers/md/bcache/keylist.c
index b7af04e5adfa..789e51ef6cee 100644
--- a/drivers/md/bcache/keylist.c
+++ b/drivers/md/bcache/keylist.c
@@ -1,120 +1,55 @@
#include "bcache.h"
-#include "btree_gc.h"
-#include "btree_iter.h"
-#include "extents.h"
#include "keylist.h"
-#include <trace/events/bcache.h>
-
-/* Utilities for plain keylists */
-
-int bch_keylist_realloc_max(struct keylist *l,
- unsigned needu64s,
- unsigned maxu64s)
+int bch_keylist_realloc(struct keylist *l, u64 *inline_u64s,
+ size_t nr_inline_u64s, size_t new_u64s)
{
- size_t oldcap = bch_keylist_capacity(l);
- size_t newsize = max(oldcap, BKEY_EXTENT_U64s_MAX) + needu64s;
+ size_t oldsize = bch_keylist_u64s(l);
+ size_t newsize = oldsize + new_u64s;
+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
u64 *new_keys;
- if (bch_keylist_fits(l, needu64s))
- return 0;
-
- /*
- * The idea here is that the allocated size is always a power of two:
- * thus, we know we need to reallocate if current_space_used and
- * current_space_used + new_space spans a power of two
- *
- * Note that the max size may not be a power of two, in which case,
- * the last reallocation may allocate very few new entries.
- */
newsize = roundup_pow_of_two(newsize);
- /* We simulate being out of memory -- the code using the key list
- has to handle that case. */
- if (newsize > maxu64s) {
- if (oldcap >= maxu64s) {
- trace_bcache_keylist_realloc_full(l);
- return -ENOMEM;
- }
- newsize = maxu64s;
- }
-
- new_keys = kmalloc_array(newsize, sizeof(u64), GFP_NOIO);
+ if (newsize <= nr_inline_u64s ||
+ (old_buf && roundup_pow_of_two(oldsize) == newsize))
+ return 0;
- if (!new_keys) {
- trace_bcache_keylist_realloc_fail(l);
+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO);
+ if (!new_keys)
return -ENOMEM;
- }
-
- /* Has @top wrapped around? */
- if (l->top_p < l->bot_p) {
- /*
- * The FIFO wraps around the end with a "gap" in the
- * middle. Copy the first half to the beginning and the
- * second to the end and grow the gap.
- */
-
- /* Copy @start_keys up to @top */
- memcpy(new_keys,
- l->start_keys_p,
- (l->top_p - l->start_keys_p) * sizeof(u64));
-
- /* Copy @bot up to @end_keys */
- memcpy(new_keys + newsize - (l->end_keys_p - l->bot_p),
- l->bot_p,
- (l->end_keys_p - l->bot_p) * sizeof(u64));
-
- l->top_p = new_keys + (l->top_p - l->start_keys_p);
- l->bot_p = new_keys + newsize - (l->end_keys_p - l->bot_p);
- } else {
- /*
- * Else copy everything over and shift the bottom of
- * the FIFO to align with the start of the keylist
- */
- memcpy(new_keys,
- l->bot_p,
- (l->top_p - l->bot_p) * sizeof(u64));
- l->top_p = new_keys + (l->top_p - l->bot_p);
- l->bot_p = new_keys;
- }
- if (l->has_buf)
- kfree(l->start_keys_p);
- l->has_buf = true;
+ if (!old_buf)
+ memcpy(new_keys, inline_u64s, sizeof(u64) * oldsize);
- l->start_keys_p = new_keys;
- l->end_keys_p = new_keys + newsize;
-
- trace_bcache_keylist_realloc(l);
+ l->keys_p = new_keys;
+ l->top_p = new_keys + oldsize;
return 0;
}
-int bch_keylist_realloc(struct keylist *l, unsigned needu64s)
-{
- return bch_keylist_realloc_max(l, needu64s, KEYLIST_MAX);
-}
-
void bch_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
{
- struct bkey_i *where = l->bot;
-
- /*
- * Shouldn't fire since we only use this on a fresh keylist
- * before calling bch_keylist_dequeue()
- */
- BUG_ON(l->top_p < l->bot_p);
+ struct bkey_i *where;
- while (where != l->top &&
- bkey_cmp(insert->k.p, where->k.p) >= 0)
- where = bkey_next(where);
+ for_each_keylist_key(l, where)
+ if (bkey_cmp(insert->k.p, where->k.p) < 0)
+ break;
memmove((u64 *) where + insert->k.u64s,
where,
((void *) l->top) - ((void *) where));
l->top_p += insert->k.u64s;
- BUG_ON(l->top_p > l->end_keys_p);
bkey_copy(where, insert);
}
+
+void bch_keylist_pop_front(struct keylist *l)
+{
+ l->top_p -= bch_keylist_front(l)->k.u64s;
+
+ memmove(l->keys,
+ bkey_next(l->keys),
+ bch_keylist_bytes(l));
+}
diff --git a/drivers/md/bcache/keylist.h b/drivers/md/bcache/keylist.h
index 990ba72ee60c..1166f9415f1f 100644
--- a/drivers/md/bcache/keylist.h
+++ b/drivers/md/bcache/keylist.h
@@ -3,117 +3,60 @@
#include "keylist_types.h"
+int bch_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
+void bch_keylist_add_in_order(struct keylist *, struct bkey_i *);
+void bch_keylist_pop_front(struct keylist *);
+
static inline void bch_keylist_init(struct keylist *l, u64 *inline_keys,
size_t nr_inline_u64s)
{
- l->bot_p = l->top_p = l->start_keys_p = inline_keys;
- l->end_keys_p = l->start_keys_p + nr_inline_u64s;
- l->has_buf = false;
-}
-
-static inline size_t bch_keylist_capacity(struct keylist *l)
-{
- return l->end_keys_p - l->start_keys_p;
+ l->top_p = l->keys_p = inline_keys;
}
-/*
- * XXX: why are we using BKEY_EXTENT_U64s_MAX here? keylists aren't used just
- * for extents, this doesn't make any sense
- */
-
-static inline bool bch_keylist_fits(struct keylist *l, size_t u64s)
+static inline void bch_keylist_free(struct keylist *l, u64 *inline_keys)
{
- if (l->bot_p > l->top_p)
- return (l->bot_p - l->top_p) > u64s;
- else if (l->top_p + u64s + BKEY_EXTENT_U64s_MAX > l->end_keys_p)
- return l->start_keys_p != l->bot_p;
- else
- return true;
-}
-
-static inline struct bkey_i *__bch_keylist_next(struct keylist *l,
- struct bkey_i *k)
-{
- k = bkey_next(k);
- BUG_ON(k > l->end_keys);
-
- /* single_keylists don't wrap */
- if (k == l->top)
- return k;
-
- if ((u64 *) k + BKEY_EXTENT_U64s_MAX > l->end_keys_p)
- return l->start_keys;
-
- return k;
+ if (l->keys_p != inline_keys)
+ kfree(l->keys_p);
+ memset(l, 0, sizeof(*l));
}
-#define for_each_keylist_key(_keys, _k) \
- for (_k = ACCESS_ONCE((_keys)->bot); \
- _k != (_keys)->top; \
- _k = __bch_keylist_next(_keys, _k))
-
-static inline void bch_keylist_enqueue(struct keylist *l)
+static inline void bch_keylist_push(struct keylist *l)
{
- BUG_ON(!bch_keylist_fits(l, l->top->k.u64s));
- l->top = __bch_keylist_next(l, l->top);
+ l->top = bkey_next(l->top);
}
static inline void bch_keylist_add(struct keylist *l, const struct bkey_i *k)
{
bkey_copy(l->top, k);
- bch_keylist_enqueue(l);
+ bch_keylist_push(l);
}
static inline bool bch_keylist_empty(struct keylist *l)
{
- return l->bot == l->top;
+ return l->top == l->keys;
}
-static inline void bch_keylist_free(struct keylist *l)
+static inline size_t bch_keylist_u64s(struct keylist *l)
{
- if (l->has_buf)
- kfree(l->start_keys_p);
- memset(l, 0, sizeof(*l));
+ return l->top_p - l->keys_p;
}
-/*
- * This returns the number of u64s, rather than the number of keys. As keys are
- * variable sized, the actual number of keys would have to be counted.
- */
-static inline size_t bch_keylist_nkeys(struct keylist *l)
+static inline size_t bch_keylist_bytes(struct keylist *l)
{
- /*
- * We don't know the exact number of u64s in the wrapped case
- * because of internal fragmentation at the end!
- */
- if (l->top_p >= l->bot_p)
- return l->top_p - l->bot_p;
- else
- return ((l->top_p - l->start_keys_p) +
- (l->end_keys_p - l->bot_p));
+ return bch_keylist_u64s(l) * sizeof(u64);
}
static inline struct bkey_i *bch_keylist_front(struct keylist *l)
{
- return l->bot;
+ return l->keys;
}
-static inline void bch_keylist_dequeue(struct keylist *l)
-{
- BUG_ON(bch_keylist_empty(l));
- l->bot = __bch_keylist_next(l, l->bot);
-}
+#define for_each_keylist_key(_keylist, _k) \
+ for (_k = (_keylist)->keys; \
+ _k != (_keylist)->top; \
+ _k = bkey_next(_k))
-#define keylist_single(k) \
-((struct keylist) { \
- .start_keys = k, \
- .top = bkey_next(k), \
- .bot = k, \
- .end_keys = bkey_next(k) \
-})
-
-void bch_keylist_add_in_order(struct keylist *, struct bkey_i *);
-int bch_keylist_realloc(struct keylist *, unsigned);
-int bch_keylist_realloc_max(struct keylist *, unsigned, unsigned);
+#define keylist_single(k) \
+ ((struct keylist) { .keys = k, .top = bkey_next(k) })
#endif /* _BCACHE_KEYLIST_H */
diff --git a/drivers/md/bcache/keylist_types.h b/drivers/md/bcache/keylist_types.h
index 156fbe0745fd..195785bf6181 100644
--- a/drivers/md/bcache/keylist_types.h
+++ b/drivers/md/bcache/keylist_types.h
@@ -1,51 +1,15 @@
#ifndef _BCACHE_KEYLIST_TYPES_H
#define _BCACHE_KEYLIST_TYPES_H
-/*
- * Keylists are growable FIFOs storing bkeys.
- *
- * New keys are added via bch_keylist_enqueue(), which increments @top until
- * it wraps around.
- *
- * Old keys are removed via bch_keylist_dequeue() which increments @bot
- * until it wraps around.
- *
- * If @top == @bot, the keylist is empty.
- *
- * We always ensure there is room for a maximum-sized extent key at @top;
- * that is, @top_p + BKEY_EXTENT_U64s_MAX <= @end_keys_p.
- *
- * If this invariant does not hold after enqueuing a key, we wrap @top back
- * to @start_keys_p.
- *
- * If at any time, @top_p + BKEY_EXTENT_U64s_MAX >= @bot_p, the keylist is
- * full.
- */
-
-#define KEYLIST_MAX (1 << 18)
-
struct keylist {
- /* This is a pointer to the LSB (inline_keys until realloc'd) */
union {
- struct bkey_i *start_keys;
- u64 *start_keys_p;
+ struct bkey_i *keys;
+ u64 *keys_p;
};
- /* This is a pointer to the next to enqueue */
union {
struct bkey_i *top;
u64 *top_p;
};
- /* This is a pointer to the next to dequeue */
- union {
- struct bkey_i *bot;
- u64 *bot_p;
- };
- /* This is a pointer to beyond the MSB */
- union {
- struct bkey_i *end_keys;
- u64 *end_keys_p;
- };
- bool has_buf;
};
#endif /* _BCACHE_KEYLIST_TYPES_H */
diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c
index ec54926426d0..9c49c6e64f1a 100644
--- a/drivers/md/bcache/move.c
+++ b/drivers/md/bcache/move.c
@@ -112,7 +112,7 @@ nomatch:
}
while (bkey_cmp(iter.pos, bch_keylist_front(keys)->k.p) >= 0) {
- bch_keylist_dequeue(keys);
+ bch_keylist_pop_front(keys);
if (bch_keylist_empty(keys))
goto out;
}
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 73b80c8648e4..e0476d5bfb99 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -998,38 +998,6 @@ DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail,
/* Keylists */
-DECLARE_EVENT_CLASS(keylist,
- TP_PROTO(struct keylist *keys),
- TP_ARGS(keys),
-
- TP_STRUCT__entry(
- __field(struct keylist *, keys )
- __field(size_t, capacity )
- ),
-
- TP_fast_assign(
- __entry->keys = keys;
- __entry->capacity = bch_keylist_capacity(keys);
- ),
-
- TP_printk("%p capacity %zu", __entry->keys, __entry->capacity)
-);
-
-DEFINE_EVENT(keylist, bcache_keylist_realloc,
- TP_PROTO(struct keylist *keys),
- TP_ARGS(keys)
-);
-
-DEFINE_EVENT(keylist, bcache_keylist_realloc_full,
- TP_PROTO(struct keylist *keys),
- TP_ARGS(keys)
-);
-
-DEFINE_EVENT(keylist, bcache_keylist_realloc_fail,
- TP_PROTO(struct keylist *keys),
- TP_ARGS(keys)
-);
-
TRACE_EVENT(bcache_keyscan,
TP_PROTO(unsigned nr_found,
unsigned start_inode, u64 start_offset,
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 999ed8f4c535..1c5111d4d5a6 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -461,19 +461,18 @@ BKEY_VAL_TYPE(extent, BCH_EXTENT);
sizeof(struct bch_extent_ptr)) / sizeof(u64))
/* Maximum possible size of an entire extent value: */
-#if 0
/* There's a hack in the keylist code that needs to be fixed.. */
#define BKEY_EXTENT_VAL_U64s_MAX \
(BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
-#else
-#define BKEY_EXTENT_VAL_U64s_MAX 8
-#endif
/* * Maximum possible size of an entire extent, key + value: */
-#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-#define BKEY_BTREE_PTR_VAL_U64s_MAX BCH_REPLICAS_MAX
-#define BKEY_BTREE_PTR_U64s_MAX (BKEY_U64s + BCH_REPLICAS_MAX)
+/* Btree pointers don't carry around checksums: */
+#define BKEY_BTREE_PTR_VAL_U64s_MAX \
+ ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
+#define BKEY_BTREE_PTR_U64s_MAX \
+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
/* Inodes */