diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-10-05 23:02:38 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-10-07 12:37:20 -0800 |
commit | 96627db39e119fb8e6ec4ee7da7d7ae388cb431a (patch) | |
tree | 911e7749f40684327e22d4af6191bdf26b3bb9ec | |
parent | ed55d6e18a6699e8628c8a384d7f8e07eee96889 (diff) |
bcache: Simpler keylists
With scan keylists gone, keylists don't have to hold more than a few
keys anymore, and we can go back to a much simpler implementation. Yay
for deleting code.
This also means we can finally fix the definition of
BKEY_EXTENT_VAL_U64s_MAX...
-rw-r--r-- | drivers/md/bcache/btree_gc.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/btree_update.c | 25 | ||||
-rw-r--r-- | drivers/md/bcache/fs-io.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 8 | ||||
-rw-r--r-- | drivers/md/bcache/keylist.c | 119 | ||||
-rw-r--r-- | drivers/md/bcache/keylist.h | 105 | ||||
-rw-r--r-- | drivers/md/bcache/keylist_types.h | 40 | ||||
-rw-r--r-- | drivers/md/bcache/move.c | 2 | ||||
-rw-r--r-- | include/trace/events/bcache.h | 32 | ||||
-rw-r--r-- | include/uapi/linux/bcache.h | 13 |
10 files changed, 80 insertions, 270 deletions
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c index 8bd6070cfee0..f658012c02bd 100644 --- a/drivers/md/bcache/btree_gc.c +++ b/drivers/md/bcache/btree_gc.c @@ -468,7 +468,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES], if (IS_ERR(res)) return; - if (bch_keylist_realloc(&keylist, + if (bch_keylist_realloc(&keylist, NULL, 0, (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { trace_bcache_btree_gc_coalesce_fail(c); goto out; @@ -636,7 +636,7 @@ next: } } out: - bch_keylist_free(&keylist); + bch_keylist_free(&keylist, NULL); bch_btree_reserve_put(c, res); } diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c index bfbc2964394f..a8df0fab3e5f 100644 --- a/drivers/md/bcache/btree_update.c +++ b/drivers/md/bcache/btree_update.c @@ -733,10 +733,9 @@ static void verify_keys_sorted(struct keylist *l) #ifdef CONFIG_BCACHE_DEBUG struct bkey_i *k; - for (k = l->bot; - k < l->top && bkey_next(k) < l->top; - k = bkey_next(k)) - BUG_ON(bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0); + for_each_keylist_key(l, k) + BUG_ON(bkey_next(k) != l->top && + bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0); #endif } @@ -1100,7 +1099,7 @@ bch_btree_insert_keys_interior(struct btree *b, btree_node_lock_for_insert(b, iter); - if (bch_keylist_nkeys(insert_keys) > + if (bch_keylist_u64s(insert_keys) > bch_btree_keys_u64s_remaining(iter->c, b)) { btree_node_unlock_write(b, iter); return BTREE_INSERT_BTREE_NODE_FULL; @@ -1123,7 +1122,7 @@ bch_btree_insert_keys_interior(struct btree *b, bch_insert_fixup_btree_ptr(iter, b, insert, &node_iter, &res->disk_res); - bch_keylist_dequeue(insert_keys); + bch_keylist_pop_front(insert_keys); } btree_interior_update_updated_btree(iter->c, as, b); @@ -1232,7 +1231,7 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b, while (!bch_keylist_empty(keys)) { k = bch_keylist_front(keys); - BUG_ON(bch_keylist_nkeys(keys) > + BUG_ON(bch_keylist_u64s(keys) > bch_btree_keys_u64s_remaining(iter->c, b)); BUG_ON(bkey_cmp(k->k.p, b->data->min_key) < 0); @@ -1242,7 +1241,7 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b, } bch_insert_fixup_btree_ptr(iter, b, k, &node_iter, &res->disk_res); - bch_keylist_dequeue(keys); + bch_keylist_pop_front(keys); } six_unlock_write(&b->lock); @@ -1260,7 +1259,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter, struct btree *n1, *n2 = NULL, *n3 = NULL; uint64_t start_time = local_clock(); unsigned u64s_to_insert = b->level - ? bch_keylist_nkeys(insert_keys) : 0; + ? bch_keylist_u64s(insert_keys) : 0; BUG_ON(!parent && (b != btree_node_root(b))); BUG_ON(!btree_node_intent_locked(iter, btree_node_root(b)->level)); @@ -1745,7 +1744,7 @@ int bch_btree_insert_list_at(struct btree_iter *iter, BUG_ON(bch_keylist_empty(keys)); verify_keys_sorted(keys); - while (1) { + while (!bch_keylist_empty(keys)) { /* need to traverse between each insert */ int ret = bch_btree_iter_traverse(iter); if (ret) @@ -1757,10 +1756,10 @@ int bch_btree_insert_list_at(struct btree_iter *iter, if (ret) return ret; - bch_keylist_dequeue(keys); - if (bch_keylist_empty(keys)) - return 0; + bch_keylist_pop_front(keys); } + + return 0; } /** diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c index ec460634aad1..05ce21bc9990 100644 --- a/drivers/md/bcache/fs-io.c +++ b/drivers/md/bcache/fs-io.c @@ -333,7 +333,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop) if (ret) break; - bch_keylist_dequeue(keys); + bch_keylist_pop_front(keys); } while (!bch_keylist_empty(keys)); bch_btree_iter_unlock(&extent_iter); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index c2829114f4f2..a984dc44cdd1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -210,7 +210,7 @@ static void bch_write_done(struct closure *cl) bch_disk_reservation_put(op->c, &op->res); percpu_ref_put(&op->c->writes); - bch_keylist_free(&op->insert_keys); + bch_keylist_free(&op->insert_keys, op->inline_keys); closure_return(cl); } @@ -342,7 +342,7 @@ static void bch_write_io_error(struct closure *cl) } else { /* TODO: We could try to recover from this. */ while (!bch_keylist_empty(&op->insert_keys)) - bch_keylist_dequeue(&op->insert_keys); + bch_keylist_pop_front(&op->insert_keys); op->error = -EIO; op->flags |= BCH_WRITE_DONE; @@ -560,6 +560,8 @@ static void __bch_write(struct closure *cl) /* for the device pointers and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, + op->inline_keys, + ARRAY_SIZE(op->inline_keys), BKEY_EXTENT_U64s_MAX)) continue_at(cl, bch_write_index, op->c->wq); @@ -623,7 +625,7 @@ static void __bch_write(struct closure *cl) if (!(op->flags & BCH_WRITE_CACHED)) bch_check_mark_super(op->c, k, false); - bch_keylist_enqueue(&op->insert_keys); + bch_keylist_push(&op->insert_keys); trace_bcache_cache_insert(&k->k); } while (ret); diff --git a/drivers/md/bcache/keylist.c b/drivers/md/bcache/keylist.c index b7af04e5adfa..789e51ef6cee 100644 --- a/drivers/md/bcache/keylist.c +++ b/drivers/md/bcache/keylist.c @@ -1,120 +1,55 @@ #include "bcache.h" -#include "btree_gc.h" -#include "btree_iter.h" -#include "extents.h" #include "keylist.h" -#include <trace/events/bcache.h> - -/* Utilities for plain keylists */ - -int bch_keylist_realloc_max(struct keylist *l, - unsigned needu64s, - unsigned maxu64s) +int bch_keylist_realloc(struct keylist *l, u64 *inline_u64s, + size_t nr_inline_u64s, size_t new_u64s) { - size_t oldcap = bch_keylist_capacity(l); - size_t newsize = max(oldcap, BKEY_EXTENT_U64s_MAX) + needu64s; + size_t oldsize = bch_keylist_u64s(l); + size_t newsize = oldsize + new_u64s; + u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p; u64 *new_keys; - if (bch_keylist_fits(l, needu64s)) - return 0; - - /* - * The idea here is that the allocated size is always a power of two: - * thus, we know we need to reallocate if current_space_used and - * current_space_used + new_space spans a power of two - * - * Note that the max size may not be a power of two, in which case, - * the last reallocation may allocate very few new entries. - */ newsize = roundup_pow_of_two(newsize); - /* We simulate being out of memory -- the code using the key list - has to handle that case. */ - if (newsize > maxu64s) { - if (oldcap >= maxu64s) { - trace_bcache_keylist_realloc_full(l); - return -ENOMEM; - } - newsize = maxu64s; - } - - new_keys = kmalloc_array(newsize, sizeof(u64), GFP_NOIO); + if (newsize <= nr_inline_u64s || + (old_buf && roundup_pow_of_two(oldsize) == newsize)) + return 0; - if (!new_keys) { - trace_bcache_keylist_realloc_fail(l); + new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO); + if (!new_keys) return -ENOMEM; - } - - /* Has @top wrapped around? */ - if (l->top_p < l->bot_p) { - /* - * The FIFO wraps around the end with a "gap" in the - * middle. Copy the first half to the beginning and the - * second to the end and grow the gap. - */ - - /* Copy @start_keys up to @top */ - memcpy(new_keys, - l->start_keys_p, - (l->top_p - l->start_keys_p) * sizeof(u64)); - - /* Copy @bot up to @end_keys */ - memcpy(new_keys + newsize - (l->end_keys_p - l->bot_p), - l->bot_p, - (l->end_keys_p - l->bot_p) * sizeof(u64)); - - l->top_p = new_keys + (l->top_p - l->start_keys_p); - l->bot_p = new_keys + newsize - (l->end_keys_p - l->bot_p); - } else { - /* - * Else copy everything over and shift the bottom of - * the FIFO to align with the start of the keylist - */ - memcpy(new_keys, - l->bot_p, - (l->top_p - l->bot_p) * sizeof(u64)); - l->top_p = new_keys + (l->top_p - l->bot_p); - l->bot_p = new_keys; - } - if (l->has_buf) - kfree(l->start_keys_p); - l->has_buf = true; + if (!old_buf) + memcpy(new_keys, inline_u64s, sizeof(u64) * oldsize); - l->start_keys_p = new_keys; - l->end_keys_p = new_keys + newsize; - - trace_bcache_keylist_realloc(l); + l->keys_p = new_keys; + l->top_p = new_keys + oldsize; return 0; } -int bch_keylist_realloc(struct keylist *l, unsigned needu64s) -{ - return bch_keylist_realloc_max(l, needu64s, KEYLIST_MAX); -} - void bch_keylist_add_in_order(struct keylist *l, struct bkey_i *insert) { - struct bkey_i *where = l->bot; - - /* - * Shouldn't fire since we only use this on a fresh keylist - * before calling bch_keylist_dequeue() - */ - BUG_ON(l->top_p < l->bot_p); + struct bkey_i *where; - while (where != l->top && - bkey_cmp(insert->k.p, where->k.p) >= 0) - where = bkey_next(where); + for_each_keylist_key(l, where) + if (bkey_cmp(insert->k.p, where->k.p) < 0) + break; memmove((u64 *) where + insert->k.u64s, where, ((void *) l->top) - ((void *) where)); l->top_p += insert->k.u64s; - BUG_ON(l->top_p > l->end_keys_p); bkey_copy(where, insert); } + +void bch_keylist_pop_front(struct keylist *l) +{ + l->top_p -= bch_keylist_front(l)->k.u64s; + + memmove(l->keys, + bkey_next(l->keys), + bch_keylist_bytes(l)); +} diff --git a/drivers/md/bcache/keylist.h b/drivers/md/bcache/keylist.h index 990ba72ee60c..1166f9415f1f 100644 --- a/drivers/md/bcache/keylist.h +++ b/drivers/md/bcache/keylist.h @@ -3,117 +3,60 @@ #include "keylist_types.h" +int bch_keylist_realloc(struct keylist *, u64 *, size_t, size_t); +void bch_keylist_add_in_order(struct keylist *, struct bkey_i *); +void bch_keylist_pop_front(struct keylist *); + static inline void bch_keylist_init(struct keylist *l, u64 *inline_keys, size_t nr_inline_u64s) { - l->bot_p = l->top_p = l->start_keys_p = inline_keys; - l->end_keys_p = l->start_keys_p + nr_inline_u64s; - l->has_buf = false; -} - -static inline size_t bch_keylist_capacity(struct keylist *l) -{ - return l->end_keys_p - l->start_keys_p; + l->top_p = l->keys_p = inline_keys; } -/* - * XXX: why are we using BKEY_EXTENT_U64s_MAX here? keylists aren't used just - * for extents, this doesn't make any sense - */ - -static inline bool bch_keylist_fits(struct keylist *l, size_t u64s) +static inline void bch_keylist_free(struct keylist *l, u64 *inline_keys) { - if (l->bot_p > l->top_p) - return (l->bot_p - l->top_p) > u64s; - else if (l->top_p + u64s + BKEY_EXTENT_U64s_MAX > l->end_keys_p) - return l->start_keys_p != l->bot_p; - else - return true; -} - -static inline struct bkey_i *__bch_keylist_next(struct keylist *l, - struct bkey_i *k) -{ - k = bkey_next(k); - BUG_ON(k > l->end_keys); - - /* single_keylists don't wrap */ - if (k == l->top) - return k; - - if ((u64 *) k + BKEY_EXTENT_U64s_MAX > l->end_keys_p) - return l->start_keys; - - return k; + if (l->keys_p != inline_keys) + kfree(l->keys_p); + memset(l, 0, sizeof(*l)); } -#define for_each_keylist_key(_keys, _k) \ - for (_k = ACCESS_ONCE((_keys)->bot); \ - _k != (_keys)->top; \ - _k = __bch_keylist_next(_keys, _k)) - -static inline void bch_keylist_enqueue(struct keylist *l) +static inline void bch_keylist_push(struct keylist *l) { - BUG_ON(!bch_keylist_fits(l, l->top->k.u64s)); - l->top = __bch_keylist_next(l, l->top); + l->top = bkey_next(l->top); } static inline void bch_keylist_add(struct keylist *l, const struct bkey_i *k) { bkey_copy(l->top, k); - bch_keylist_enqueue(l); + bch_keylist_push(l); } static inline bool bch_keylist_empty(struct keylist *l) { - return l->bot == l->top; + return l->top == l->keys; } -static inline void bch_keylist_free(struct keylist *l) +static inline size_t bch_keylist_u64s(struct keylist *l) { - if (l->has_buf) - kfree(l->start_keys_p); - memset(l, 0, sizeof(*l)); + return l->top_p - l->keys_p; } -/* - * This returns the number of u64s, rather than the number of keys. As keys are - * variable sized, the actual number of keys would have to be counted. - */ -static inline size_t bch_keylist_nkeys(struct keylist *l) +static inline size_t bch_keylist_bytes(struct keylist *l) { - /* - * We don't know the exact number of u64s in the wrapped case - * because of internal fragmentation at the end! - */ - if (l->top_p >= l->bot_p) - return l->top_p - l->bot_p; - else - return ((l->top_p - l->start_keys_p) + - (l->end_keys_p - l->bot_p)); + return bch_keylist_u64s(l) * sizeof(u64); } static inline struct bkey_i *bch_keylist_front(struct keylist *l) { - return l->bot; + return l->keys; } -static inline void bch_keylist_dequeue(struct keylist *l) -{ - BUG_ON(bch_keylist_empty(l)); - l->bot = __bch_keylist_next(l, l->bot); -} +#define for_each_keylist_key(_keylist, _k) \ + for (_k = (_keylist)->keys; \ + _k != (_keylist)->top; \ + _k = bkey_next(_k)) -#define keylist_single(k) \ -((struct keylist) { \ - .start_keys = k, \ - .top = bkey_next(k), \ - .bot = k, \ - .end_keys = bkey_next(k) \ -}) - -void bch_keylist_add_in_order(struct keylist *, struct bkey_i *); -int bch_keylist_realloc(struct keylist *, unsigned); -int bch_keylist_realloc_max(struct keylist *, unsigned, unsigned); +#define keylist_single(k) \ + ((struct keylist) { .keys = k, .top = bkey_next(k) }) #endif /* _BCACHE_KEYLIST_H */ diff --git a/drivers/md/bcache/keylist_types.h b/drivers/md/bcache/keylist_types.h index 156fbe0745fd..195785bf6181 100644 --- a/drivers/md/bcache/keylist_types.h +++ b/drivers/md/bcache/keylist_types.h @@ -1,51 +1,15 @@ #ifndef _BCACHE_KEYLIST_TYPES_H #define _BCACHE_KEYLIST_TYPES_H -/* - * Keylists are growable FIFOs storing bkeys. - * - * New keys are added via bch_keylist_enqueue(), which increments @top until - * it wraps around. - * - * Old keys are removed via bch_keylist_dequeue() which increments @bot - * until it wraps around. - * - * If @top == @bot, the keylist is empty. - * - * We always ensure there is room for a maximum-sized extent key at @top; - * that is, @top_p + BKEY_EXTENT_U64s_MAX <= @end_keys_p. - * - * If this invariant does not hold after enqueuing a key, we wrap @top back - * to @start_keys_p. - * - * If at any time, @top_p + BKEY_EXTENT_U64s_MAX >= @bot_p, the keylist is - * full. - */ - -#define KEYLIST_MAX (1 << 18) - struct keylist { - /* This is a pointer to the LSB (inline_keys until realloc'd) */ union { - struct bkey_i *start_keys; - u64 *start_keys_p; + struct bkey_i *keys; + u64 *keys_p; }; - /* This is a pointer to the next to enqueue */ union { struct bkey_i *top; u64 *top_p; }; - /* This is a pointer to the next to dequeue */ - union { - struct bkey_i *bot; - u64 *bot_p; - }; - /* This is a pointer to beyond the MSB */ - union { - struct bkey_i *end_keys; - u64 *end_keys_p; - }; - bool has_buf; }; #endif /* _BCACHE_KEYLIST_TYPES_H */ diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c index ec54926426d0..9c49c6e64f1a 100644 --- a/drivers/md/bcache/move.c +++ b/drivers/md/bcache/move.c @@ -112,7 +112,7 @@ nomatch: } while (bkey_cmp(iter.pos, bch_keylist_front(keys)->k.p) >= 0) { - bch_keylist_dequeue(keys); + bch_keylist_pop_front(keys); if (bch_keylist_empty(keys)) goto out; } diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 73b80c8648e4..e0476d5bfb99 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -998,38 +998,6 @@ DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail, /* Keylists */ -DECLARE_EVENT_CLASS(keylist, - TP_PROTO(struct keylist *keys), - TP_ARGS(keys), - - TP_STRUCT__entry( - __field(struct keylist *, keys ) - __field(size_t, capacity ) - ), - - TP_fast_assign( - __entry->keys = keys; - __entry->capacity = bch_keylist_capacity(keys); - ), - - TP_printk("%p capacity %zu", __entry->keys, __entry->capacity) -); - -DEFINE_EVENT(keylist, bcache_keylist_realloc, - TP_PROTO(struct keylist *keys), - TP_ARGS(keys) -); - -DEFINE_EVENT(keylist, bcache_keylist_realloc_full, - TP_PROTO(struct keylist *keys), - TP_ARGS(keys) -); - -DEFINE_EVENT(keylist, bcache_keylist_realloc_fail, - TP_PROTO(struct keylist *keys), - TP_ARGS(keys) -); - TRACE_EVENT(bcache_keyscan, TP_PROTO(unsigned nr_found, unsigned start_inode, u64 start_offset, diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 999ed8f4c535..1c5111d4d5a6 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -461,19 +461,18 @@ BKEY_VAL_TYPE(extent, BCH_EXTENT); sizeof(struct bch_extent_ptr)) / sizeof(u64)) /* Maximum possible size of an entire extent value: */ -#if 0 /* There's a hack in the keylist code that needs to be fixed.. */ #define BKEY_EXTENT_VAL_U64s_MAX \ (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX) -#else -#define BKEY_EXTENT_VAL_U64s_MAX 8 -#endif /* * Maximum possible size of an entire extent, key + value: */ -#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) +#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) -#define BKEY_BTREE_PTR_VAL_U64s_MAX BCH_REPLICAS_MAX -#define BKEY_BTREE_PTR_U64s_MAX (BKEY_U64s + BCH_REPLICAS_MAX) +/* Btree pointers don't carry around checksums: */ +#define BKEY_BTREE_PTR_VAL_U64s_MAX \ + ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX) +#define BKEY_BTREE_PTR_U64s_MAX \ + (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) /* Inodes */ |