author		Slava Pestov <sviatoslavpestov@gmail.com>	2014-06-26 17:24:09 -0700
committer	Kent Overstreet <kent.overstreet@gmail.com>	2016-10-07 09:00:23 -0800
commit		24c1381a9642c9342afce095c86158ed56ca7e3a
tree		388abf5c21165569d0861867a78b18589b45daaa
parent		9b1a614c7ca2faa11e035ca5fe436d7d9fdb2dab
bcache: allocator triggers tiering and moving GC
Previously, the allocator thread would wake up btree GC when it could
not find sufficient buckets to invalidate. If we were in a situation
where only tiering or moving GC could make space, this behavior was
counter-productive -- btree GC would be woken up over and over again,
and since it currently holds a write lock on the root, it would
throttle writes issued by tiering and moving GC, while itself failing
to make any progress.
Instead, we now wake up tiering if we're out of space on a lower tier,
and wake up moving GC if we're out of space on the highest tier. To
prevent CPU spinning, the allocator then waits for the next btree GC
to run. Tiering and moving GC kick off a btree GC either when they
themselves run out of space or when they complete a pass.
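
A minimal standalone sketch of that wake-up policy (tier_info,
pick_wake_target and nr_tiers are illustrative stand-ins, not the
driver's types; the real logic is in alloc_failed() in the diff below):

    struct tier_info { unsigned nr_devices; };

    enum wake_target { WAKE_TIERING, WAKE_MOVING_GC };

    static enum wake_target pick_wake_target(const struct tier_info *tiers,
                                             unsigned nr_tiers,
                                             unsigned my_tier)
    {
            unsigned i;

            /* Any populated tier above ours can absorb data, so tiering
             * can free buckets here; otherwise we are the highest tier
             * and only moving GC can compact live data to make space. */
            for (i = my_tier + 1; i < nr_tiers; i++)
                    if (tiers[i].nr_devices)
                            return WAKE_TIERING;
            return WAKE_MOVING_GC;
    }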
Finally, wake up moving GC every 30 seconds to try to do some work;
otherwise we would never do any moving GC on any device not in the
highest tier.
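
The periodic wake-up comes from the same ratelimited kthread loop the
diff touches (bch_kthread_loop_ratelimit() with
c->btree_scan_ratelimit * HZ). A userspace sketch of that pattern,
with hypothetical names:

    #include <stdbool.h>
    #include <time.h>
    #include <unistd.h>

    /* Sketch only: sleep until at least period_secs (e.g. 30) have
     * passed since *last, then update *last. The kernel helper also
     * reports when the thread should stop; here we never stop. */
    static bool loop_ratelimit(time_t *last, unsigned period_secs)
    {
            time_t elapsed = time(NULL) - *last;

            if (elapsed < (time_t)period_secs)
                    sleep(period_secs - elapsed);
            *last = time(NULL);
            return false;
    }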
We still need to tune the PD controllers for tiering and moving GC,
but at least btree GC no longer starts spinning when we're out of
space.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--	drivers/md/bcache/alloc.c	14
-rw-r--r--	drivers/md/bcache/movinggc.c	11
-rw-r--r--	drivers/md/bcache/tier.c	1
-rw-r--r--	include/trace/events/bcache.h	7
4 files changed, 20 insertions, 13 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 3c6902f0ae9d..c7485e902718 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -74,19 +74,27 @@ static void alloc_failed(struct cache *ca)
 	struct cache_set *c = ca->set;
 	unsigned i, gc_count;
 
+	gc_count = bch_gc_count(c);
+
+	/* Check if there are caches in higher tiers; we could potentially
+	 * make room on our cache by tiering */
 	for (i = CACHE_TIER(&ca->sb) + 1;
 	     i < ARRAY_SIZE(c->cache_by_alloc);
 	     i++)
 		if (c->cache_by_alloc[i].nr_devices) {
 			c->tiering_pd.rate.rate = UINT_MAX;
 			bch_ratelimit_reset(&c->tiering_pd.rate);
+			wake_up_process(c->tiering_thread);
+			trace_bcache_alloc_wake_tiering(ca);
+			goto wait;
 		}
 
-	trace_bcache_alloc_wait(ca);
+	/* If this is the highest tier cache, just do a btree GC */
+	wake_up_process(ca->moving_gc_thread);
+	trace_bcache_alloc_wake_moving(ca);
+wait:
 	mutex_unlock(&c->bucket_lock);
 
-	gc_count = bch_gc_count(c);
-	wake_up_gc(c);
 	bch_wait_for_next_gc(c, gc_count);
 	mutex_lock(&c->bucket_lock);
 }
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index dcd5718c0f93..fa83cd68f840 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -233,20 +233,13 @@ static int bch_moving_gc_thread(void *arg)
 	struct cache *ca = arg;
 	struct cache_set *c = ca->set;
 	unsigned long last = jiffies;
-	unsigned gc_count;
 
 	do {
-		gc_count = bch_gc_count(c);
-		bch_wait_for_next_gc(c, gc_count);
-
 		if (kthread_wait_freezable(c->copy_gc_enabled))
 			break;
 
-		if (bch_moving_gc(ca)) {
-			gc_count = bch_gc_count(c);
-			wake_up_gc(c);
-			bch_wait_for_next_gc(c, gc_count);
-		}
+		bch_moving_gc(ca);
+		wake_up_gc(c);
 	} while (!bch_kthread_loop_ratelimit(&last,
 					c->btree_scan_ratelimit * HZ));
 
diff --git a/drivers/md/bcache/tier.c b/drivers/md/bcache/tier.c
index 180cf1463046..034974b61996 100644
--- a/drivers/md/bcache/tier.c
+++ b/drivers/md/bcache/tier.c
@@ -147,6 +147,7 @@ static int bch_tiering_thread(void *arg)
 			break;
 
 		read_tiering(c);
+		wake_up_gc(c);
 	} while (!bch_kthread_loop_ratelimit(&last,
 					c->btree_scan_ratelimit * HZ));
 
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3f66e2512ba6..defd85ca1d4c 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -380,7 +380,12 @@ DECLARE_EVENT_CLASS(cache,
 	TP_printk("%pU", __entry->uuid)
 );
 
-DEFINE_EVENT(cache, bcache_alloc_wait,
+DEFINE_EVENT(cache, bcache_alloc_wake_tiering,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca)
+);
+
+DEFINE_EVENT(cache, bcache_alloc_wake_moving,
 	TP_PROTO(struct cache *ca),
 	TP_ARGS(ca)
 );
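
Note how the alloc.c hunk snapshots gc_count before waking anyone, so a
GC pass that completes in between is not missed. A userspace sketch of
that generation-count wait (pthread names are illustrative; the driver
uses bch_gc_count() and bch_wait_for_next_gc() on a kernel wait queue):

    #include <pthread.h>

    struct gc_state {
            pthread_mutex_t lock;
            pthread_cond_t  done;   /* broadcast after each GC pass */
            unsigned        count;  /* bumped after each completed pass */
    };

    /* Callers snapshot seen = gc->count before triggering work, then
     * block until at least one further pass has completed. */
    static void wait_for_next_gc(struct gc_state *gc, unsigned seen)
    {
            pthread_mutex_lock(&gc->lock);
            while (gc->count == seen)
                    pthread_cond_wait(&gc->done, &gc->lock);
            pthread_mutex_unlock(&gc->lock);
    }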