diff options
-rw-r--r-- | drivers/md/bcache/bcache.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/clock.c | 50 | ||||
-rw-r--r-- | drivers/md/bcache/clock.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/movinggc.c | 24 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 19 | ||||
-rw-r--r-- | drivers/md/bcache/tier.c | 32 | ||||
-rw-r--r-- | drivers/md/bcache/util.c | 17 | ||||
-rw-r--r-- | drivers/md/bcache/util.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 36 |
10 files changed, 121 insertions, 63 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b7791384bdcf..a4ea07723e18 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -639,7 +639,6 @@ struct cache_set { unsigned copy_gc_enabled:1; unsigned tiering_enabled:1; unsigned tiering_percent; - unsigned btree_scan_ratelimit; /* * foreground writes will be throttled when the number of free diff --git a/drivers/md/bcache/clock.c b/drivers/md/bcache/clock.c index bfc068f278db..1e453cd30b92 100644 --- a/drivers/md/bcache/clock.c +++ b/drivers/md/bcache/clock.c @@ -1,6 +1,9 @@ #include "bcache.h" #include "clock.h" +#include <linux/freezer.h> +#include <linux/kthread.h> + static inline bool io_timer_cmp(struct io_timer *l, struct io_timer *r) { return time_after(l->expire, r->expire); @@ -13,6 +16,53 @@ void bch_io_timer_add(struct io_clock *clock, struct io_timer *timer) spin_unlock(&clock->timer_lock); } +struct io_clock_wait { + struct io_timer timer; + struct task_struct *task; + int expired; +}; + +static void io_clock_wait_fn(struct io_timer *timer) +{ + struct io_clock_wait *wait = container_of(timer, + struct io_clock_wait, timer); + + wait->expired = 1; + wake_up_process(wait->task); +} + +/* + * _only_ to be used from a kthread + */ +void bch_kthread_io_clock_wait(struct io_clock *clock, + unsigned long until) +{ + struct io_clock_wait wait; + + /* XXX: calculate sleep time rigorously */ + wait.timer.expire = until; + wait.timer.fn = io_clock_wait_fn; + wait.task = current; + wait.expired = 0; + bch_io_timer_add(clock, &wait.timer); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); + break; + } + + if (wait.expired) { + __set_current_state(TASK_RUNNING); + break; + } + + schedule(); + try_to_freeze(); + } +} + static struct io_timer *get_expired_timer(struct io_clock *clock, unsigned long now) { diff --git a/drivers/md/bcache/clock.h b/drivers/md/bcache/clock.h index 37d5f20174a5..c38679699a16 100644 --- a/drivers/md/bcache/clock.h +++ b/drivers/md/bcache/clock.h @@ -2,6 +2,7 @@ #define _BCACHE_CLOCK_H void bch_io_timer_add(struct io_clock *, struct io_timer *); +void bch_kthread_io_clock_wait(struct io_clock *, unsigned long); void bch_increment_clock(struct cache_set *, unsigned, int); void bch_io_clock_exit(struct io_clock *); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index d3aeeb523dd3..6cc7e8a556f6 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -7,6 +7,7 @@ #include "bcache.h" #include "btree.h" #include "buckets.h" +#include "clock.h" #include "extents.h" #include "io.h" #include "keylist.h" @@ -223,6 +224,8 @@ static bool bch_moving_gc(struct cache *ca) for (i = 0; i < ca->heap.used; i++) sectors_to_move += ca->heap.data[i].val; + /* XXX: calculate this threshold rigorously */ + if (ca->heap.used < ca->free_inc.size / 2 && sectors_to_move < reserve_sectors) { mutex_unlock(&ca->heap_lock); @@ -281,15 +284,26 @@ static int bch_moving_gc_thread(void *arg) { struct cache *ca = arg; struct cache_set *c = ca->set; - unsigned long last = jiffies; + struct io_clock *clock = &c->io_clock[WRITE]; + unsigned long last; + bool moved; - do { + while (!kthread_should_stop()) { if (kthread_wait_freezable(c->copy_gc_enabled)) break; - bch_moving_gc(ca); - } while (!bch_kthread_loop_ratelimit(&last, - c->btree_scan_ratelimit * HZ)); + last = atomic_long_read(&clock->now); + + moved = bch_moving_gc(ca); + + /* + * This really should be a library code, but it has to be + * kthread specific... ugh + */ + if (!moved) + bch_kthread_io_clock_wait(clock, + last + ca->free_inc.size / 2); + } return 0; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 94af71c14745..6d488115a6e0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -972,8 +972,6 @@ static const char *bch_cache_set_alloc(struct cache_sb *sb, c->btree_flush_delay = 30; - c->btree_scan_ratelimit = 30; - c->copy_gc_enabled = 1; c->tiering_enabled = 1; c->tiering_percent = 10; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 9cf23379f896..e727e69a2a3d 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -178,7 +178,6 @@ rw_attribute(tiering_stripe_size); sysfs_pd_controller_attribute(foreground_write); rw_attribute(btree_flush_delay); -rw_attribute(btree_scan_ratelimit); rw_attribute(pd_controllers_update_seconds); rw_attribute(foreground_target_percent); @@ -656,7 +655,6 @@ SHOW(bch_cache_set) sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd); - sysfs_print(btree_scan_ratelimit, c->btree_scan_ratelimit); sysfs_print(pd_controllers_update_seconds, c->pd_controllers_update_seconds); sysfs_print(foreground_target_percent, c->foreground_target_percent); @@ -832,22 +830,6 @@ STORE(__bch_cache_set) return size; } - if (attr == &sysfs_btree_scan_ratelimit) { - struct cache *ca; - unsigned i; - ssize_t ret = strtoul_safe(buf, c->btree_scan_ratelimit) - ?: (ssize_t) size; - - for_each_cache(ca, c, i) - if (ca->moving_gc_read) - wake_up_process(ca->moving_gc_read); - - if (c->tiering_read) - wake_up_process(c->tiering_read); - - return ret; - } - sysfs_strtoul(pd_controllers_update_seconds, c->pd_controllers_update_seconds); sysfs_strtoul(foreground_target_percent, c->foreground_target_percent); @@ -969,7 +951,6 @@ static struct attribute *bch_cache_set_files[] = { &sysfs_data_replicas_have, &sysfs_btree_flush_delay, - &sysfs_btree_scan_ratelimit, &sysfs_foreground_target_percent, &sysfs_sector_reserve_percent, &sysfs_tiering_percent, diff --git a/drivers/md/bcache/tier.c b/drivers/md/bcache/tier.c index 494ea4877578..5ca859d5904f 100644 --- a/drivers/md/bcache/tier.c +++ b/drivers/md/bcache/tier.c @@ -2,6 +2,7 @@ #include "bcache.h" #include "btree.h" #include "buckets.h" +#include "clock.h" #include "extents.h" #include "io.h" #include "keylist.h" @@ -332,7 +333,7 @@ static int tiering_next_cache(struct cache_set *c, } } -static void read_tiering(struct cache_set *c) +static u64 read_tiering(struct cache_set *c) { struct moving_context ctxt; struct tiering_refill refill; @@ -360,21 +361,40 @@ static void read_tiering(struct cache_set *c) refill_done(&refill); trace_bcache_tiering_end(c, ctxt.sectors_moved, ctxt.keys_moved); + + return ctxt.sectors_moved; } static int bch_tiering_thread(void *arg) { struct cache_set *c = arg; - unsigned long last = jiffies; + struct io_clock *clock = &c->io_clock[WRITE]; + struct cache *ca; + u64 sectors, tier_capacity; + unsigned long last; + unsigned i; - do { + while (!kthread_should_stop()) { if (kthread_wait_freezable(c->tiering_enabled && c->cache_tiers[1].nr_devices)) break; - read_tiering(c); - } while (!bch_kthread_loop_ratelimit(&last, - c->btree_scan_ratelimit * HZ)); + last = atomic_long_read(&clock->now); + + sectors = read_tiering(c); + + tier_capacity = 0; + rcu_read_lock(); + group_for_each_cache_rcu(ca, &c->cache_tiers[0], i) + tier_capacity += + (ca->mi.nbuckets - + ca->mi.first_bucket) << ca->bucket_bits; + rcu_read_unlock(); + + if (sectors < tier_capacity >> 4) + bch_kthread_io_clock_wait(clock, + last + (tier_capacity >> 5)); + } return 0; } diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index bf13a9361716..b87dd213b88b 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -501,23 +501,6 @@ uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len) return crc; } -int bch_kthread_loop_ratelimit(unsigned long *last, unsigned long delay) -{ - unsigned long next = *last + delay; - - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - return -1; - } - - schedule_timeout(max_t(long, 0, next - jiffies)); - try_to_freeze(); - *last = jiffies; - - return 0; -} - size_t bch_rand_range(size_t max) { size_t rand; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ecd79e292f94..5311afcd3a1c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -581,8 +581,6 @@ do { \ uint64_t bch_crc64_update(uint64_t, const void *, size_t); uint64_t bch_crc64(const void *, size_t); -int bch_kthread_loop_ratelimit(unsigned long *, unsigned long); - #define kthread_wait_freezable(cond) \ ({ \ int _ret = 0; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 1255e7e213d1..4f42adc75e9a 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -8,6 +8,7 @@ #include "bcache.h" #include "btree.h" +#include "clock.h" #include "debug.h" #include "extents.h" #include "io.h" @@ -189,7 +190,7 @@ static void read_dirty_submit(struct closure *cl) continue_at(cl, write_dirty, system_wq); } -static void read_dirty(struct cached_dev *dc) +static u64 read_dirty(struct cached_dev *dc) { struct keybuf_key *w; struct dirty_io *io; @@ -198,6 +199,7 @@ static void read_dirty(struct cached_dev *dc) const struct bch_extent_ptr *ptr; unsigned i; struct bio_vec *bv; + u64 sectors_written = 0; BKEY_PADDED(k) tmp; closure_init_stack(&cl); @@ -208,6 +210,7 @@ static void read_dirty(struct cached_dev *dc) if (!w) break; + sectors_written += w->key.k.size; bkey_copy(&tmp.k, &w->key); while (tmp.k.k.size) { @@ -288,6 +291,8 @@ static void read_dirty(struct cached_dev *dc) * freed) before refilling again */ closure_sync(&cl); + + return sectors_written; } /* Scan for dirty data */ @@ -398,13 +403,14 @@ next: } } -static void bch_writeback(struct cached_dev *dc) +static u64 bch_writeback(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; unsigned inode = bcache_dev_inum(&dc->disk); struct bpos start = POS(inode, 0); struct bpos end = POS(inode, KEY_OFFSET_MAX); struct bpos start_pos; + u64 sectors_written = 0; buf->last_scanned = POS(inode, 0); @@ -417,11 +423,11 @@ static void bch_writeback(struct cached_dev *dc) set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) - return; + return sectors_written; schedule(); try_to_freeze(); - return; + return sectors_written; } if (bkey_cmp(buf->last_scanned, end) >= 0) @@ -458,26 +464,34 @@ refill_done: up_write(&dc->writeback_lock); bch_ratelimit_reset(&dc->writeback_pd.rate); - read_dirty(dc); + sectors_written += read_dirty(dc); } + + return sectors_written; } static int bch_writeback_thread(void *arg) { struct cached_dev *dc = arg; struct cache_set *c = dc->disk.c; - unsigned long last = jiffies; + struct io_clock *clock = &c->io_clock[WRITE]; + unsigned long last; + u64 sectors_written; - do { + while (!kthread_should_stop()) { if (kthread_wait_freezable(dc->writeback_running || test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))) break; - bch_writeback(dc); - } while (!bch_kthread_loop_ratelimit(&last, - test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) - ? 0 : c->btree_scan_ratelimit * HZ)); + last = atomic_long_read(&clock->now); + + sectors_written = bch_writeback(dc); + + if (sectors_written < c->capacity >> 4) + bch_kthread_io_clock_wait(clock, + last + (c->capacity >> 5)); + } return 0; } |