-rw-r--r--  drivers/md/bcache/bcache.h    |  1
-rw-r--r--  drivers/md/bcache/clock.c     | 50
-rw-r--r--  drivers/md/bcache/clock.h     |  1
-rw-r--r--  drivers/md/bcache/movinggc.c  | 24
-rw-r--r--  drivers/md/bcache/super.c     |  2
-rw-r--r--  drivers/md/bcache/sysfs.c     | 19
-rw-r--r--  drivers/md/bcache/tier.c      | 32
-rw-r--r--  drivers/md/bcache/util.c      | 17
-rw-r--r--  drivers/md/bcache/util.h      |  2
-rw-r--r--  drivers/md/bcache/writeback.c | 36
10 files changed, 121 insertions(+), 63 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b7791384bdcf..a4ea07723e18 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -639,7 +639,6 @@ struct cache_set {
unsigned copy_gc_enabled:1;
unsigned tiering_enabled:1;
unsigned tiering_percent;
- unsigned btree_scan_ratelimit;
/*
* foreground writes will be throttled when the number of free
diff --git a/drivers/md/bcache/clock.c b/drivers/md/bcache/clock.c
index bfc068f278db..1e453cd30b92 100644
--- a/drivers/md/bcache/clock.c
+++ b/drivers/md/bcache/clock.c
@@ -1,6 +1,9 @@
#include "bcache.h"
#include "clock.h"
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+
static inline bool io_timer_cmp(struct io_timer *l, struct io_timer *r)
{
return time_after(l->expire, r->expire);
@@ -13,6 +16,53 @@ void bch_io_timer_add(struct io_clock *clock, struct io_timer *timer)
spin_unlock(&clock->timer_lock);
}
+struct io_clock_wait {
+ struct io_timer timer;
+ struct task_struct *task;
+ int expired;
+};
+
+static void io_clock_wait_fn(struct io_timer *timer)
+{
+ struct io_clock_wait *wait = container_of(timer,
+ struct io_clock_wait, timer);
+
+ wait->expired = 1;
+ wake_up_process(wait->task);
+}
+
+/*
+ * _only_ to be used from a kthread
+ */
+void bch_kthread_io_clock_wait(struct io_clock *clock,
+ unsigned long until)
+{
+ struct io_clock_wait wait;
+
+ /* XXX: calculate sleep time rigorously */
+ wait.timer.expire = until;
+ wait.timer.fn = io_clock_wait_fn;
+ wait.task = current;
+ wait.expired = 0;
+ bch_io_timer_add(clock, &wait.timer);
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop()) {
+ __set_current_state(TASK_RUNNING);
+ break;
+ }
+
+ if (wait.expired) {
+ __set_current_state(TASK_RUNNING);
+ break;
+ }
+
+ schedule();
+ try_to_freeze();
+ }
+}
+
static struct io_timer *get_expired_timer(struct io_clock *clock,
unsigned long now)
{
diff --git a/drivers/md/bcache/clock.h b/drivers/md/bcache/clock.h
index 37d5f20174a5..c38679699a16 100644
--- a/drivers/md/bcache/clock.h
+++ b/drivers/md/bcache/clock.h
@@ -2,6 +2,7 @@
#define _BCACHE_CLOCK_H
void bch_io_timer_add(struct io_clock *, struct io_timer *);
+void bch_kthread_io_clock_wait(struct io_clock *, unsigned long);
void bch_increment_clock(struct cache_set *, unsigned, int);
void bch_io_clock_exit(struct io_clock *);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index d3aeeb523dd3..6cc7e8a556f6 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -7,6 +7,7 @@
#include "bcache.h"
#include "btree.h"
#include "buckets.h"
+#include "clock.h"
#include "extents.h"
#include "io.h"
#include "keylist.h"
@@ -223,6 +224,8 @@ static bool bch_moving_gc(struct cache *ca)
for (i = 0; i < ca->heap.used; i++)
sectors_to_move += ca->heap.data[i].val;
+ /* XXX: calculate this threshold rigorously */
+
if (ca->heap.used < ca->free_inc.size / 2 &&
sectors_to_move < reserve_sectors) {
mutex_unlock(&ca->heap_lock);
@@ -281,15 +284,26 @@ static int bch_moving_gc_thread(void *arg)
{
struct cache *ca = arg;
struct cache_set *c = ca->set;
- unsigned long last = jiffies;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ unsigned long last;
+ bool moved;
- do {
+ while (!kthread_should_stop()) {
if (kthread_wait_freezable(c->copy_gc_enabled))
break;
- bch_moving_gc(ca);
- } while (!bch_kthread_loop_ratelimit(&last,
- c->btree_scan_ratelimit * HZ));
+ last = atomic_long_read(&clock->now);
+
+ moved = bch_moving_gc(ca);
+
+ /*
+ * This really should be library code, but it has to be
+ * kthread specific... ugh
+ */
+ if (!moved)
+ bch_kthread_io_clock_wait(clock,
+ last + ca->free_inc.size / 2);
+ }
return 0;
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 94af71c14745..6d488115a6e0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -972,8 +972,6 @@ static const char *bch_cache_set_alloc(struct cache_sb *sb,
c->btree_flush_delay = 30;
- c->btree_scan_ratelimit = 30;
-
c->copy_gc_enabled = 1;
c->tiering_enabled = 1;
c->tiering_percent = 10;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 9cf23379f896..e727e69a2a3d 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -178,7 +178,6 @@ rw_attribute(tiering_stripe_size);
sysfs_pd_controller_attribute(foreground_write);
rw_attribute(btree_flush_delay);
-rw_attribute(btree_scan_ratelimit);
rw_attribute(pd_controllers_update_seconds);
rw_attribute(foreground_target_percent);
@@ -656,7 +655,6 @@ SHOW(bch_cache_set)
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);
- sysfs_print(btree_scan_ratelimit, c->btree_scan_ratelimit);
sysfs_print(pd_controllers_update_seconds,
c->pd_controllers_update_seconds);
sysfs_print(foreground_target_percent, c->foreground_target_percent);
@@ -832,22 +830,6 @@ STORE(__bch_cache_set)
return size;
}
- if (attr == &sysfs_btree_scan_ratelimit) {
- struct cache *ca;
- unsigned i;
- ssize_t ret = strtoul_safe(buf, c->btree_scan_ratelimit)
- ?: (ssize_t) size;
-
- for_each_cache(ca, c, i)
- if (ca->moving_gc_read)
- wake_up_process(ca->moving_gc_read);
-
- if (c->tiering_read)
- wake_up_process(c->tiering_read);
-
- return ret;
- }
-
sysfs_strtoul(pd_controllers_update_seconds,
c->pd_controllers_update_seconds);
sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
@@ -969,7 +951,6 @@ static struct attribute *bch_cache_set_files[] = {
&sysfs_data_replicas_have,
&sysfs_btree_flush_delay,
- &sysfs_btree_scan_ratelimit,
&sysfs_foreground_target_percent,
&sysfs_sector_reserve_percent,
&sysfs_tiering_percent,
diff --git a/drivers/md/bcache/tier.c b/drivers/md/bcache/tier.c
index 494ea4877578..5ca859d5904f 100644
--- a/drivers/md/bcache/tier.c
+++ b/drivers/md/bcache/tier.c
@@ -2,6 +2,7 @@
#include "bcache.h"
#include "btree.h"
#include "buckets.h"
+#include "clock.h"
#include "extents.h"
#include "io.h"
#include "keylist.h"
@@ -332,7 +333,7 @@ static int tiering_next_cache(struct cache_set *c,
}
}
-static void read_tiering(struct cache_set *c)
+static u64 read_tiering(struct cache_set *c)
{
struct moving_context ctxt;
struct tiering_refill refill;
@@ -360,21 +361,40 @@ static void read_tiering(struct cache_set *c)
refill_done(&refill);
trace_bcache_tiering_end(c, ctxt.sectors_moved, ctxt.keys_moved);
+
+ return ctxt.sectors_moved;
}
static int bch_tiering_thread(void *arg)
{
struct cache_set *c = arg;
- unsigned long last = jiffies;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ struct cache *ca;
+ u64 sectors, tier_capacity;
+ unsigned long last;
+ unsigned i;
- do {
+ while (!kthread_should_stop()) {
if (kthread_wait_freezable(c->tiering_enabled &&
c->cache_tiers[1].nr_devices))
break;
- read_tiering(c);
- } while (!bch_kthread_loop_ratelimit(&last,
- c->btree_scan_ratelimit * HZ));
+ last = atomic_long_read(&clock->now);
+
+ sectors = read_tiering(c);
+
+ tier_capacity = 0;
+ rcu_read_lock();
+ group_for_each_cache_rcu(ca, &c->cache_tiers[0], i)
+ tier_capacity +=
+ (ca->mi.nbuckets -
+ ca->mi.first_bucket) << ca->bucket_bits;
+ rcu_read_unlock();
+
+ if (sectors < tier_capacity >> 4)
+ bch_kthread_io_clock_wait(clock,
+ last + (tier_capacity >> 5));
+ }
return 0;
}
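
Worked through with concrete numbers (assuming 512-byte sectors): for 512 GiB of tier-0 capacity, tier_capacity is 2^30 sectors, so a tiering pass that moves fewer than 32 GiB (tier_capacity >> 4) puts the thread to sleep until the write clock records a further 16 GiB (tier_capacity >> 5) of writes.
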
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index bf13a9361716..b87dd213b88b 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -501,23 +501,6 @@ uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
return crc;
}
-int bch_kthread_loop_ratelimit(unsigned long *last, unsigned long delay)
-{
- unsigned long next = *last + delay;
-
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
- return -1;
- }
-
- schedule_timeout(max_t(long, 0, next - jiffies));
- try_to_freeze();
- *last = jiffies;
-
- return 0;
-}
-
size_t bch_rand_range(size_t max)
{
size_t rand;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ecd79e292f94..5311afcd3a1c 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -581,8 +581,6 @@ do { \
uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);
-int bch_kthread_loop_ratelimit(unsigned long *, unsigned long);
-
#define kthread_wait_freezable(cond) \
({ \
int _ret = 0; \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 1255e7e213d1..4f42adc75e9a 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -8,6 +8,7 @@
#include "bcache.h"
#include "btree.h"
+#include "clock.h"
#include "debug.h"
#include "extents.h"
#include "io.h"
@@ -189,7 +190,7 @@ static void read_dirty_submit(struct closure *cl)
continue_at(cl, write_dirty, system_wq);
}
-static void read_dirty(struct cached_dev *dc)
+static u64 read_dirty(struct cached_dev *dc)
{
struct keybuf_key *w;
struct dirty_io *io;
@@ -198,6 +199,7 @@ static void read_dirty(struct cached_dev *dc)
const struct bch_extent_ptr *ptr;
unsigned i;
struct bio_vec *bv;
+ u64 sectors_written = 0;
BKEY_PADDED(k) tmp;
closure_init_stack(&cl);
@@ -208,6 +210,7 @@ static void read_dirty(struct cached_dev *dc)
if (!w)
break;
+ sectors_written += w->key.k.size;
bkey_copy(&tmp.k, &w->key);
while (tmp.k.k.size) {
@@ -288,6 +291,8 @@ static void read_dirty(struct cached_dev *dc)
* freed) before refilling again
*/
closure_sync(&cl);
+
+ return sectors_written;
}
/* Scan for dirty data */
@@ -398,13 +403,14 @@ next:
}
}
-static void bch_writeback(struct cached_dev *dc)
+static u64 bch_writeback(struct cached_dev *dc)
{
struct keybuf *buf = &dc->writeback_keys;
unsigned inode = bcache_dev_inum(&dc->disk);
struct bpos start = POS(inode, 0);
struct bpos end = POS(inode, KEY_OFFSET_MAX);
struct bpos start_pos;
+ u64 sectors_written = 0;
buf->last_scanned = POS(inode, 0);
@@ -417,11 +423,11 @@ static void bch_writeback(struct cached_dev *dc)
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
- return;
+ return sectors_written;
schedule();
try_to_freeze();
- return;
+ return sectors_written;
}
if (bkey_cmp(buf->last_scanned, end) >= 0)
@@ -458,26 +464,34 @@ refill_done:
up_write(&dc->writeback_lock);
bch_ratelimit_reset(&dc->writeback_pd.rate);
- read_dirty(dc);
+ sectors_written += read_dirty(dc);
}
+
+ return sectors_written;
}
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
struct cache_set *c = dc->disk.c;
- unsigned long last = jiffies;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ unsigned long last;
+ u64 sectors_written;
- do {
+ while (!kthread_should_stop()) {
if (kthread_wait_freezable(dc->writeback_running ||
test_bit(BCACHE_DEV_DETACHING,
&dc->disk.flags)))
break;
- bch_writeback(dc);
- } while (!bch_kthread_loop_ratelimit(&last,
- test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)
- ? 0 : c->btree_scan_ratelimit * HZ));
+ last = atomic_long_read(&clock->now);
+
+ sectors_written = bch_writeback(dc);
+
+ if (sectors_written < c->capacity >> 4)
+ bch_kthread_io_clock_wait(clock,
+ last + (c->capacity >> 5));
+ }
return 0;
}
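
Writeback gets the same 1/16-work, 1/32-sleep shape, but measured against the whole cache set: with c->capacity at 1 TiB (2^31 sectors, again assuming 512-byte sectors), a pass that writes back under 64 GiB sends the thread to sleep until another 32 GiB of writes have advanced the clock.
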