author     Kent Overstreet <kent.overstreet@gmail.com>    2015-06-01 19:32:22 -0700
committer  Kent Overstreet <kent.overstreet@gmail.com>    2016-10-07 12:34:21 -0800
commit     b5ecdc495b4053c64f7bca90125fb5df20d7e0f1
tree       fdfd8cb1ec591c09923af1393e84e66e0bc6c619
parent     5749e6138348d7c1546e28b4ac0ae9032c94e0c0
bcache: Cache set RO/RW path improvements
Now we should be able to go RW -> RO and back again, which we need for bcachefs remounting.
-rw-r--r--  drivers/md/bcache/alloc.c    |  43
-rw-r--r--  drivers/md/bcache/alloc.h    |   1
-rw-r--r--  drivers/md/bcache/blockdev.c |   5
-rw-r--r--  drivers/md/bcache/gc.c       |  10
-rw-r--r--  drivers/md/bcache/gc.h       |   1
-rw-r--r--  drivers/md/bcache/io.c       |   1
-rw-r--r--  drivers/md/bcache/super.c    | 211
-rw-r--r--  drivers/md/bcache/super.h    |   3
-rw-r--r--  include/uapi/linux/bcache.h  |   2
9 files changed, 170 insertions(+), 107 deletions(-)
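
[Note: the following is an illustrative sketch, not part of the commit. It shows how the two cache-set entry points this patch exports in super.h, bch_cache_set_read_only() and bch_cache_set_read_write(), might be driven from a remount path. The helper name, the -EIO return and the pr_err() reporting are assumptions; bch_register_lock is taken here because both entry points lockdep-assert it.]

/*
 * Minimal sketch (assumed caller, not in this patch): drive the RW -> RO -> RW
 * transition the commit message describes. Only bch_cache_set_read_only(),
 * bch_cache_set_read_write() and bch_register_lock come from the tree itself.
 */
static int bch_remount_sketch(struct cache_set *c, bool to_read_only)
{
	const char *err = NULL;

	mutex_lock(&bch_register_lock);		/* both entry points assert this lock */

	if (to_read_only)
		/* stops tiering, copygc, gc and the allocators, then flushes the journal */
		bch_cache_set_read_only(c);
	else
		/* restarts the allocators first, then gc, copygc and tiering */
		err = bch_cache_set_read_write(c);

	mutex_unlock(&bch_register_lock);

	if (err) {
		pr_err("bcache: can't go read-write: %s", err);
		return -EIO;		/* error code chosen for the sketch */
	}

	return 0;
}
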
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 7b51888c5968..442a3575dc0b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -1540,6 +1540,30 @@ void bch_cache_allocator_stop(struct cache *ca)
}
/*
+ * Startup the allocator thread for transition to RW mode:
+ */
+const char *bch_cache_allocator_start(struct cache *ca)
+{
+ struct cache_set *c = ca->set;
+ struct cache_group *tier = &c->cache_tiers[CACHE_TIER(&ca->mi)];
+ struct task_struct *k;
+
+ k = kthread_create(bch_allocator_thread, ca, "bcache_allocator");
+ if (IS_ERR(k))
+ return "error starting allocator thread";
+
+ ca->alloc_thread = k;
+ wake_up_process(k);
+
+ bch_cache_group_add_cache(tier, ca);
+ bch_cache_group_add_cache(&c->cache_all, ca);
+
+ bch_recalc_capacity(c);
+
+ return NULL;
+}
+
+/*
* bch_cache_allocator_start - fill freelists directly with completely unused
* buckets
*
@@ -1562,11 +1586,8 @@ void bch_cache_allocator_stop(struct cache *ca)
* should always be some of when this function is called, since the last time
* we shut down there should have been unused buckets stranded on freelists.
*/
-const char *bch_cache_allocator_start(struct cache *ca)
+const char *bch_cache_allocator_start_once(struct cache *ca)
{
- struct cache_set *c = ca->set;
- struct cache_group *tier = &c->cache_tiers[CACHE_TIER(&ca->mi)];
- struct task_struct *k;
struct bucket *g;
spin_lock(&ca->freelist_lock);
@@ -1588,19 +1609,7 @@ const char *bch_cache_allocator_start(struct cache *ca)
if (!fifo_full(&ca->free[RESERVE_PRIO]))
return "couldn't find enough available buckets to write prios";
- k = kthread_create(bch_allocator_thread, ca, "bcache_allocator");
- if (IS_ERR(k))
- return "error starting allocator thread";
-
- ca->alloc_thread = k;
- wake_up_process(k);
-
- bch_cache_group_add_cache(tier, ca);
- bch_cache_group_add_cache(&c->cache_all, ca);
-
- bch_recalc_capacity(c);
-
- return NULL;
+ return bch_cache_allocator_start(ca);
}
void bch_open_buckets_init(struct cache_set *c)
diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h
index c0118db8440e..106eaa52b0e0 100644
--- a/drivers/md/bcache/alloc.h
+++ b/drivers/md/bcache/alloc.h
@@ -54,6 +54,7 @@ static inline void bch_wake_allocator(struct cache *ca)
void bch_cache_allocator_stop(struct cache *);
const char *bch_cache_allocator_start(struct cache *);
+const char *bch_cache_allocator_start_once(struct cache *);
void bch_open_buckets_init(struct cache_set *);
#endif /* _BCACHE_ALLOC_H */
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index 34efb73c8d6b..4faecf99805a 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -483,10 +483,7 @@ static void cached_dev_free(struct closure *cl)
{
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
- cancel_delayed_work_sync(&dc->writeback_pd_update);
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
-
+ bch_cached_dev_writeback_stop(dc);
bch_cached_dev_writeback_free(dc);
mutex_lock(&bch_register_lock);
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index 8dbfa6ee52c4..bd3cdda4120f 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -660,8 +660,18 @@ static int bch_gc_thread(void *arg)
return 0;
}
+void bch_gc_thread_stop(struct cache_set *c)
+{
+ set_bit(CACHE_SET_GC_STOPPING, &c->flags);
+
+ if (!IS_ERR_OR_NULL(c->gc_thread))
+ kthread_stop(c->gc_thread);
+}
+
int bch_gc_thread_start(struct cache_set *c)
{
+ clear_bit(CACHE_SET_GC_STOPPING, &c->flags);
+
c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
if (IS_ERR(c->gc_thread))
return PTR_ERR(c->gc_thread);
diff --git a/drivers/md/bcache/gc.h b/drivers/md/bcache/gc.h
index 81e50be44af5..b3d60bd2bfc9 100644
--- a/drivers/md/bcache/gc.h
+++ b/drivers/md/bcache/gc.h
@@ -7,6 +7,7 @@ static inline void set_gc_sectors(struct cache_set *c)
}
void bch_gc(struct cache_set *);
+void bch_gc_thread_stop(struct cache_set *);
int bch_gc_thread_start(struct cache_set *);
int bch_initial_gc(struct cache_set *, struct list_head *);
u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index c0d17ad94623..75fd276550bc 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -1016,7 +1016,6 @@ void bch_wake_delayed_writes(unsigned long data)
}
spin_unlock_irqrestore(&c->foreground_write_pd_lock, flags);
-
}
/**
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 05c62ca25f74..f67aa72d18e0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -638,24 +638,66 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
__bcache_write_super(c);
}
-/* Cache set */
+/* Cache set RO/RW: */
+
+/*
+ * For startup/shutdown of RW stuff, the dependencies are:
+ *
+ * - foreground writes depend on copygc and tiering (to free up space)
+ *
+ * - copygc and tiering depend on mark and sweep gc (they actually probably
+ * don't because they either reserve ahead of time or don't block if
+ * allocations fail, but allocations can require mark and sweep gc to run
+ * because of generation number wraparound)
+ *
+ * - all of the above depends on the allocator threads
+ *
+ * - allocator depends on the journal (when it rewrites prios and gens)
+ */
static void __bch_cache_read_only(struct cache *ca);
-static void bch_cache_set_read_only(struct cache_set *c)
+static void __bch_cache_set_read_only(struct cache_set *c)
{
- struct cached_dev *dc;
- struct bcache_device *d;
- struct radix_tree_iter iter;
struct closure cl;
- void **slot;
-
struct cache *ca;
unsigned i;
- lockdep_assert_held(&bch_register_lock);
closure_init_stack(&cl);
+ c->tiering_pd.rate.rate = UINT_MAX;
+ bch_ratelimit_reset(&c->tiering_pd.rate);
+ bch_tiering_read_stop(c);
+
+ for_each_cache(ca, c, i) {
+ bch_tiering_write_stop(ca);
+ bch_moving_gc_stop(ca);
+ }
+
+ bch_gc_thread_stop(c);
+
+ bch_btree_flush(c);
+
+ for_each_cache(ca, c, i)
+ bch_cache_allocator_stop(ca);
+
+ bch_journal_flush(&c->journal, &cl);
+ closure_sync(&cl);
+
+ cancel_delayed_work_sync(&c->journal.write_work);
+}
+
+static void bch_writes_disabled(struct percpu_ref *writes)
+{
+ struct cache_set *c = container_of(writes, struct cache_set, writes);
+
+ complete(&c->write_disable_complete);
+}
+
+void bch_cache_set_read_only(struct cache_set *c)
+{
+ lockdep_assert_held(&bch_register_lock);
+
if (test_and_set_bit(CACHE_SET_RO, &c->flags))
return;
@@ -664,7 +706,12 @@ static void bch_cache_set_read_only(struct cache_set *c)
/*
* Block new foreground-end write operations from starting - any new
* writes will return -EROFS:
+ *
+ * (This is really blocking new _allocations_, writes to previously
+ * allocated space can still happen until stopping the allocator in
+ * bch_cache_allocator_stop()).
*/
+ init_completion(&c->write_disable_complete);
percpu_ref_kill(&c->writes);
bch_wake_delayed_writes((unsigned long) c);
@@ -674,39 +721,78 @@ static void bch_cache_set_read_only(struct cache_set *c)
/* Wait for outstanding writes to complete: */
wait_for_completion(&c->write_disable_complete);
- radix_tree_for_each_slot(slot, &c->devices, &iter, 0) {
- d = rcu_dereference_protected(*slot,
- lockdep_is_held(&bch_register_lock));
+ __bch_cache_set_read_only(c);
- if (!INODE_FLASH_ONLY(&d->inode.v)) {
- dc = container_of(d, struct cached_dev, disk);
- bch_cached_dev_writeback_stop(dc);
+ bch_notify_cache_set_read_only(c);
+ trace_bcache_cache_set_read_only_done(c);
+}
+
+static const char *__bch_cache_set_read_write(struct cache_set *c)
+{
+ struct cache *ca;
+ const char *err;
+ unsigned i;
+
+ err = "error starting btree GC thread";
+ if (bch_gc_thread_start(c))
+ goto err;
+
+ for_each_cache(ca, c, i) {
+ if (CACHE_STATE(&ca->mi) != CACHE_ACTIVE)
+ continue;
+
+ err = "error starting moving GC thread";
+ if (bch_moving_gc_thread_start(ca)) {
+ percpu_ref_put(&ca->ref);
+ goto err;
}
+
+ err = "error starting tiering write workqueue";
+ if (bch_tiering_write_start(ca))
+ return err;
}
- c->tiering_pd.rate.rate = UINT_MAX;
- bch_ratelimit_reset(&c->tiering_pd.rate);
- bch_tiering_read_stop(c);
+ err = "error starting tiering thread";
+ if (bch_tiering_read_start(c))
+ goto err;
+
+ schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
+
+ return NULL;
+err:
+ __bch_cache_set_read_only(c);
+ return err;
+}
- set_bit(CACHE_SET_GC_STOPPING, &c->flags);
+const char *bch_cache_set_read_write(struct cache_set *c)
+{
+ struct cache *ca;
+ const char *err;
+ unsigned i;
- if (!IS_ERR_OR_NULL(c->gc_thread))
- kthread_stop(c->gc_thread);
+ lockdep_assert_held(&bch_register_lock);
- /* Should skip this if we're unregistering because of an error */
- bch_btree_flush(c);
+ if (!test_bit(CACHE_SET_RO, &c->flags))
+ return NULL;
for_each_cache(ca, c, i)
- __bch_cache_read_only(ca);
-
- bch_journal_flush(&c->journal, &cl);
- closure_sync(&cl);
+ if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
+ (err = bch_cache_allocator_start(ca))) {
+ percpu_ref_put(&ca->ref);
+ goto err;
+ }
- cancel_delayed_work_sync(&c->journal.write_work);
+ err = __bch_cache_set_read_write(c);
+ if (err)
+ return err;
- bch_notify_cache_set_read_only(c);
+ percpu_ref_reinit(&c->writes);
+ clear_bit(CACHE_SET_RO, &c->flags);
- trace_bcache_cache_set_read_only_done(c);
+ return NULL;
+err:
+ __bch_cache_set_read_only(c);
+ return err;
}
static void bch_cache_set_read_only_work(struct work_struct *work)
@@ -719,6 +805,8 @@ static void bch_cache_set_read_only_work(struct work_struct *work)
mutex_unlock(&bch_register_lock);
}
+/* Cache set startup/shutdown: */
+
void bch_cache_set_fail(struct cache_set *c)
{
switch (CACHE_ERROR_ACTION(&c->sb)) {
@@ -884,13 +972,6 @@ static unsigned cache_set_nr_online_devices(struct cache_set *c)
return nr;
}
-static void bch_writes_disabled(struct percpu_ref *writes)
-{
- struct cache_set *c = container_of(writes, struct cache_set, writes);
-
- complete(&c->write_disable_complete);
-}
-
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
@@ -904,9 +985,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
if (!c)
return NULL;
- if (percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL))
- goto err_free;
-
__module_get(THIS_MODULE);
closure_init(&c->cl, NULL);
set_closure_fn(&c->cl, cache_set_free, system_wq);
@@ -935,7 +1013,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
spin_lock_init(&c->btree_root_lock);
- init_completion(&c->write_disable_complete);
INIT_WORK(&c->read_only_work, bch_cache_set_read_only_work);
init_rwsem(&c->gc_lock);
@@ -994,6 +1071,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
goto err;
if (!(c->wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
+ percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL) ||
mempool_init_slab_pool(&c->search, 1, bch_search_cache) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
BTREE_RESERVE_SIZE) ||
@@ -1031,10 +1109,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
err:
bch_cache_set_stop(c);
return NULL;
-
-err_free:
- kfree(c);
- return NULL;
}
static int bch_cache_set_online(struct cache_set *c)
@@ -1069,8 +1143,6 @@ static int bch_cache_set_online(struct cache_set *c)
return 0;
}
-static const char *__bch_cache_read_write(struct cache *ca);
-
static const char *run_cache_set(struct cache_set *c)
{
const char *err = "cannot allocate memory";
@@ -1170,7 +1242,7 @@ static const char *run_cache_set(struct cache_set *c)
for_each_cache(ca, c, i)
if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
- (err = __bch_cache_read_write(ca))) {
+ (err = bch_cache_allocator_start_once(ca))) {
percpu_ref_put(&ca->ref);
goto err;
}
@@ -1204,7 +1276,7 @@ static const char *run_cache_set(struct cache_set *c)
for_each_cache(ca, c, i)
if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
- (err = __bch_cache_read_write(ca))) {
+ (err = bch_cache_allocator_start_once(ca))) {
percpu_ref_put(&ca->ref);
goto err;
}
@@ -1241,29 +1313,12 @@ static const char *run_cache_set(struct cache_set *c)
bch_prio_timer_start(c, READ);
bch_prio_timer_start(c, WRITE);
- err = "error starting btree GC thread";
- if (bch_gc_thread_start(c))
- goto err;
-
- for_each_cache(ca, c, i) {
- if (CACHE_STATE(&ca->mi) != CACHE_ACTIVE)
- continue;
-
- err = "error starting moving GC thread";
- if (bch_moving_gc_thread_start(ca)) {
- percpu_ref_put(&ca->ref);
- goto err;
- }
- }
+ closure_sync(&cl);
- err = "error starting tiering thread";
- if (bch_tiering_read_start(c))
+ err = __bch_cache_set_read_write(c);
+ if (err)
goto err;
- schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
-
- closure_sync(&cl);
-
now = get_seconds();
mi = cache_member_info_get(c);
for_each_cache_rcu(ca, c, i)
@@ -1345,8 +1400,8 @@ static void __bch_cache_read_only(struct cache *ca)
{
trace_bcache_cache_read_only(ca);
- bch_moving_gc_stop(ca);
bch_tiering_write_stop(ca);
+ bch_moving_gc_stop(ca);
/*
* This stops new data writes (e.g. to existing open data
@@ -1435,7 +1490,9 @@ void bch_cache_read_only(struct cache *ca)
return;
}
-static const char *__bch_cache_read_write(struct cache *ca)
+/* This does not write the super-block, should it? */
+
+const char *bch_cache_read_write(struct cache *ca)
{
const char *err;
@@ -1452,16 +1509,6 @@ static const char *__bch_cache_read_write(struct cache *ca)
trace_bcache_cache_read_write_done(ca);
return NULL;
-}
-
-/* This does not write the super-block, should it? */
-
-const char *bch_cache_read_write(struct cache *ca)
-{
- const char *err = __bch_cache_read_write(ca);
-
- if (err != NULL)
- return err;
err = "error starting moving GC thread";
if (!bch_moving_gc_thread_start(ca))
@@ -1491,7 +1538,6 @@ static void bch_cache_free_work(struct work_struct *work)
{
struct cache *ca = container_of(work, struct cache, free_work);
struct cache_set *c = ca->set;
- char buf[BDEVNAME_SIZE];
unsigned i;
/*
@@ -1537,9 +1583,6 @@ static void bch_cache_free_work(struct work_struct *work)
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
- if (ca->disk_sb.bdev)
- pr_notice("%s removed", bdevname(ca->disk_sb.bdev, buf));
-
free_super(&ca->disk_sb);
if (ca->kobj.state_in_sysfs)
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
index 8bf531e86ee8..b13a17eada97 100644
--- a/drivers/md/bcache/super.h
+++ b/drivers/md/bcache/super.h
@@ -162,6 +162,9 @@ const char *bch_register_one(const char *path);
const char *bch_register_cache_set(char * const *, unsigned,
struct cache_set **);
+void bch_cache_set_read_only(struct cache_set *);
+const char *bch_cache_set_read_write(struct cache_set *);
+
void bch_cache_read_only(struct cache *);
const char *bch_cache_read_write(struct cache *);
bool bch_cache_remove(struct cache *, bool force);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index a5ab2935c146..b1ac5b9aa794 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -105,7 +105,7 @@ struct bkey {
/* Size of combined key and value, in u64s */
__u8 u64s;
- /* Format of key (0 for format local to btree node */
+ /* Format of key (0 for format local to btree node) */
__u8 format;
/* Type of the value */