author     Kent Overstreet <kent.overstreet@gmail.com>   2015-06-01 19:32:22 -0700
committer  Kent Overstreet <kent.overstreet@gmail.com>   2016-10-07 12:34:21 -0800
commit     b5ecdc495b4053c64f7bca90125fb5df20d7e0f1
tree       fdfd8cb1ec591c09923af1393e84e66e0bc6c619
parent     5749e6138348d7c1546e28b4ac0ae9032c94e0c0
bcache: Cache set RO/RW path improvements
Now we should be able to go RW -> RO and back again, which we need for
bcachefs remounting.
-rw-r--r--  drivers/md/bcache/alloc.c       43
-rw-r--r--  drivers/md/bcache/alloc.h        1
-rw-r--r--  drivers/md/bcache/blockdev.c     5
-rw-r--r--  drivers/md/bcache/gc.c          10
-rw-r--r--  drivers/md/bcache/gc.h           1
-rw-r--r--  drivers/md/bcache/io.c           1
-rw-r--r--  drivers/md/bcache/super.c      211
-rw-r--r--  drivers/md/bcache/super.h        3
-rw-r--r--  include/uapi/linux/bcache.h      2
9 files changed, 170 insertions(+), 107 deletions(-)
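The round trip described in the commit message comes down to the two cache_set entry points this patch exports from super.h, bch_cache_set_read_only() and bch_cache_set_read_write(). A minimal sketch of a caller under the patch's locking rules (the wrapper itself is hypothetical, not part of the patch):

/*
 * Hypothetical caller sketching the RW <-> RO round trip; not part of
 * the patch.  Both entry points assert bch_register_lock is held, and
 * both bail out early if the cache set is already in the requested
 * state (the CACHE_SET_RO flag).
 */
static const char *cache_set_remount_sketch(struct cache_set *c, bool rw)
{
        const char *err = NULL;

        mutex_lock(&bch_register_lock);
        if (rw)
                err = bch_cache_set_read_write(c);      /* NULL on success */
        else
                bch_cache_set_read_only(c);
        mutex_unlock(&bch_register_lock);

        return err;
}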
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 7b51888c5968..442a3575dc0b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -1540,6 +1540,30 @@ void bch_cache_allocator_stop(struct cache *ca)
 }
 
 /*
+ * Startup the allocator thread for transition to RW mode:
+ */
+const char *bch_cache_allocator_start(struct cache *ca)
+{
+        struct cache_set *c = ca->set;
+        struct cache_group *tier = &c->cache_tiers[CACHE_TIER(&ca->mi)];
+        struct task_struct *k;
+
+        k = kthread_create(bch_allocator_thread, ca, "bcache_allocator");
+        if (IS_ERR(k))
+                return "error starting allocator thread";
+
+        ca->alloc_thread = k;
+        wake_up_process(k);
+
+        bch_cache_group_add_cache(tier, ca);
+        bch_cache_group_add_cache(&c->cache_all, ca);
+
+        bch_recalc_capacity(c);
+
+        return NULL;
+}
+
+/*
  * bch_cache_allocator_start - fill freelists directly with completely unused
  * buckets
  *
@@ -1562,11 +1586,8 @@ void bch_cache_allocator_stop(struct cache *ca)
  * should always be some of when this function is called, since the last time
  * we shut down there should have been unused buckets stranded on freelists.
  */
-const char *bch_cache_allocator_start(struct cache *ca)
+const char *bch_cache_allocator_start_once(struct cache *ca)
 {
-        struct cache_set *c = ca->set;
-        struct cache_group *tier = &c->cache_tiers[CACHE_TIER(&ca->mi)];
-        struct task_struct *k;
         struct bucket *g;
 
         spin_lock(&ca->freelist_lock);
@@ -1588,19 +1609,7 @@ const char *bch_cache_allocator_start(struct cache *ca)
         if (!fifo_full(&ca->free[RESERVE_PRIO]))
                 return "couldn't find enough available buckets to write prios";
 
-        k = kthread_create(bch_allocator_thread, ca, "bcache_allocator");
-        if (IS_ERR(k))
-                return "error starting allocator thread";
-
-        ca->alloc_thread = k;
-        wake_up_process(k);
-
-        bch_cache_group_add_cache(tier, ca);
-        bch_cache_group_add_cache(&c->cache_all, ca);
-
-        bch_recalc_capacity(c);
-
-        return NULL;
+        return bch_cache_allocator_start(ca);
 }
 
 void bch_open_buckets_init(struct cache_set *c)
diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h
index c0118db8440e..106eaa52b0e0 100644
--- a/drivers/md/bcache/alloc.h
+++ b/drivers/md/bcache/alloc.h
@@ -54,6 +54,7 @@ static inline void bch_wake_allocator(struct cache *ca)
 
 void bch_cache_allocator_stop(struct cache *);
 const char *bch_cache_allocator_start(struct cache *);
+const char *bch_cache_allocator_start_once(struct cache *);
 void bch_open_buckets_init(struct cache_set *);
 
 #endif /* _BCACHE_ALLOC_H */
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index 34efb73c8d6b..4faecf99805a 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -483,10 +483,7 @@ static void cached_dev_free(struct closure *cl)
 {
         struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 
-        cancel_delayed_work_sync(&dc->writeback_pd_update);
-        if (!IS_ERR_OR_NULL(dc->writeback_thread))
-                kthread_stop(dc->writeback_thread);
-
+        bch_cached_dev_writeback_stop(dc);
         bch_cached_dev_writeback_free(dc);
 
         mutex_lock(&bch_register_lock);
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index 8dbfa6ee52c4..bd3cdda4120f 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -660,8 +660,18 @@ static int bch_gc_thread(void *arg)
         return 0;
 }
 
+void bch_gc_thread_stop(struct cache_set *c)
+{
+        set_bit(CACHE_SET_GC_STOPPING, &c->flags);
+
+        if (!IS_ERR_OR_NULL(c->gc_thread))
+                kthread_stop(c->gc_thread);
+}
+
 int bch_gc_thread_start(struct cache_set *c)
 {
+        clear_bit(CACHE_SET_GC_STOPPING, &c->flags);
+
         c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
         if (IS_ERR(c->gc_thread))
                 return PTR_ERR(c->gc_thread);
diff --git a/drivers/md/bcache/gc.h b/drivers/md/bcache/gc.h
index 81e50be44af5..b3d60bd2bfc9 100644
--- a/drivers/md/bcache/gc.h
+++ b/drivers/md/bcache/gc.h
@@ -7,6 +7,7 @@ static inline void set_gc_sectors(struct cache_set *c)
 }
 
 void bch_gc(struct cache_set *);
+void bch_gc_thread_stop(struct cache_set *);
 int bch_gc_thread_start(struct cache_set *);
 int bch_initial_gc(struct cache_set *, struct list_head *);
 u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index c0d17ad94623..75fd276550bc 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -1016,7 +1016,6 @@ void bch_wake_delayed_writes(unsigned long data)
         }
 
         spin_unlock_irqrestore(&c->foreground_write_pd_lock, flags);
-
 }
 
 /**
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 05c62ca25f74..f67aa72d18e0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -638,24 +638,66 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
         __bcache_write_super(c);
 }
 
-/* Cache set */
+/* Cache set RO/RW: */
+
+/*
+ * For startup/shutdown of RW stuff, the dependencies are:
+ *
+ * - foreground writes depend on copygc and tiering (to free up space)
+ *
+ * - copygc and tiering depend on mark and sweep gc (they actually probably
+ *   don't because they either reserve ahead of time or don't block if
+ *   allocations fail, but allocations can require mark and sweep gc to run
+ *   because of generation number wraparound)
+ *
+ * - all of the above depends on the allocator threads
+ *
+ * - allocator depends on the journal (when it rewrites prios and gens)
+ */
 
 static void __bch_cache_read_only(struct cache *ca);
 
-static void bch_cache_set_read_only(struct cache_set *c)
+static void __bch_cache_set_read_only(struct cache_set *c)
 {
-        struct cached_dev *dc;
-        struct bcache_device *d;
-        struct radix_tree_iter iter;
         struct closure cl;
-        void **slot;
         struct cache *ca;
         unsigned i;
 
-        lockdep_assert_held(&bch_register_lock);
-
         closure_init_stack(&cl);
 
+        c->tiering_pd.rate.rate = UINT_MAX;
+        bch_ratelimit_reset(&c->tiering_pd.rate);
+        bch_tiering_read_stop(c);
+
+        for_each_cache(ca, c, i) {
+                bch_tiering_write_stop(ca);
+                bch_moving_gc_stop(ca);
+        }
+
+        bch_gc_thread_stop(c);
+
+        bch_btree_flush(c);
+
+        for_each_cache(ca, c, i)
+                bch_cache_allocator_stop(ca);
+
+        bch_journal_flush(&c->journal, &cl);
+        closure_sync(&cl);
+
+        cancel_delayed_work_sync(&c->journal.write_work);
+}
+
+static void bch_writes_disabled(struct percpu_ref *writes)
+{
+        struct cache_set *c = container_of(writes, struct cache_set, writes);
+
+        complete(&c->write_disable_complete);
+}
+
+void bch_cache_set_read_only(struct cache_set *c)
+{
+        lockdep_assert_held(&bch_register_lock);
+
         if (test_and_set_bit(CACHE_SET_RO, &c->flags))
                 return;
@@ -664,7 +706,12 @@ static void bch_cache_set_read_only(struct cache_set *c)
         /*
          * Block new foreground-end write operations from starting - any new
          * writes will return -EROFS:
+         *
+         * (This is really blocking new _allocations_, writes to previously
+         * allocated space can still happen until stopping the allocator in
+         * bch_cache_allocator_stop()).
          */
+        init_completion(&c->write_disable_complete);
         percpu_ref_kill(&c->writes);
 
         bch_wake_delayed_writes((unsigned long) c);
@@ -674,39 +721,78 @@ static void bch_cache_set_read_only(struct cache_set *c)
         /* Wait for outstanding writes to complete: */
         wait_for_completion(&c->write_disable_complete);
 
-        radix_tree_for_each_slot(slot, &c->devices, &iter, 0) {
-                d = rcu_dereference_protected(*slot,
-                                              lockdep_is_held(&bch_register_lock));
+        __bch_cache_set_read_only(c);
 
-                if (!INODE_FLASH_ONLY(&d->inode.v)) {
-                        dc = container_of(d, struct cached_dev, disk);
-                        bch_cached_dev_writeback_stop(dc);
+        bch_notify_cache_set_read_only(c);
+        trace_bcache_cache_set_read_only_done(c);
+}
+
+static const char *__bch_cache_set_read_write(struct cache_set *c)
+{
+        struct cache *ca;
+        const char *err;
+        unsigned i;
+
+        err = "error starting btree GC thread";
+        if (bch_gc_thread_start(c))
+                goto err;
+
+        for_each_cache(ca, c, i) {
+                if (CACHE_STATE(&ca->mi) != CACHE_ACTIVE)
+                        continue;
+
+                err = "error starting moving GC thread";
+                if (bch_moving_gc_thread_start(ca)) {
+                        percpu_ref_put(&ca->ref);
+                        goto err;
                 }
+
+                err = "error starting tiering write workqueue";
+                if (bch_tiering_write_start(ca))
+                        return err;
         }
 
-        c->tiering_pd.rate.rate = UINT_MAX;
-        bch_ratelimit_reset(&c->tiering_pd.rate);
-        bch_tiering_read_stop(c);
+        err = "error starting tiering thread";
+        if (bch_tiering_read_start(c))
+                goto err;
+
+        schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
+
+        return NULL;
+err:
+        __bch_cache_set_read_only(c);
+        return err;
+}
 
-        set_bit(CACHE_SET_GC_STOPPING, &c->flags);
+const char *bch_cache_set_read_write(struct cache_set *c)
+{
+        struct cache *ca;
+        const char *err;
+        unsigned i;
 
-        if (!IS_ERR_OR_NULL(c->gc_thread))
-                kthread_stop(c->gc_thread);
+        lockdep_assert_held(&bch_register_lock);
 
-        /* Should skip this if we're unregistering because of an error */
-        bch_btree_flush(c);
+        if (!test_bit(CACHE_SET_RO, &c->flags))
+                return NULL;
 
         for_each_cache(ca, c, i)
-                __bch_cache_read_only(ca);
-
-        bch_journal_flush(&c->journal, &cl);
-        closure_sync(&cl);
+                if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
+                    (err = bch_cache_allocator_start(ca))) {
+                        percpu_ref_put(&ca->ref);
+                        goto err;
+                }
 
-        cancel_delayed_work_sync(&c->journal.write_work);
+        err = __bch_cache_set_read_write(c);
+        if (err)
+                return err;
 
-        bch_notify_cache_set_read_only(c);
+        percpu_ref_reinit(&c->writes);
+        clear_bit(CACHE_SET_RO, &c->flags);
 
-        trace_bcache_cache_set_read_only_done(c);
+        return NULL;
+err:
+        __bch_cache_set_read_only(c);
+        return err;
 }
@@ -719,6 +805,8 @@ static void bch_cache_set_read_only_work(struct work_struct *work)
         mutex_unlock(&bch_register_lock);
 }
 
+/* Cache set startup/shutdown: */
+
 void bch_cache_set_fail(struct cache_set *c)
 {
         switch (CACHE_ERROR_ACTION(&c->sb)) {
@@ -884,13 +972,6 @@ static unsigned cache_set_nr_online_devices(struct cache_set *c)
         return nr;
 }
 
-static void bch_writes_disabled(struct percpu_ref *writes)
-{
-        struct cache_set *c = container_of(writes, struct cache_set, writes);
-
-        complete(&c->write_disable_complete);
-}
-
 #define alloc_bucket_pages(gfp, ca)                                     \
         ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
@@ -904,9 +985,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
         if (!c)
                 return NULL;
 
-        if (percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL))
-                goto err_free;
-
         __module_get(THIS_MODULE);
         closure_init(&c->cl, NULL);
         set_closure_fn(&c->cl, cache_set_free, system_wq);
@@ -935,7 +1013,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
         mutex_init(&c->btree_cache_lock);
         mutex_init(&c->bucket_lock);
         spin_lock_init(&c->btree_root_lock);
-        init_completion(&c->write_disable_complete);
 
         INIT_WORK(&c->read_only_work, bch_cache_set_read_only_work);
         init_rwsem(&c->gc_lock);
@@ -994,6 +1071,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                 goto err;
 
         if (!(c->wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
+            percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL) ||
             mempool_init_slab_pool(&c->search, 1, bch_search_cache) ||
             mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
                                       BTREE_RESERVE_SIZE) ||
@@ -1031,10 +1109,6 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 err:
         bch_cache_set_stop(c);
         return NULL;
-
-err_free:
-        kfree(c);
-        return NULL;
 }
@@ -1069,8 +1143,6 @@ static int bch_cache_set_online(struct cache_set *c)
         return 0;
 }
 
-static const char *__bch_cache_read_write(struct cache *ca);
-
 static const char *run_cache_set(struct cache_set *c)
 {
         const char *err = "cannot allocate memory";
@@ -1170,7 +1242,7 @@ static const char *run_cache_set(struct cache_set *c)
 
         for_each_cache(ca, c, i)
                 if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
-                    (err = __bch_cache_read_write(ca))) {
+                    (err = bch_cache_allocator_start_once(ca))) {
                         percpu_ref_put(&ca->ref);
                         goto err;
                 }
@@ -1204,7 +1276,7 @@ static const char *run_cache_set(struct cache_set *c)
 
         for_each_cache(ca, c, i)
                 if (CACHE_STATE(&ca->mi) == CACHE_ACTIVE &&
-                    (err = __bch_cache_read_write(ca))) {
+                    (err = bch_cache_allocator_start_once(ca))) {
                         percpu_ref_put(&ca->ref);
                         goto err;
                 }
@@ -1241,29 +1313,12 @@ static const char *run_cache_set(struct cache_set *c)
         bch_prio_timer_start(c, READ);
         bch_prio_timer_start(c, WRITE);
 
-        err = "error starting btree GC thread";
-        if (bch_gc_thread_start(c))
-                goto err;
-
-        for_each_cache(ca, c, i) {
-                if (CACHE_STATE(&ca->mi) != CACHE_ACTIVE)
-                        continue;
-
-                err = "error starting moving GC thread";
-                if (bch_moving_gc_thread_start(ca)) {
-                        percpu_ref_put(&ca->ref);
-                        goto err;
-                }
-        }
+        closure_sync(&cl);
 
-        err = "error starting tiering thread";
-        if (bch_tiering_read_start(c))
+        err = __bch_cache_set_read_write(c);
+        if (err)
                 goto err;
 
-        schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
-
-        closure_sync(&cl);
-
         now = get_seconds();
         mi = cache_member_info_get(c);
         for_each_cache_rcu(ca, c, i)
@@ -1345,8 +1400,8 @@ static void __bch_cache_read_only(struct cache *ca)
 {
         trace_bcache_cache_read_only(ca);
 
-        bch_moving_gc_stop(ca);
         bch_tiering_write_stop(ca);
+        bch_moving_gc_stop(ca);
 
         /*
          * This stops new data writes (e.g. to existing open data
@@ -1435,7 +1490,9 @@ void bch_cache_read_only(struct cache *ca)
         return;
 }
 
-static const char *__bch_cache_read_write(struct cache *ca)
+/* This does not write the super-block, should it? */
+
+const char *bch_cache_read_write(struct cache *ca)
 {
         const char *err;
 
@@ -1452,16 +1509,6 @@ static const char *__bch_cache_read_write(struct cache *ca)
         trace_bcache_cache_read_write_done(ca);
 
         return NULL;
-}
-
-/* This does not write the super-block, should it? */
-
-const char *bch_cache_read_write(struct cache *ca)
-{
-        const char *err = __bch_cache_read_write(ca);
-
-        if (err != NULL)
-                return err;
 
         err = "error starting moving GC thread";
         if (!bch_moving_gc_thread_start(ca))
@@ -1491,7 +1538,6 @@ static void bch_cache_free_work(struct work_struct *work)
 {
         struct cache *ca = container_of(work, struct cache, free_work);
         struct cache_set *c = ca->set;
-        char buf[BDEVNAME_SIZE];
         unsigned i;
 
         /*
@@ -1537,9 +1583,6 @@ static void bch_cache_free_work(struct work_struct *work)
         for (i = 0; i < RESERVE_NR; i++)
                 free_fifo(&ca->free[i]);
 
-        if (ca->disk_sb.bdev)
-                pr_notice("%s removed", bdevname(ca->disk_sb.bdev, buf));
-
         free_super(&ca->disk_sb);
 
         if (ca->kobj.state_in_sysfs)
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
index 8bf531e86ee8..b13a17eada97 100644
--- a/drivers/md/bcache/super.h
+++ b/drivers/md/bcache/super.h
@@ -162,6 +162,9 @@ const char *bch_register_one(const char *path);
 const char *bch_register_cache_set(char * const *, unsigned,
                                    struct cache_set **);
 
+void bch_cache_set_read_only(struct cache_set *);
+const char *bch_cache_set_read_write(struct cache_set *);
+
 void bch_cache_read_only(struct cache *);
 const char *bch_cache_read_write(struct cache *);
 bool bch_cache_remove(struct cache *, bool force);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index a5ab2935c146..b1ac5b9aa794 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -105,7 +105,7 @@ struct bkey {
         /* Size of combined key and value, in u64s */
         __u8            u64s;
 
-        /* Format of key (0 for format local to btree node */
+        /* Format of key (0 for format local to btree node) */
         __u8            format;
 
         /* Type of the value */
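One detail worth calling out in the super.c changes above: the RO/RW toggle for foreground writes is built on a percpu_ref kill/reinit cycle on c->writes. A compressed sketch of that pattern, using names from the patch (the standalone function is hypothetical; in the patch the two halves live in bch_cache_set_read_only() and bch_cache_set_read_write()):

/*
 * Sketch (hypothetical function) of the c->writes gating used above.
 */
static void writes_gate_sketch(struct cache_set *c)
{
        /* Going RO: new refs on c->writes can no longer be taken... */
        init_completion(&c->write_disable_complete);
        percpu_ref_kill(&c->writes);

        /*
         * ...and when the last outstanding write drops its ref,
         * bch_writes_disabled() calls complete() and the waiter proceeds:
         */
        wait_for_completion(&c->write_disable_complete);

        /* (tear down tiering, moving GC, btree GC, allocator, journal) */

        /* Going RW again: revive the dead ref so writers can proceed */
        percpu_ref_reinit(&c->writes);
}

This also explains why the patch moves init_completion() out of bch_cache_set_alloc() and into the read-only path: once the set can go RW again, the completion has to be re-armed on every RW -> RO transition rather than once at allocation time.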