diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-03-13 22:01:47 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-03-14 20:27:44 -0400 |
commit | 2377cd8d5f32d320a7224c3ccf385d715cbc2d37 (patch) | |
tree | 06b9af1a1ded35bcdc79d22831efe2873ebd8e6e | |
parent | 150a02ff98d1668b523cd0caf1af3c3586cc1bec (diff) |
bcachefs: New erasure coding shutdown path
This implements a new shutdown path for erasure coding, which is needed
for the upcoming BCH_WRITE_WAIT_FOR_EC write path.
The process is:
- Cancel new stripes being built up
- Close out/cancel open buckets on write points or the partial list
that are for stripes
- Shutdown rebalance/copygc
- Then wait for in flight new stripes to finish
With BCH_WRITE_WAIT_FOR_EC, move ops will be waiting on stripes to fill
up before they complete; the new ec shutdown path is needed for shutting
down copygc/rebalance without deadlocking.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 39 | ||||
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 96 | ||||
-rw-r--r-- | fs/bcachefs/alloc_foreground.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/data_update.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/ec.c | 54 | ||||
-rw-r--r-- | fs/bcachefs/ec.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/io.c | 10 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 12 |
10 files changed, 141 insertions, 94 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index a64b48f6889c..009a85bccc53 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) */ bch2_recalc_capacity(c); - /* Next, close write points that point to this device... */ - for (i = 0; i < ARRAY_SIZE(c->write_points); i++) - bch2_writepoint_stop(c, ca, &c->write_points[i]); - - bch2_writepoint_stop(c, ca, &c->copygc_write_point); - bch2_writepoint_stop(c, ca, &c->rebalance_write_point); - bch2_writepoint_stop(c, ca, &c->btree_write_point); - - mutex_lock(&c->btree_reserve_cache_lock); - while (c->btree_reserve_cache_nr) { - struct btree_alloc *a = - &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - - bch2_open_buckets_put(c, &a->ob); - } - mutex_unlock(&c->btree_reserve_cache_lock); - - spin_lock(&c->freelist_lock); - i = 0; - while (i < c->open_buckets_partial_nr) { - struct open_bucket *ob = - c->open_buckets + c->open_buckets_partial[i]; - - if (ob->dev == ca->dev_idx) { - --c->open_buckets_partial_nr; - swap(c->open_buckets_partial[i], - c->open_buckets_partial[c->open_buckets_partial_nr]); - ob->on_partial_list = false; - spin_unlock(&c->freelist_lock); - bch2_open_bucket_put(c, ob); - spin_lock(&c->freelist_lock); - } else { - i++; - } - } - spin_unlock(&c->freelist_lock); - - bch2_ec_stop_dev(c, ca); + bch2_open_buckets_stop(c, ca, false); /* * Wake up threads that were blocked on allocation, so they can notice diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index e3e21798a102..d52f30ac66ba 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1023,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans, return ret < 0 ? ret : 0; } -void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, - struct open_buckets *obs) +static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, + struct bch_dev *ca, bool ec) { - struct open_buckets ptrs = { .nr = 0 }; - struct open_bucket *ob, *ob2; - unsigned i, j; - - open_bucket_for_each(c, obs, ob, i) { - bool drop = !ca || ob->dev == ca->dev_idx; + if (ec) { + return ob->ec != NULL; + } else if (ca) { + bool drop = ob->dev == ca->dev_idx; + struct open_bucket *ob2; + unsigned i; if (!drop && ob->ec) { mutex_lock(&ob->ec->lock); - for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) { - if (!ob->ec->blocks[j]) + for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) { + if (!ob->ec->blocks[i]) continue; - ob2 = c->open_buckets + ob->ec->blocks[j]; + ob2 = c->open_buckets + ob->ec->blocks[i]; drop |= ob2->dev == ca->dev_idx; } mutex_unlock(&ob->ec->lock); } - if (drop) - bch2_open_bucket_put(c, ob); - else - ob_push(c, &ptrs, ob); + return drop; + } else { + return true; } - - *obs = ptrs; } -void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, - struct write_point *wp) +static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, + bool ec, struct write_point *wp) { + struct open_buckets ptrs = { .nr = 0 }; + struct open_bucket *ob; + unsigned i; + mutex_lock(&wp->lock); - bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); + open_bucket_for_each(c, &wp->ptrs, ob, i) + if (should_drop_bucket(ob, c, ca, ec)) + bch2_open_bucket_put(c, ob); + else + ob_push(c, &ptrs, ob); + wp->ptrs = ptrs; mutex_unlock(&wp->lock); } +void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, + bool ec) +{ + unsigned i; + + /* Next, close write points that point to this device... */ + for (i = 0; i < ARRAY_SIZE(c->write_points); i++) + bch2_writepoint_stop(c, ca, ec, &c->write_points[i]); + + bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point); + bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); + bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); + + mutex_lock(&c->btree_reserve_cache_lock); + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = + &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; + + bch2_open_buckets_put(c, &a->ob); + } + mutex_unlock(&c->btree_reserve_cache_lock); + + spin_lock(&c->freelist_lock); + i = 0; + while (i < c->open_buckets_partial_nr) { + struct open_bucket *ob = + c->open_buckets + c->open_buckets_partial[i]; + + if (should_drop_bucket(ob, c, ca, ec)) { + --c->open_buckets_partial_nr; + swap(c->open_buckets_partial[i], + c->open_buckets_partial[c->open_buckets_partial_nr]); + ob->on_partial_list = false; + spin_unlock(&c->freelist_lock); + bch2_open_bucket_put(c, ob); + spin_lock(&c->freelist_lock); + } else { + i++; + } + } + spin_unlock(&c->freelist_lock); + + bch2_ec_stop_dev(c, ca); +} + static inline struct hlist_head *writepoint_hash(struct bch_fs *c, unsigned long write_point) { @@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c) return true; } -static bool try_decrease_writepoints(struct bch_fs *c, - unsigned old_nr) +static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr) { struct write_point *wp; @@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c, hlist_del_rcu(&wp->node); mutex_unlock(&c->write_points_hash_lock); - bch2_writepoint_stop(c, NULL, wp); + bch2_writepoint_stop(c, NULL, false, wp); return true; } diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 1fa96f8c6879..8a1cf425091b 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, struct bkey_i *, unsigned, bool); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, - struct open_buckets *); - -void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *, - struct write_point *); +void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool); static inline struct write_point_specifier writepoint_hashed(unsigned long v) { diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index dc943652b2df..348ee8e8c015 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -655,7 +655,6 @@ typedef struct { x(fallocate) \ x(discard) \ x(invalidate) \ - x(move) \ x(delete_dead_snapshots) \ x(snapshot_delete_pagecache) \ x(sysfs) @@ -958,14 +957,14 @@ struct bch_fs { struct list_head ec_stripe_new_list; struct mutex ec_stripe_new_lock; + wait_queue_head_t ec_stripe_new_wait; struct work_struct ec_stripe_create_work; u64 ec_stripe_hint; - struct bio_set ec_bioset; - struct work_struct ec_stripe_delete_work; - struct llist_head ec_stripe_delete_list; + + struct bio_set ec_bioset; /* REFLINK */ u64 reflink_hint; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 17f139370ec2..e414d1afd434 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -253,6 +253,7 @@ restart_drop_extra_replicas: BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, + BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| m->data_opts.btree_insert_flags); if (!ret) { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index af3a72acc67f..1e621dcc1d37 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b while (1) { ret = commit_do(trans, NULL, NULL, + BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL, ec_stripe_update_extent(trans, bucket_pos, bucket.gen, s, &bp_offset)); @@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err; } - ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL, + ret = bch2_trans_do(c, &s->res, NULL, + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_NOFAIL, ec_stripe_key_update(&trans, &s->new_stripe.key, !s->have_existing_stripe)); if (ret) { @@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans, if (ret) return ERR_PTR(ret); + if (test_bit(BCH_FS_GOING_RO, &c->flags)) { + h = ERR_PTR(-EROFS); + goto found; + } + list_for_each_entry(h, &c->ec_stripe_head_list, list) if (h->target == target && h->algo == algo && @@ -1753,7 +1761,7 @@ err: return ERR_PTR(ret); } -void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) +static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) { struct ec_stripe_head *h; struct open_bucket *ob; @@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) mutex_lock(&c->ec_stripe_head_lock); list_for_each_entry(h, &c->ec_stripe_head_list, list) { - mutex_lock(&h->lock); if (!h->s) goto unlock; + if (!ca) + goto found; + for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) { if (!h->s->blocks[i]) continue; @@ -1784,6 +1794,32 @@ unlock: mutex_unlock(&c->ec_stripe_head_lock); } +void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) +{ + __bch2_ec_stop(c, ca); +} + +void bch2_fs_ec_stop(struct bch_fs *c) +{ + __bch2_ec_stop(c, NULL); +} + +static bool bch2_fs_ec_flush_done(struct bch_fs *c) +{ + bool ret; + + mutex_lock(&c->ec_stripe_new_lock); + ret = list_empty(&c->ec_stripe_new_list); + mutex_unlock(&c->ec_stripe_new_lock); + + return ret; +} + +void bch2_fs_ec_flush(struct bch_fs *c) +{ + wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c)); +} + int bch2_stripes_read(struct bch_fs *c) { struct btree_trans trans; @@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c) void bch2_fs_ec_init_early(struct bch_fs *c) { + spin_lock_init(&c->ec_stripes_new_lock); + mutex_init(&c->ec_stripes_heap_lock); + + INIT_LIST_HEAD(&c->ec_stripe_head_list); + mutex_init(&c->ec_stripe_head_lock); + + INIT_LIST_HEAD(&c->ec_stripe_new_list); + mutex_init(&c->ec_stripe_new_lock); + init_waitqueue_head(&c->ec_stripe_new_wait); + INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); } int bch2_fs_ec_init(struct bch_fs *c) { - spin_lock_init(&c->ec_stripes_new_lock); - return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), BIOSET_NEED_BVECS); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 8f777a37e43d..7c08a49d7419 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, } void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); - -void bch2_ec_flush_new_stripes(struct bch_fs *); +void bch2_fs_ec_stop(struct bch_fs *); +void bch2_fs_ec_flush(struct bch_fs *); int bch2_stripes_read(struct bch_fs *); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 19461aacc894..d11feb100d7e 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -706,7 +706,8 @@ static void bch2_write_done(struct closure *cl) struct bch_fs *c = op->c; bch2_disk_reservation_put(c, &op->res); - bch2_write_ref_put(c, BCH_WRITE_REF_write); + if (!(op->flags & BCH_WRITE_MOVE)) + bch2_write_ref_put(c, BCH_WRITE_REF_write); bch2_keylist_free(&op->insert_keys, op->inline_keys); bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); @@ -1843,7 +1844,12 @@ void bch2_write(struct closure *cl) goto err; } - if (c->opts.nochanges || + if (c->opts.nochanges) { + op->error = -BCH_ERR_erofs_no_writes; + goto err; + } + + if (!(op->flags & BCH_WRITE_MOVE) && !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { op->error = -BCH_ERR_erofs_no_writes; goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7f13aa6ef20b..6f5851ea81b5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -60,7 +60,6 @@ struct moving_io { static void move_free(struct moving_io *io) { struct moving_context *ctxt = io->write.ctxt; - struct bch_fs *c = ctxt->c; if (io->b) atomic_dec(&io->b->count); @@ -72,7 +71,6 @@ static void move_free(struct moving_io *io) wake_up(&ctxt->wait); mutex_unlock(&ctxt->lock); - bch2_write_ref_put(c, BCH_WRITE_REF_move); kfree(io); } @@ -281,9 +279,6 @@ static int bch2_move_extent(struct btree_trans *trans, return 0; } - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move)) - return -BCH_ERR_erofs_no_writes; - /* * Before memory allocations & taking nocow locks in * bch2_data_update_init(): @@ -379,7 +374,6 @@ err_free_pages: err_free: kfree(io); err: - bch2_write_ref_put(c, BCH_WRITE_REF_move); trace_and_count(c, move_extent_alloc_mem_fail, k.k); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7957721c2d91..8a269b680295 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -206,9 +206,12 @@ static void __bch2_fs_read_only(struct bch_fs *c) unsigned i, clean_passes = 0; u64 seq = 0; + bch2_fs_ec_stop(c); + bch2_open_buckets_stop(c, NULL, true); bch2_rebalance_stop(c); bch2_copygc_stop(c); bch2_gc_thread_stop(c); + bch2_fs_ec_flush(c); bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu", journal_cur_seq(&c->journal)); @@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->fsck_errors); mutex_init(&c->fsck_error_lock); - INIT_LIST_HEAD(&c->ec_stripe_head_list); - mutex_init(&c->ec_stripe_head_lock); - - INIT_LIST_HEAD(&c->ec_stripe_new_list); - mutex_init(&c->ec_stripe_new_lock); - - - mutex_init(&c->ec_stripes_heap_lock); - seqcount_init(&c->gc_pos_lock); seqcount_init(&c->usage_lock); |