author     Kent Overstreet <kent.overstreet@gmail.com>  2018-03-28 07:58:22 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>  2018-03-28 10:37:16 -0400
commit     d7a266609deae90a0b36c212cfbf9f85735e7a61
tree       0684d5a55e69fe9175e3238c40c9bfaddeaaf675
parent     9d4143ad7bc734425751225dbbae79f188d0ad81
bcachefs: Fix some allocator startup bugs
-rw-r--r--  fs/bcachefs/alloc.c                 | 140
-rw-r--r--  fs/bcachefs/bcachefs.h              |   2
-rw-r--r--  fs/bcachefs/btree_update_interior.c |  13
-rw-r--r--  fs/bcachefs/btree_update_interior.h |   2
-rw-r--r--  fs/bcachefs/movinggc.c              |   3
-rw-r--r--  fs/bcachefs/super.c                 |  13
6 files changed, 129 insertions, 44 deletions
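
The headline behavioral change in the alloc.c hunks below is the allocator thread's shutdown handling: helpers that previously returned -1 when kthread_should_stop() fired now return 1, so the main loop can tell a clean stop request (positive) apart from a real error (negative) and log only the latter via bch_err(). A minimal userspace sketch of that return-code convention; the names here (stop_requested, discard_step, thread_main) are illustrative stand-ins for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

static bool stop_requested;     /* stands in for kthread_should_stop() */

/* Returns 0 to keep going, > 0 on a clean stop request, < 0 on error. */
static int discard_step(void)
{
        if (stop_requested)
                return 1;       /* shutdown is not an error */

        /* ... do one unit of work here, returning -errno on failure ... */
        return 0;
}

static int thread_main(void)
{
        int ret;

        while (!(ret = discard_step()))
                ;               /* keep working until stop or error */

        if (ret < 0)            /* only real errors get logged, as with bch_err() */
                fprintf(stderr, "error: %i\n", ret);

        return 0;               /* the thread itself still exits with 0 */
}

int main(void)
{
        stop_requested = true;  /* simulate kthread_stop() being called */
        return thread_main();
}
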
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index cba399ee713c..0b8d0b20b5ed 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -58,6 +58,7 @@
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "checksum.h"
@@ -536,7 +537,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
         while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
                 if (kthread_should_stop()) {
-                        ret = -1;
+                        ret = 1;
                         break;
                 }
@@ -694,6 +695,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
                 };
 
                 heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+
+                cond_resched();
         }
 
         up_read(&ca->bucket_lock);
@@ -729,6 +732,8 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 
                 if (bch2_can_invalidate_bucket(ca, b, m))
                         bch2_invalidate_one_bucket(c, ca, b);
+
+                cond_resched();
         }
 }
@@ -749,6 +754,8 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
 
                 if (bch2_can_invalidate_bucket(ca, b, m))
                         bch2_invalidate_one_bucket(c, ca, b);
+
+                cond_resched();
         }
 }
@@ -850,7 +857,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
                 if ((current->flags & PF_KTHREAD) &&
                     kthread_should_stop()) {
-                        ret = -1;
+                        ret = 1;
                         break;
                 }
@@ -880,7 +887,7 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
                               ca->mi.bucket_size, GFP_NOIO, 0);
 
                 if (push_invalidated_bucket(c, ca, bucket))
-                        return -1;
+                        return 1;
         }
 
         return 0;
@@ -905,17 +912,32 @@ static int bch2_allocator_thread(void *arg)
 
         while (1) {
                 while (1) {
+                        cond_resched();
+
+                        pr_debug("discarding %zu invalidated buckets",
+                                 ca->nr_invalidated);
+
                         ret = discard_invalidated_buckets(c, ca);
                         if (ret)
-                                return 0;
+                                goto stop;
 
                         if (fifo_empty(&ca->free_inc))
                                 break;
 
+                        pr_debug("invalidating %zu buckets",
+                                 fifo_used(&ca->free_inc));
+
                         journal_seq = 0;
                         ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
-                        if (ret)
-                                return 0;
+                        if (ret) {
+                                bch_err(ca, "error invalidating buckets: %i", ret);
+                                goto stop;
+                        }
+
+                        if (!ca->nr_invalidated) {
+                                bch_err(ca, "allocator thread unable to make forward progress!");
+                                goto stop;
+                        }
 
                         if (ca->allocator_invalidating_data)
                                 ret = bch2_journal_flush_seq(&c->journal, journal_seq);
@@ -927,22 +949,29 @@ static int bch2_allocator_thread(void *arg)
                          * journal error - buckets haven't actually been
                          * invalidated, can't discard them:
                          */
-                        if (ret)
-                                return 0;
+                        if (ret) {
+                                bch_err(ca, "journal error: %i", ret);
+                                goto stop;
+                        }
                 }
 
+                pr_debug("free_inc now empty");
+
                 /* Reset front/back so we can easily sort fifo entries later: */
                 ca->free_inc.front = ca->free_inc.back = 0;
                 ca->allocator_journal_seq_flush = 0;
                 ca->allocator_invalidating_data = false;
 
                 down_read(&c->gc_lock);
-                if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
-                        up_read(&c->gc_lock);
-                        return 0;
-                }
-
                 while (1) {
+                        size_t prev = fifo_used(&ca->free_inc);
+
+                        if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
+                                up_read(&c->gc_lock);
+                                bch_err(ca, "gc failure");
+                                goto stop;
+                        }
+
                         /*
                          * Find some buckets that we can invalidate, either
                          * they're completely unused, or only contain clean data
@@ -950,7 +979,14 @@ static int bch2_allocator_thread(void *arg)
                          * that's been written back to the backing device or
                          * another cache tier
                          */
 
+                        pr_debug("scanning for reclaimable buckets");
+
                         find_reclaimable_buckets(c, ca);
+
+                        pr_debug("found %zu buckets (free_inc %zu/%zu)",
+                                 fifo_used(&ca->free_inc) - prev,
+                                 fifo_used(&ca->free_inc), ca->free_inc.size);
+
                         trace_alloc_batch(ca, fifo_used(&ca->free_inc),
                                           ca->free_inc.size);
@@ -977,15 +1013,20 @@ static int bch2_allocator_thread(void *arg)
                                 ca->allocator_blocked = true;
                                 closure_wake_up(&c->freelist_wait);
 
-                                if (wait_buckets_available(c, ca)) {
+                                ret = wait_buckets_available(c, ca);
+                                if (ret) {
                                         up_read(&c->gc_lock);
-                                        return 0;
+                                        goto stop;
                                 }
                         }
 
                 ca->allocator_blocked = false;
                 up_read(&c->gc_lock);
 
+                pr_debug("free_inc now %zu/%zu",
+                         fifo_used(&ca->free_inc),
+                         ca->free_inc.size);
+
                 sort_free_inc(c, ca);
 
                 /*
@@ -993,6 +1034,10 @@ static int bch2_allocator_thread(void *arg)
                  * write out the new bucket gens:
                  */
         }
+
+stop:
+        pr_debug("alloc thread stopping (ret %i)", ret);
+
         return 0;
 }
 
 /* Allocation */
@@ -1897,7 +1942,8 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
         if (ca->alloc_thread)
                 return 0;
 
-        p = kthread_create(bch2_allocator_thread, ca, "bcache_allocator");
+        p = kthread_create(bch2_allocator_thread, ca,
+                           "bch_alloc[%s]", ca->name);
         if (IS_ERR(p))
                 return PTR_ERR(p);
@@ -1931,7 +1977,7 @@ static void allocator_start_issue_discards(struct bch_fs *c)
 static int __bch2_fs_allocator_start(struct bch_fs *c)
 {
         struct bch_dev *ca;
-        size_t bu, i, devs_have_enough = 0;
+        size_t bu, i;
         unsigned dev_iter;
         u64 journal_seq = 0;
         bool invalidating_data = false;
@@ -1972,16 +2018,19 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
         /* did we find enough buckets? */
         for_each_rw_member(ca, c, dev_iter)
-                devs_have_enough += (fifo_used(&ca->free_inc) >=
-                                     ca->free[RESERVE_BTREE].size);
+                if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size)
+                        goto not_enough;
 
-        if (devs_have_enough >= c->opts.metadata_replicas)
-                return 0;
+        return 0;
+not_enough:
+        pr_debug("did not find enough empty buckets; issuing discards");
 
         /* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
         for_each_rw_member(ca, c, dev_iter)
                 discard_invalidated_buckets(c, ca);
 
+        pr_debug("scanning for reclaimable buckets");
+
         for_each_rw_member(ca, c, dev_iter) {
                 BUG_ON(!fifo_empty(&ca->free_inc));
                 ca->free_inc.front = ca->free_inc.back = 0;
@@ -1996,6 +2045,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
                         break;
         }
 
+        pr_debug("done scanning for reclaimable buckets");
+
         /*
          * We're moving buckets to freelists _before_ they've been marked as
          * invalidated on disk - we have to so that we can allocate new btree
@@ -2005,10 +2056,13 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
          * have cached data in them, which is live until they're marked as
          * invalidated on disk:
          */
-        if (invalidating_data)
+        if (invalidating_data) {
+                pr_debug("invalidating existing data");
                 set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
-        else
+        } else {
+                pr_debug("issuing discards");
                 allocator_start_issue_discards(c);
+        }
 
         /*
          * XXX: it's possible for this to deadlock waiting on journal reclaim,
@@ -2025,13 +2079,15 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
         }
 
         if (invalidating_data) {
+                pr_debug("flushing journal");
+
                 ret = bch2_journal_flush_seq(&c->journal, journal_seq);
                 if (ret)
                         return ret;
-        }
 
-        if (invalidating_data)
+                pr_debug("issuing discards");
                 allocator_start_issue_discards(c);
+        }
 
         for_each_rw_member(ca, c, dev_iter)
                 while (ca->nr_invalidated) {
@@ -2046,19 +2102,43 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
                 struct bucket_table *tbl;
                 struct rhash_head *pos;
                 struct btree *b;
+                bool flush_updates;
+                size_t nr_pending_updates;
 
                 clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
+                pr_debug("flushing dirty btree nodes");
+
+                cond_resched();
+
+                flush_updates = false;
+                nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+
                 rcu_read_lock();
                 for_each_cached_btree(b, c, tbl, i, pos)
                         if (btree_node_dirty(b) && (!b->written || b->level)) {
-                                rcu_read_unlock();
-                                six_lock_read(&b->lock);
-                                bch2_btree_node_write(c, b, SIX_LOCK_read);
-                                six_unlock_read(&b->lock);
-                                goto again;
+                                if (btree_node_may_write(b)) {
+                                        rcu_read_unlock();
+                                        six_lock_read(&b->lock);
+                                        bch2_btree_node_write(c, b, SIX_LOCK_read);
+                                        six_unlock_read(&b->lock);
+                                        goto again;
+                                } else {
+                                        flush_updates = true;
+                                }
                         }
                 rcu_read_unlock();
+
+                /*
+                 * This is ugly, but it's needed to flush btree node writes
+                 * without spinning...
+                 */
+                if (flush_updates) {
+                        closure_wait_event(&c->btree_interior_update_wait,
+                                bch2_btree_interior_updates_nr_pending(c) <
+                                nr_pending_updates);
+                        goto again;
+                }
         }
 
         return 0;
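
The last alloc.c hunk above is the subtle one: a dirty btree node that is not yet allowed to be written (btree_node_may_write() fails because an interior update still references it) used to send the loop straight back to `again`, spinning. The fix snapshots the number of pending interior updates first and, if any node had to be skipped, sleeps until that count drops before retrying. Below is a self-contained userspace sketch of the same snapshot-and-wait pattern, using a pthread condition variable where the kernel code uses closure_wait_event(); all names here are illustrative:

#include <pthread.h>
#include <stddef.h>
#include <sys/queue.h>

struct update {
        LIST_ENTRY(update) list;
};

static LIST_HEAD(, update) pending_list = LIST_HEAD_INITIALIZER(pending_list);
static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pending_wait = PTHREAD_COND_INITIALIZER;

/* Caller must hold pending_lock. */
static size_t __updates_nr_pending(void)
{
        struct update *u;
        size_t nr = 0;

        LIST_FOREACH(u, &pending_list, list)
                nr++;
        return nr;
}

/* Analogue of bch2_btree_interior_updates_nr_pending(): count under lock. */
static size_t updates_nr_pending(void)
{
        pthread_mutex_lock(&pending_lock);
        size_t nr = __updates_nr_pending();
        pthread_mutex_unlock(&pending_lock);
        return nr;
}

/* Submission side: track a new in-flight update. */
static void update_start(struct update *u)
{
        pthread_mutex_lock(&pending_lock);
        LIST_INSERT_HEAD(&pending_list, u, list);
        pthread_mutex_unlock(&pending_lock);
}

/* Completion side: drop the update off the list and wake waiters. */
static void update_done(struct update *u)
{
        pthread_mutex_lock(&pending_lock);
        LIST_REMOVE(u, list);
        pthread_cond_broadcast(&pending_wait);
        pthread_mutex_unlock(&pending_lock);
}

/*
 * Analogue of the closure_wait_event() call: block until at least one
 * of the updates that were pending at snapshot time has completed.
 * Only call this with nr_before >= 1 (the allocator only waits after
 * it actually skipped a node, which implies a pending update exists).
 */
static void wait_for_progress(size_t nr_before)
{
        pthread_mutex_lock(&pending_lock);
        while (__updates_nr_pending() >= nr_before)
                pthread_cond_wait(&pending_wait, &pending_lock);
        pthread_mutex_unlock(&pending_lock);
}

Because the wait is only entered when a skipped node implied at least one pending update, a completing update is guaranteed to wake the waiter, so the retry loop makes forward progress instead of burning CPU.
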
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index c144f8f498d4..0cda0975301e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -384,7 +384,7 @@ struct bch_dev {
         alloc_fifo              free[RESERVE_NR];
         alloc_fifo              free_inc;
         spinlock_t              freelist_lock;
-        unsigned                nr_invalidated;
+        size_t                  nr_invalidated;
 
         u8                      open_buckets_partial[OPEN_BUCKETS_COUNT];
         unsigned                open_buckets_partial_nr;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index f42239dab71c..a35f6e7bf7c2 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -2116,3 +2116,16 @@ ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
 
         return out - buf;
 }
+
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+{
+        size_t ret = 0;
+        struct list_head *i;
+
+        mutex_lock(&c->btree_interior_update_lock);
+        list_for_each(i, &c->btree_interior_update_list)
+                ret++;
+        mutex_unlock(&c->btree_interior_update_lock);
+
+        return ret;
+}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 0b58ccc904a4..3e66d69eda1b 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -343,4 +343,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
 
 ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
 
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+
 #endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 3b4a5292ef6a..9c5dc7206a01 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -284,7 +284,8 @@ int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
         if (bch2_fs_init_fault("copygc_start"))
                 return -ENOMEM;
 
-        t = kthread_create(bch2_copygc_thread, ca, "bch_copygc");
+        t = kthread_create(bch2_copygc_thread, ca,
+                           "bch_copygc[%s]", ca->name);
         if (IS_ERR(t))
                 return PTR_ERR(t);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index f901c3757651..637bf659a21f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -209,17 +209,6 @@ static void bch_fs_mark_clean(struct bch_fs *c)
         }
 }
 
-static bool btree_interior_updates_done(struct bch_fs *c)
-{
-        bool ret;
-
-        mutex_lock(&c->btree_interior_update_lock);
-        ret = list_empty(&c->btree_interior_update_list);
-        mutex_unlock(&c->btree_interior_update_lock);
-
-        return ret;
-}
-
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
         struct bch_dev *ca;
@@ -251,7 +240,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
          * fully complete:
          */
         closure_wait_event(&c->btree_interior_update_wait,
-                           btree_interior_updates_done(c));
+                           !bch2_btree_interior_updates_nr_pending(c));
 
         if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
                 bch2_btree_verify_flushed(c);
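
A smaller change riding along in alloc.c and movinggc.c: the allocator and copygc threads are now named per device ("bch_alloc[%s]" / "bch_copygc[%s]") rather than sharing one generic name, which kthread_create() supports directly through its printf-style format argument. A rough userspace analogue using pthread_setname_np(); the device name "sda1" is a placeholder, and note that Linux truncates thread names to 15 characters:

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

static void *alloc_thread(void *arg)
{
        (void)arg;
        /* ... allocator main loop would run here ... */
        return NULL;
}

int main(void)
{
        pthread_t t;
        char name[16];  /* 15 chars + NUL, same limit as kernel comm names */

        if (pthread_create(&t, NULL, alloc_thread, NULL))
                return 1;

        /* Equivalent of kthread_create(..., "bch_alloc[%s]", ca->name) */
        snprintf(name, sizeof(name), "bch_alloc[%s]", "sda1");
        pthread_setname_np(t, name);

        return pthread_join(t, NULL);
}

Per-device names make it obvious in ps/top which device a stuck or busy allocator thread belongs to, which matters for exactly the kind of startup hang this patch is debugging.
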