Diffstat (limited to 'fs/bcachefs/alloc.c')
-rw-r--r--	fs/bcachefs/alloc.c	140
1 file changed, 110 insertions(+), 30 deletions(-)
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index cba399ee713c..0b8d0b20b5ed 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -58,6 +58,7 @@
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "checksum.h"
@@ -536,7 +537,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (kthread_should_stop()) {
-			ret = -1;
+			ret = 1;
 			break;
 		}
 
@@ -694,6 +695,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 		};
 
 		heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+
+		cond_resched();
 	}
 
 	up_read(&ca->bucket_lock);
@@ -729,6 +732,8 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 
 		if (bch2_can_invalidate_bucket(ca, b, m))
 			bch2_invalidate_one_bucket(c, ca, b);
+
+		cond_resched();
 	}
 }
 
@@ -749,6 +754,8 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca)
 
 		if (bch2_can_invalidate_bucket(ca, b, m))
 			bch2_invalidate_one_bucket(c, ca, b);
+
+		cond_resched();
 	}
 }
 
@@ -850,7 +857,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
 
 		if ((current->flags & PF_KTHREAD) &&
 		    kthread_should_stop()) {
-			ret = -1;
+			ret = 1;
 			break;
 		}
 
@@ -880,7 +887,7 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
 				      ca->mi.bucket_size, GFP_NOIO, 0);
 
 		if (push_invalidated_bucket(c, ca, bucket))
-			return -1;
+			return 1;
 	}
 
 	return 0;
@@ -905,17 +912,32 @@ static int bch2_allocator_thread(void *arg)
 
 	while (1) {
 		while (1) {
+			cond_resched();
+
+			pr_debug("discarding %zu invalidated buckets",
+				 ca->nr_invalidated);
+
 			ret = discard_invalidated_buckets(c, ca);
 			if (ret)
-				return 0;
+				goto stop;
 
 			if (fifo_empty(&ca->free_inc))
 				break;
 
+			pr_debug("invalidating %zu buckets",
+				 fifo_used(&ca->free_inc));
+
 			journal_seq = 0;
 			ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
-			if (ret)
-				return 0;
+			if (ret) {
+				bch_err(ca, "error invalidating buckets: %i", ret);
+				goto stop;
+			}
+
+			if (!ca->nr_invalidated) {
+				bch_err(ca, "allocator thread unable to make forward progress!");
+				goto stop;
+			}
 
 			if (ca->allocator_invalidating_data)
 				ret = bch2_journal_flush_seq(&c->journal, journal_seq);
@@ -927,22 +949,29 @@ static int bch2_allocator_thread(void *arg)
 			 * journal error - buckets haven't actually been
 			 * invalidated, can't discard them:
 			 */
-			if (ret)
-				return 0;
+			if (ret) {
+				bch_err(ca, "journal error: %i", ret);
+				goto stop;
+			}
 		}
 
+		pr_debug("free_inc now empty");
+
 		/* Reset front/back so we can easily sort fifo entries later: */
 		ca->free_inc.front = ca->free_inc.back = 0;
 		ca->allocator_journal_seq_flush = 0;
 		ca->allocator_invalidating_data = false;
 
 		down_read(&c->gc_lock);
-		if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
-			up_read(&c->gc_lock);
-			return 0;
-		}
-
 		while (1) {
+			size_t prev = fifo_used(&ca->free_inc);
+
+			if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
+				up_read(&c->gc_lock);
+				bch_err(ca, "gc failure");
+				goto stop;
+			}
+
 			/*
 			 * Find some buckets that we can invalidate, either
 			 * they're completely unused, or only contain clean data
@@ -950,7 +979,14 @@ static int bch2_allocator_thread(void *arg)
 			 * another cache tier
 			 */
 
+			pr_debug("scanning for reclaimable buckets");
+
 			find_reclaimable_buckets(c, ca);
+
+			pr_debug("found %zu buckets (free_inc %zu/%zu)",
+				 fifo_used(&ca->free_inc) - prev,
+				 fifo_used(&ca->free_inc), ca->free_inc.size);
+
 			trace_alloc_batch(ca,
					  fifo_used(&ca->free_inc), ca->free_inc.size);
@@ -977,15 +1013,20 @@ static int bch2_allocator_thread(void *arg)
 			ca->allocator_blocked = true;
 			closure_wake_up(&c->freelist_wait);
 
-			if (wait_buckets_available(c, ca)) {
+			ret = wait_buckets_available(c, ca);
+			if (ret) {
 				up_read(&c->gc_lock);
-				return 0;
+				goto stop;
 			}
 		}
 
 		ca->allocator_blocked = false;
 		up_read(&c->gc_lock);
 
+		pr_debug("free_inc now %zu/%zu",
+			 fifo_used(&ca->free_inc),
+			 ca->free_inc.size);
+
 		sort_free_inc(c, ca);
 
 		/*
@@ -993,6 +1034,10 @@ static int bch2_allocator_thread(void *arg)
 		 * write out the new bucket gens:
 		 */
 	}
+
+stop:
+	pr_debug("alloc thread stopping (ret %i)", ret);
+	return 0;
 }
 
 /* Allocation */
@@ -1897,7 +1942,8 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
 	if (ca->alloc_thread)
 		return 0;
 
-	p = kthread_create(bch2_allocator_thread, ca, "bcache_allocator");
+	p = kthread_create(bch2_allocator_thread, ca,
+			   "bch_alloc[%s]", ca->name);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
@@ -1931,7 +1977,7 @@ static void allocator_start_issue_discards(struct bch_fs *c)
 static int __bch2_fs_allocator_start(struct bch_fs *c)
 {
 	struct bch_dev *ca;
-	size_t bu, i, devs_have_enough = 0;
+	size_t bu, i;
 	unsigned dev_iter;
 	u64 journal_seq = 0;
 	bool invalidating_data = false;
@@ -1972,16 +2018,19 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
 	/* did we find enough buckets? */
 	for_each_rw_member(ca, c, dev_iter)
-		devs_have_enough += (fifo_used(&ca->free_inc) >=
-				     ca->free[RESERVE_BTREE].size);
+		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size)
+			goto not_enough;
 
-	if (devs_have_enough >= c->opts.metadata_replicas)
-		return 0;
+	return 0;
+not_enough:
+	pr_debug("did not find enough empty buckets; issuing discards");
 
 	/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
 	for_each_rw_member(ca, c, dev_iter)
		discard_invalidated_buckets(c, ca);
 
+	pr_debug("scanning for reclaimable buckets");
+
 	for_each_rw_member(ca, c, dev_iter) {
 		BUG_ON(!fifo_empty(&ca->free_inc));
 		ca->free_inc.front = ca->free_inc.back = 0;
@@ -1996,6 +2045,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 			break;
 	}
 
+	pr_debug("done scanning for reclaimable buckets");
+
 	/*
 	 * We're moving buckets to freelists _before_ they've been marked as
 	 * invalidated on disk - we have to so that we can allocate new btree
@@ -2005,10 +2056,13 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	 * have cached data in them, which is live until they're marked as
 	 * invalidated on disk:
 	 */
-	if (invalidating_data)
+	if (invalidating_data) {
+		pr_debug("invalidating existing data");
 		set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
-	else
+	} else {
+		pr_debug("issuing discards");
 		allocator_start_issue_discards(c);
+	}
 
 	/*
 	 * XXX: it's possible for this to deadlock waiting on journal reclaim,
@@ -2025,13 +2079,15 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	}
 
 	if (invalidating_data) {
+		pr_debug("flushing journal");
+
 		ret = bch2_journal_flush_seq(&c->journal, journal_seq);
 		if (ret)
 			return ret;
-	}
 
-	if (invalidating_data)
+		pr_debug("issuing discards");
 		allocator_start_issue_discards(c);
+	}
 
 	for_each_rw_member(ca, c, dev_iter)
 		while (ca->nr_invalidated) {
@@ -2046,19 +2102,43 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 		struct bucket_table *tbl;
 		struct rhash_head *pos;
 		struct btree *b;
+		bool flush_updates;
+		size_t nr_pending_updates;
 
 		clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
+		pr_debug("flushing dirty btree nodes");
+		cond_resched();
+
+		flush_updates = false;
+		nr_pending_updates =
+			bch2_btree_interior_updates_nr_pending(c);
+
 		rcu_read_lock();
 		for_each_cached_btree(b, c, tbl, i, pos)
 			if (btree_node_dirty(b) &&
 			    (!b->written || b->level)) {
-				rcu_read_unlock();
-				six_lock_read(&b->lock);
-				bch2_btree_node_write(c, b, SIX_LOCK_read);
-				six_unlock_read(&b->lock);
-				goto again;
+				if (btree_node_may_write(b)) {
+					rcu_read_unlock();
+					six_lock_read(&b->lock);
+					bch2_btree_node_write(c, b, SIX_LOCK_read);
+					six_unlock_read(&b->lock);
+					goto again;
+				} else {
+					flush_updates = true;
+				}
 			}
 		rcu_read_unlock();
+
+		/*
+		 * This is ugly, but it's needed to flush btree node writes
+		 * without spinning...
+		 */
+		if (flush_updates) {
+			closure_wait_event(&c->btree_interior_update_wait,
+				bch2_btree_interior_updates_nr_pending(c) <
+				nr_pending_updates);
+			goto again;
+		}
 	}
 	return 0;
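
Two behavioural changes recur throughout this patch and are worth calling out. First, helpers called from the allocator thread (wait_buckets_available(), push_invalidated_bucket(), discard_invalidated_buckets()) now return a positive value instead of -1 when kthread_should_stop() is set, so callers can tell a requested shutdown (goto stop, exit with 0) apart from a genuine negative errno. Second, the long bucket scans gain cond_resched() calls so they stay preemptible. Below is a minimal sketch of that kthread convention using the standard kthread API; work_available() and do_one_batch() are hypothetical stand-ins, not functions from this patch:

#include <linux/kthread.h>
#include <linux/sched.h>

static bool work_available(void);	/* hypothetical: is there work queued? */
static void do_one_batch(void);		/* hypothetical: process one batch */

/* Returns 0 when work is ready, >0 when the thread was asked to stop: */
static int wait_for_work(void)
{
	while (1) {
		/*
		 * Set state before checking the condition so a concurrent
		 * wake_up_process() can't be lost between check and sleep:
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			return 1;	/* clean shutdown, not an errno */
		}
		if (work_available())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static int example_thread_fn(void *arg)
{
	while (1) {
		cond_resched();		/* keep long-running loops preemptible */

		if (wait_for_work())
			break;		/* >0: kthread_stop() was called */

		do_one_batch();
	}
	return 0;			/* collected by kthread_stop() */
}

This mirrors the set_current_state()-before-check ordering of wait_buckets_available() in the patch; returning a positive value keeps the "stop requested" case visibly distinct from real error paths such as the bch_err() + goto stop branches added above.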