author    Kent Overstreet <kent.overstreet@gmail.com>    2018-03-28 07:58:22 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>    2018-03-28 10:37:16 -0400
commit    d7a266609deae90a0b36c212cfbf9f85735e7a61
tree      0684d5a55e69fe9175e3238c40c9bfaddeaaf675
parent    9d4143ad7bc734425751225dbbae79f188d0ad81
bcachefs: Fix some allocator startup bugs
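
Several fixes for bugs hit while bringing the allocator up:

- Return 1 instead of -1 from wait_buckets_available(),
  push_invalidated_bucket() and discard_invalidated_buckets() when the
  allocator kthread is told to stop, so a clean shutdown is
  distinguishable from an error, and funnel allocator thread exit
  through a single stop label with debug logging.

- Add cond_resched() to the bucket scanning loops so the allocator
  thread doesn't monopolize the CPU on large devices.

- Log an error and stop the allocator thread when invalidating buckets
  can't make forward progress, instead of looping.

- At filesystem allocator startup, require that every rw member device
  have enough free buckets for the btree reserve, rather than counting
  devices against metadata_replicas.

- When flushing dirty btree nodes during startup, a node that isn't
  yet allowed to be written (btree_node_may_write()) now causes us to
  wait on btree_interior_update_wait for the number of pending interior
  updates to drop, using the new
  bch2_btree_interior_updates_nr_pending() helper (which also replaces
  btree_interior_updates_done() in super.c), instead of spinning.

- Widen bch_dev->nr_invalidated from unsigned to size_t, and give the
  allocator and copygc kthreads per-device names.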
-rw-r--r--  fs/bcachefs/alloc.c                 | 140
-rw-r--r--  fs/bcachefs/bcachefs.h              |   2
-rw-r--r--  fs/bcachefs/btree_update_interior.c |  13
-rw-r--r--  fs/bcachefs/btree_update_interior.h |   2
-rw-r--r--  fs/bcachefs/movinggc.c              |   3
-rw-r--r--  fs/bcachefs/super.c                 |  13
6 files changed, 129 insertions(+), 44 deletions(-)
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index cba399ee713c..0b8d0b20b5ed 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -58,6 +58,7 @@
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "checksum.h"
@@ -536,7 +537,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop()) {
- ret = -1;
+ ret = 1;
break;
}
@@ -694,6 +695,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
};
heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+
+ cond_resched();
}
up_read(&ca->bucket_lock);
@@ -729,6 +732,8 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
if (bch2_can_invalidate_bucket(ca, b, m))
bch2_invalidate_one_bucket(c, ca, b);
+
+ cond_resched();
}
}
@@ -749,6 +754,8 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
if (bch2_can_invalidate_bucket(ca, b, m))
bch2_invalidate_one_bucket(c, ca, b);
+
+ cond_resched();
}
}
@@ -850,7 +857,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
if ((current->flags & PF_KTHREAD) &&
kthread_should_stop()) {
- ret = -1;
+ ret = 1;
break;
}
@@ -880,7 +887,7 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
ca->mi.bucket_size, GFP_NOIO, 0);
if (push_invalidated_bucket(c, ca, bucket))
- return -1;
+ return 1;
}
return 0;
@@ -905,17 +912,32 @@ static int bch2_allocator_thread(void *arg)
while (1) {
while (1) {
+ cond_resched();
+
+ pr_debug("discarding %zu invalidated buckets",
+ ca->nr_invalidated);
+
ret = discard_invalidated_buckets(c, ca);
if (ret)
- return 0;
+ goto stop;
if (fifo_empty(&ca->free_inc))
break;
+ pr_debug("invalidating %zu buckets",
+ fifo_used(&ca->free_inc));
+
journal_seq = 0;
ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
- if (ret)
- return 0;
+ if (ret) {
+ bch_err(ca, "error invalidating buckets: %i", ret);
+ goto stop;
+ }
+
+ if (!ca->nr_invalidated) {
+ bch_err(ca, "allocator thread unable to make forward progress!");
+ goto stop;
+ }
if (ca->allocator_invalidating_data)
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
@@ -927,22 +949,29 @@ static int bch2_allocator_thread(void *arg)
* journal error - buckets haven't actually been
* invalidated, can't discard them:
*/
- if (ret)
- return 0;
+ if (ret) {
+ bch_err(ca, "journal error: %i", ret);
+ goto stop;
+ }
}
+ pr_debug("free_inc now empty");
+
/* Reset front/back so we can easily sort fifo entries later: */
ca->free_inc.front = ca->free_inc.back = 0;
ca->allocator_journal_seq_flush = 0;
ca->allocator_invalidating_data = false;
down_read(&c->gc_lock);
- if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
- up_read(&c->gc_lock);
- return 0;
- }
-
while (1) {
+ size_t prev = fifo_used(&ca->free_inc);
+
+ if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
+ up_read(&c->gc_lock);
+ bch_err(ca, "gc failure");
+ goto stop;
+ }
+
/*
* Find some buckets that we can invalidate, either
* they're completely unused, or only contain clean data
@@ -950,7 +979,14 @@ static int bch2_allocator_thread(void *arg)
* another cache tier
*/
+ pr_debug("scanning for reclaimable buckets");
+
find_reclaimable_buckets(c, ca);
+
+ pr_debug("found %zu buckets (free_inc %zu/%zu)",
+ fifo_used(&ca->free_inc) - prev,
+ fifo_used(&ca->free_inc), ca->free_inc.size);
+
trace_alloc_batch(ca, fifo_used(&ca->free_inc),
ca->free_inc.size);
@@ -977,15 +1013,20 @@ static int bch2_allocator_thread(void *arg)
ca->allocator_blocked = true;
closure_wake_up(&c->freelist_wait);
- if (wait_buckets_available(c, ca)) {
+ ret = wait_buckets_available(c, ca);
+ if (ret) {
up_read(&c->gc_lock);
- return 0;
+ goto stop;
}
}
ca->allocator_blocked = false;
up_read(&c->gc_lock);
+ pr_debug("free_inc now %zu/%zu",
+ fifo_used(&ca->free_inc),
+ ca->free_inc.size);
+
sort_free_inc(c, ca);
/*
@@ -993,6 +1034,10 @@ static int bch2_allocator_thread(void *arg)
* write out the new bucket gens:
*/
}
+
+stop:
+ pr_debug("alloc thread stopping (ret %i)", ret);
+ return 0;
}
/* Allocation */
@@ -1897,7 +1942,8 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
if (ca->alloc_thread)
return 0;
- p = kthread_create(bch2_allocator_thread, ca, "bcache_allocator");
+ p = kthread_create(bch2_allocator_thread, ca,
+ "bch_alloc[%s]", ca->name);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -1931,7 +1977,7 @@ static void allocator_start_issue_discards(struct bch_fs *c)
static int __bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
- size_t bu, i, devs_have_enough = 0;
+ size_t bu, i;
unsigned dev_iter;
u64 journal_seq = 0;
bool invalidating_data = false;
@@ -1972,16 +2018,19 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
/* did we find enough buckets? */
for_each_rw_member(ca, c, dev_iter)
- devs_have_enough += (fifo_used(&ca->free_inc) >=
- ca->free[RESERVE_BTREE].size);
+ if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size)
+ goto not_enough;
- if (devs_have_enough >= c->opts.metadata_replicas)
- return 0;
+ return 0;
+not_enough:
+ pr_debug("did not find enough empty buckets; issuing discards");
/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
for_each_rw_member(ca, c, dev_iter)
discard_invalidated_buckets(c, ca);
+ pr_debug("scanning for reclaimable buckets");
+
for_each_rw_member(ca, c, dev_iter) {
BUG_ON(!fifo_empty(&ca->free_inc));
ca->free_inc.front = ca->free_inc.back = 0;
@@ -1996,6 +2045,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
break;
}
+ pr_debug("done scanning for reclaimable buckets");
+
/*
* We're moving buckets to freelists _before_ they've been marked as
* invalidated on disk - we have to so that we can allocate new btree
@@ -2005,10 +2056,13 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
* have cached data in them, which is live until they're marked as
* invalidated on disk:
*/
- if (invalidating_data)
+ if (invalidating_data) {
+ pr_debug("invalidating existing data");
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
- else
+ } else {
+ pr_debug("issuing discards");
allocator_start_issue_discards(c);
+ }
/*
* XXX: it's possible for this to deadlock waiting on journal reclaim,
@@ -2025,13 +2079,15 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
}
if (invalidating_data) {
+ pr_debug("flushing journal");
+
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
if (ret)
return ret;
- }
- if (invalidating_data)
+ pr_debug("issuing discards");
allocator_start_issue_discards(c);
+ }
for_each_rw_member(ca, c, dev_iter)
while (ca->nr_invalidated) {
@@ -2046,19 +2102,43 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
+ bool flush_updates;
+ size_t nr_pending_updates;
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
+ pr_debug("flushing dirty btree nodes");
+ cond_resched();
+
+ flush_updates = false;
+ nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos)
if (btree_node_dirty(b) && (!b->written || b->level)) {
- rcu_read_unlock();
- six_lock_read(&b->lock);
- bch2_btree_node_write(c, b, SIX_LOCK_read);
- six_unlock_read(&b->lock);
- goto again;
+ if (btree_node_may_write(b)) {
+ rcu_read_unlock();
+ six_lock_read(&b->lock);
+ bch2_btree_node_write(c, b, SIX_LOCK_read);
+ six_unlock_read(&b->lock);
+ goto again;
+ } else {
+ flush_updates = true;
+ }
}
rcu_read_unlock();
+
+ /*
+ * This is ugly, but it's needed to flush btree node writes
+ * without spinning...
+ */
+ if (flush_updates) {
+ closure_wait_event(&c->btree_interior_update_wait,
+ bch2_btree_interior_updates_nr_pending(c) <
+ nr_pending_updates);
+ goto again;
+ }
}
return 0;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index c144f8f498d4..0cda0975301e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -384,7 +384,7 @@ struct bch_dev {
alloc_fifo free[RESERVE_NR];
alloc_fifo free_inc;
spinlock_t freelist_lock;
- unsigned nr_invalidated;
+ size_t nr_invalidated;
u8 open_buckets_partial[OPEN_BUCKETS_COUNT];
unsigned open_buckets_partial_nr;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index f42239dab71c..a35f6e7bf7c2 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -2116,3 +2116,16 @@ ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
return out - buf;
}
+
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+{
+ size_t ret = 0;
+ struct list_head *i;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_for_each(i, &c->btree_interior_update_list)
+ ret++;
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ return ret;
+}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 0b58ccc904a4..3e66d69eda1b 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -343,4 +343,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+
#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 3b4a5292ef6a..9c5dc7206a01 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -284,7 +284,8 @@ int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
if (bch2_fs_init_fault("copygc_start"))
return -ENOMEM;
- t = kthread_create(bch2_copygc_thread, ca, "bch_copygc");
+ t = kthread_create(bch2_copygc_thread, ca,
+ "bch_copygc[%s]", ca->name);
if (IS_ERR(t))
return PTR_ERR(t);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index f901c3757651..637bf659a21f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -209,17 +209,6 @@ static void bch_fs_mark_clean(struct bch_fs *c)
}
}
-static bool btree_interior_updates_done(struct bch_fs *c)
-{
- bool ret;
-
- mutex_lock(&c->btree_interior_update_lock);
- ret = list_empty(&c->btree_interior_update_list);
- mutex_unlock(&c->btree_interior_update_lock);
-
- return ret;
-}
-
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
@@ -251,7 +240,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
* fully complete:
*/
closure_wait_event(&c->btree_interior_update_wait,
- btree_interior_updates_done(c));
+ !bch2_btree_interior_updates_nr_pending(c));
if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_btree_verify_flushed(c);