author     Kent Overstreet <kent.overstreet@gmail.com>  2018-11-19 01:31:41 -0500
committer  Kent Overstreet <kent.overstreet@gmail.com>  2019-04-03 12:44:04 -0400
commit     c8f5393792d1c6fd5a45eb4d8e49864eb740493f (patch)
tree       9afaf7081f3f8fd4a12c6ac7272262d88a027955
parent     8e5e355eaa8145e0260b8c9fdb3c90471f2b67be (diff)
bcachefs: Persist alloc info on clean shutdown
- Does not persist alloc info for stripes yet
- Also does not yet include filesystem block/sector counts from struct fs_usage
- Not made use of just yet
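The core mechanism: a bucket whose in-memory mark disagrees with the alloc btree carries a dirty bit, and __bch2_alloc_write_key() below writes the key out and then clears dirty with a compare-and-swap, so a concurrent update that re-dirties the bucket is never lost. A minimal userspace model of that handshake, using C11 atomics and simplified stand-in types (not bcachefs code):

/*
 * Userspace model of the dirty-bit handshake in __bch2_alloc_write_key():
 * snapshot the mark, write it out, then clear dirty with a single CAS so
 * a concurrent update (which sets dirty again) is never lost. The layout
 * here is a simplified stand-in for bcachefs's struct bucket_mark.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

union bucket_mark {
	uint64_t counter;
	struct {
		uint32_t dirty_sectors;
		bool	 dirty;
	};
};

struct bucket {
	_Atomic uint64_t mark;
};

/* Pretend to persist the snapshotted mark to the alloc btree. */
static int write_alloc_key(union bucket_mark m)
{
	printf("wrote alloc key: %u dirty sectors\n",
	       (unsigned) m.dirty_sectors);
	return 0;
}

static int alloc_write_key(struct bucket *g)
{
	union bucket_mark old, new;
	int ret;

	old.counter = atomic_load(&g->mark);
	if (!old.dirty)
		return 0;		/* nothing to persist */

	ret = write_alloc_key(old);
	if (ret)
		return ret;

	/*
	 * Clear dirty only if the mark is still the one we wrote out; if a
	 * concurrent update changed it (and set dirty again), leave it set
	 * so the next pass writes this bucket again.
	 */
	new = old;
	new.dirty = false;
	atomic_compare_exchange_strong(&g->mark, &old.counter, new.counter);
	return 0;
}

int main(void)
{
	struct bucket g;
	union bucket_mark m = {0};

	m.dirty_sectors = 8;
	m.dirty = true;
	atomic_store(&g.mark, m.counter);
	return alloc_write_key(&g);
}

Note the single, non-looping CAS on the clear side: losing the race is fine, because the loser is exactly the case where the bucket must stay dirty.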
-rw-r--r--  fs/bcachefs/alloc_background.c   45
-rw-r--r--  fs/bcachefs/alloc_background.h    1
-rw-r--r--  fs/bcachefs/bcachefs.h            6
-rw-r--r--  fs/bcachefs/btree_update.h        3
-rw-r--r--  fs/bcachefs/btree_update_leaf.c   6
-rw-r--r--  fs/bcachefs/buckets.c            14
-rw-r--r--  fs/bcachefs/buckets.h             2
-rw-r--r--  fs/bcachefs/super.c              39
8 files changed, 92 insertions, 24 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index eb6b22177d26..2552d45799ca 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -249,6 +249,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
}
+ for_each_member_device(ca, c, i)
+ bch2_dev_usage_from_buckets(c, ca);
+
mutex_lock(&c->bucket_clock[READ].lock);
for_each_member_device(ca, c, i) {
down_read(&ca->bucket_lock);
@@ -280,35 +283,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
#endif
struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
struct bucket *g;
- struct bucket_mark m;
+ struct bucket_mark m, new;
int ret;
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
a->k.p = POS(ca->dev_idx, b);
+ bch2_btree_iter_set_pos(iter, a->k.p);
+
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+
percpu_down_read_preempt_disable(&c->mark_lock);
g = bucket(ca, b);
- m = bucket_cmpxchg(g, m, m.dirty = false);
+ m = READ_ONCE(g->mark);
+
+ if (!m.dirty) {
+ percpu_up_read_preempt_enable(&c->mark_lock);
+ return 0;
+ }
__alloc_write_key(a, g, m);
percpu_up_read_preempt_enable(&c->mark_lock);
bch2_btree_iter_cond_resched(iter);
- bch2_btree_iter_set_pos(iter, a->k.p);
-
ret = bch2_btree_insert_at(c, NULL, journal_seq,
+ BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
+ if (ret)
+ return ret;
- if (!ret && ca->buckets_written)
+ new = m;
+ new.dirty = false;
+ atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
+
+ if (ca->buckets_written)
set_bit(b, ca->buckets_written);
- return ret;
+ return 0;
}
int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
@@ -898,10 +917,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
for (i = 0; i < RESERVE_NR; i++)
if (fifo_push(&ca->free[i], bucket)) {
fifo_pop(&ca->free_inc, bucket);
+
closure_wake_up(&c->freelist_wait);
+ ca->allocator_blocked_full = false;
+
spin_unlock(&c->freelist_lock);
goto out;
}
+
+ if (!ca->allocator_blocked_full) {
+ ca->allocator_blocked_full = true;
+ closure_wake_up(&c->freelist_wait);
+ }
+
spin_unlock(&c->freelist_lock);
if ((current->flags & PF_KTHREAD) &&
@@ -1226,6 +1254,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}
+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
+{
+ closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+}
+
/* stop allocator thread: */
void bch2_dev_allocator_stop(struct bch_dev *ca)
{
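The new allocator_blocked_full flag and bch2_dev_allocator_quiesce() above form a simple handshake: the allocator raises the flag once every freelist is full and it has nothing left to do, and the shutdown path waits on it. A rough userspace model, with a pthread condition variable standing in for closure_wait_event()/freelist_wait (names simplified, not bcachefs code):

/*
 * Model of the quiesce handshake: the allocator thread raises
 * blocked_full when it cannot push any more buckets, and shutdown
 * waits for that flag before proceeding.
 */
#include <pthread.h>
#include <stdbool.h>

struct dev_alloc_state {
	pthread_mutex_t lock;
	pthread_cond_t	wait;		/* stands in for c->freelist_wait */
	bool		blocked_full;	/* stands in for ca->allocator_blocked_full */
};

static void dev_alloc_state_init(struct dev_alloc_state *s)
{
	pthread_mutex_init(&s->lock, NULL);
	pthread_cond_init(&s->wait, NULL);
	s->blocked_full = false;
}

/* Allocator side: called when a bucket can't be pushed onto any freelist. */
static void allocator_mark_blocked_full(struct dev_alloc_state *s)
{
	pthread_mutex_lock(&s->lock);
	if (!s->blocked_full) {
		s->blocked_full = true;
		pthread_cond_broadcast(&s->wait);
	}
	pthread_mutex_unlock(&s->lock);
}

/* Shutdown side: mirrors bch2_dev_allocator_quiesce(). */
static void dev_allocator_quiesce(struct dev_alloc_state *s)
{
	pthread_mutex_lock(&s->lock);
	while (!s->blocked_full)
		pthread_cond_wait(&s->wait, &s->lock);
	pthread_mutex_unlock(&s->lock);
}

A broadcast (matching the patch's closure_wake_up() on both edges) is used rather than a signal, since several devices may be quiescing at once.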
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index a0c08e347ad7..26561b3bafb8 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -51,6 +51,7 @@ void bch2_recalc_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a9d021df1366..17f28c379db9 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -428,7 +428,13 @@ struct bch_dev {
size_t inc_gen_needs_gc;
size_t inc_gen_really_needs_gc;
+
+ /*
+ * XXX: this should be an enum for allocator state, so as to include
+ * error state
+ */
bool allocator_blocked;
+ bool allocator_blocked_full;
alloc_heap alloc_heap;
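One hypothetical shape for the enum the XXX comment above asks for; the states and names below are illustrative only, not part of this patch:

/*
 * Illustrative sketch only: a single allocator-state enum would replace
 * the allocator_blocked/allocator_blocked_full bool pair and leave room
 * for the error state the comment wants.
 */
enum allocator_state {
	ALLOCATOR_RUNNING,	/* filling freelists normally */
	ALLOCATOR_BLOCKED,	/* waiting for buckets to invalidate */
	ALLOCATOR_BLOCKED_FULL,	/* all freelists full; safe to quiesce */
	ALLOCATOR_ERROR,	/* hypothetical error state */
};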
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index dd9d255952d0..4bd0725846d0 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -77,6 +77,7 @@ enum {
__BTREE_INSERT_ATOMIC,
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
+ __BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
@@ -100,6 +101,8 @@ enum {
/* Don't check for -ENOSPC: */
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
+#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)
+
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 7eca9203be01..0df894fcf1ae 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -628,7 +628,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
trans_for_each_entry(trans, i)
btree_insert_entry_checks(c, i);
- if (unlikely(!percpu_ref_tryget(&c->writes)))
+ if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+ !percpu_ref_tryget(&c->writes)))
return -EROFS;
retry:
trans_for_each_iter(trans, i) {
@@ -658,7 +659,8 @@ retry:
trans_for_each_iter(trans, i)
bch2_btree_iter_downgrade(i->iter);
out:
- percpu_ref_put(&c->writes);
+ if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+ percpu_ref_put(&c->writes);
/* make sure we didn't drop or screw up locks: */
trans_for_each_iter(trans, i) {
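BTREE_INSERT_NOCHECK_RW exists because the read-only path writes alloc keys after c->writes has been killed; the flag must skip the tryget and the put together, or the refcount unbalances. A simplified model (a plain atomic stands in for the percpu_ref, and the tryget below is not race-free the way percpu_ref_tryget() is):

/*
 * Model of the NOCHECK_RW pairing: skip both the tryget and the put,
 * or neither. The atomics below stand in for the percpu_ref c->writes.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

#define BTREE_INSERT_NOCHECK_RW	(1u << 0)

static _Atomic int  writes_ref  = 1;		/* stands in for c->writes */
static _Atomic bool writes_dead = false;	/* set when going read-only */

static bool writes_tryget(void)
{
	if (atomic_load(&writes_dead))
		return false;
	atomic_fetch_add(&writes_ref, 1);
	return true;
}

static void writes_put(void)
{
	atomic_fetch_sub(&writes_ref, 1);
}

static int btree_insert(unsigned flags)
{
	if (!(flags & BTREE_INSERT_NOCHECK_RW) && !writes_tryget())
		return -EROFS;

	/* ... perform the btree update ... */

	if (!(flags & BTREE_INSERT_NOCHECK_RW))
		writes_put();
	return 0;
}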
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 721d350f52e7..a1a9676f0048 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -382,7 +382,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
*old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
BUG_ON(!is_available_bucket(new));
- new.owned_by_allocator = 1;
+ new.owned_by_allocator = true;
+ new.dirty = true;
new.data_type = 0;
new.cached_sectors = 0;
new.dirty_sectors = 0;
@@ -455,6 +456,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
type != BCH_DATA_JOURNAL);
bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+ new.dirty = true;
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));
@@ -480,13 +482,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
true);
} else {
struct bucket *g;
- struct bucket_mark old, new;
+ struct bucket_mark new;
rcu_read_lock();
g = bucket(ca, b);
- old = bucket_cmpxchg(g, new, ({
- new.data_type = type;
+ bucket_cmpxchg(g, new, ({
+ new.dirty = true;
+ new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));
@@ -537,6 +540,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
do {
new.v.counter = old.v.counter = v;
+ new.dirty = true;
+
/*
* Check this after reading bucket mark to guard against
* the allocator invalidating a bucket after we've already
@@ -700,6 +705,7 @@ static void bucket_set_stripe(struct bch_fs *c,
BUG_ON(ptr_stale(ca, ptr));
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+ new.dirty = true;
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
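Every mark-update site in buckets.c now sets new.dirty inside its cmpxchg loop, so the flag commits atomically with the change it records. A stand-alone model of that retry loop with C11 atomics (types are stand-ins, not bcachefs code; cf. bch2_mark_pointer above):

/*
 * Model of the mark-update retry loop: dirty is set inside the loop so
 * it is published in the same atomic word as the counters it covers.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

union bucket_mark {
	uint64_t counter;
	struct {
		uint32_t dirty_sectors;
		bool	 dirty;
	};
};

static void mark_pointer(_Atomic uint64_t *mark, uint32_t sectors)
{
	union bucket_mark new;
	uint64_t v = atomic_load(mark);

	do {
		new.counter = v;

		/* The counter change and the dirty flag commit together. */
		new.dirty = true;
		new.dirty_sectors += sectors;
		/* On failure, v is reloaded and the update is redone. */
	} while (!atomic_compare_exchange_weak(mark, &v, new.counter));
}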
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index fe93eb24dce9..6f3681728f0a 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -181,6 +181,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
+void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
{
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index b19ed44138d3..46e4f61a1b2e 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -205,7 +205,9 @@ int bch2_congested(void *data, int bdi_bits)
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
+ bool wrote;
unsigned i;
+ int ret;
bch2_rebalance_stop(c);
@@ -220,23 +222,36 @@ static void __bch2_fs_read_only(struct bch_fs *c)
*/
bch2_journal_flush_all_pins(&c->journal);
- for_each_member_device(ca, c, i)
- bch2_dev_allocator_stop(ca);
+ do {
+ ret = bch2_alloc_write(c, false, &wrote);
+ if (ret) {
+ bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+ break;
+ }
- bch2_journal_flush_all_pins(&c->journal);
+ for_each_member_device(ca, c, i)
+ bch2_dev_allocator_quiesce(c, ca);
- /*
- * We need to explicitly wait on btree interior updates to complete
- * before stopping the journal, flushing all journal pins isn't
- * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
- * interior updates have to drop their journal pin before they're
- * fully complete:
- */
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
+ bch2_journal_flush_all_pins(&c->journal);
+
+ /*
+ * We need to explicitly wait on btree interior updates to complete
+ * before stopping the journal, flushing all journal pins isn't
+ * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
+ * interior updates have to drop their journal pin before they're
+ * fully complete:
+ */
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_nr_pending(c));
+ } while (wrote);
+
+ for_each_member_device(ca, c, i)
+ bch2_dev_allocator_stop(ca);
bch2_fs_journal_stop(&c->journal);
+ /* XXX: mark super that alloc info is persistent */
+
/*
* the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal:
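The do/while loop above is the heart of the patch: writing out alloc info allocates btree nodes, which dirties more buckets, so the flush must repeat until a pass writes nothing. The same shape as a stand-alone sketch, with stubs in place of the bcachefs calls (names simplified, not bcachefs code):

#include <stdbool.h>

/* Stubs standing in for the calls made by __bch2_fs_read_only(). */
static int  alloc_write(bool *wrote)	{ *wrote = false; return 0; }
static void quiesce_allocators(void)	{}
static void flush_journal_pins(void)	{}
static void wait_for_interior_updates(void) {}

static int flush_alloc_info_until_clean(void)
{
	bool wrote;
	int ret;

	do {
		/* Persist every dirty bucket's alloc key. */
		ret = alloc_write(&wrote);
		if (ret)
			return ret;

		/* Let the allocators fill their freelists and go idle. */
		quiesce_allocators();

		/*
		 * Writing alloc keys may itself have allocated btree nodes
		 * and dirtied buckets, so flush and check again; terminate
		 * only when a full pass wrote nothing.
		 */
		flush_journal_pins();
		wait_for_interior_updates();
	} while (wrote);

	return 0;
}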