summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2019-03-29 22:22:45 -0400
committer: Kent Overstreet <kent.overstreet@gmail.com> 2019-04-04 12:31:42 -0400
commit: d51031ccebeace92c9ec3745f3045f0085ec4e20 (patch)
tree: 6b10445a44c666f916cbf81746b4c4615c8c3e8f
parent: 69fd98eb7f996837d529992db75700f61ffb33c0 (diff)
bcachefs: add ability to run gc on metadata only
-rw-r--r-- fs/bcachefs/alloc_background.c 5
-rw-r--r-- fs/bcachefs/btree_gc.c 97
-rw-r--r-- fs/bcachefs/btree_gc.h 2
-rw-r--r-- fs/bcachefs/buckets.c 46
-rw-r--r-- fs/bcachefs/buckets.h 2
-rw-r--r-- fs/bcachefs/buckets_types.h 1
-rw-r--r-- fs/bcachefs/recovery.c 2
-rw-r--r-- fs/bcachefs/sysfs.c 2
8 files changed, 93 insertions, 64 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 18afef2e4a81..5a3065688341 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -290,8 +290,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
}
- for_each_member_device(ca, c, i)
- bch2_dev_usage_from_buckets(c, ca);
+ percpu_down_write(&c->mark_lock);
+ bch2_dev_usage_from_buckets(c);
+ percpu_up_write(&c->mark_lock);
mutex_lock(&c->bucket_clock[READ].lock);
for_each_member_device(ca, c, i) {
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 8b9b96253073..3feea91e6aab 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -201,7 +201,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
- bool initial)
+ bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -221,7 +221,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
- if (initial || expensive_debug_checks(c))
+ if (metadata_only)
+ depth = 1;
+ else if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
@@ -277,7 +279,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
}
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
- bool initial)
+ bool initial, bool metadata_only)
{
enum btree_id ids[BTREE_ID_NR];
u8 max_stale;
@@ -291,11 +293,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id);
- int ret = bch2_gc_btree(c, id, initial);
+ int ret = bch2_gc_btree(c, id, initial, metadata_only);
if (ret)
return ret;
- if (journal && btree_node_type_needs_gc(type)) {
+ if (journal && !metadata_only &&
+ btree_node_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
@@ -478,11 +481,13 @@ static void bch2_gc_free(struct bch_fs *c)
c->usage[1] = NULL;
}
-static int bch2_gc_done(struct bch_fs *c, bool initial)
+static int bch2_gc_done(struct bch_fs *c,
+ bool initial, bool metadata_only)
{
struct bch_dev *ca;
- bool verify = !initial ||
- (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
+ bool verify = !metadata_only &&
+ (!initial ||
+ (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
unsigned i;
int ret = 0;
@@ -517,7 +522,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
- {
+ if (!metadata_only) {
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
@@ -569,26 +574,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
}
};
- for_each_member_device(ca, c, i) {
- unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
- struct bch_dev_usage *dst = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
- struct bch_dev_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
- unsigned b;
-
- for (b = 0; b < BCH_DATA_NR; b++)
- copy_dev_field(buckets[b], "buckets[%s]",
- bch2_data_types[b]);
- copy_dev_field(buckets_alloc, "buckets_alloc");
- copy_dev_field(buckets_ec, "buckets_ec");
- copy_dev_field(buckets_unavailable, "buckets_unavailable");
-
- for (b = 0; b < BCH_DATA_NR; b++)
- copy_dev_field(sectors[b], "sectors[%s]",
- bch2_data_types[b]);
- copy_dev_field(sectors_fragmented, "sectors_fragmented");
- }
+ bch2_dev_usage_from_buckets(c);
{
unsigned nr = fs_usage_u64s(c);
@@ -598,20 +584,29 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
copy_fs_field(hidden, "hidden");
- copy_fs_field(data, "data");
- copy_fs_field(cached, "cached");
- copy_fs_field(reserved, "reserved");
- copy_fs_field(nr_inodes, "nr_inodes");
+ copy_fs_field(btree, "btree");
- for (i = 0; i < BCH_REPLICAS_MAX; i++)
- copy_fs_field(persistent_reserved[i],
- "persistent_reserved[%i]", i);
+ if (!metadata_only) {
+ copy_fs_field(data, "data");
+ copy_fs_field(cached, "cached");
+ copy_fs_field(reserved, "reserved");
+ copy_fs_field(nr_inodes,"nr_inodes");
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
+ copy_fs_field(persistent_reserved[i],
+ "persistent_reserved[%i]", i);
+ }
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
char buf[80];
+ if (metadata_only &&
+ (e->data_type == BCH_DATA_USER ||
+ e->data_type == BCH_DATA_CACHED))
+ continue;
+
bch2_replicas_entry_to_text(&PBUF(buf), e);
copy_fs_field(replicas[i], "%s", buf);
@@ -627,7 +622,8 @@ fsck_err:
return ret;
}
-static int bch2_gc_start(struct bch_fs *c)
+static int bch2_gc_start(struct bch_fs *c,
+ bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
@@ -673,10 +669,18 @@ static int bch2_gc_start(struct bch_fs *c)
dst->nbuckets = src->nbuckets;
for (b = 0; b < src->nbuckets; b++) {
- dst->b[b]._mark.gen =
- dst->b[b].oldest_gen =
- src->b[b].mark.gen;
- dst->b[b].gen_valid = src->b[b].gen_valid;
+ struct bucket *d = &dst->b[b];
+ struct bucket *s = &src->b[b];
+
+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+ d->gen_valid = s->gen_valid;
+
+ if (metadata_only &&
+ (s->mark.data_type == BCH_DATA_USER ||
+ s->mark.data_type == BCH_DATA_CACHED)) {
+ d->_mark = s->mark;
+ d->_mark.owned_by_allocator = 0;
+ }
}
};
@@ -701,7 +705,8 @@ static int bch2_gc_start(struct bch_fs *c)
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/
-int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
+int bch2_gc(struct bch_fs *c, struct list_head *journal,
+ bool initial, bool metadata_only)
{
struct bch_dev *ca;
u64 start_time = local_clock();
@@ -713,7 +718,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
down_write(&c->gc_lock);
again:
percpu_down_write(&c->mark_lock);
- ret = bch2_gc_start(c);
+ ret = bch2_gc_start(c, metadata_only);
percpu_up_write(&c->mark_lock);
if (ret)
@@ -721,7 +726,7 @@ again:
bch2_mark_superblocks(c);
- ret = bch2_gc_btrees(c, journal, initial);
+ ret = bch2_gc_btrees(c, journal, initial, metadata_only);
if (ret)
goto out;
@@ -755,7 +760,7 @@ out:
percpu_down_write(&c->mark_lock);
if (!ret)
- ret = bch2_gc_done(c, initial);
+ ret = bch2_gc_done(c, initial, metadata_only);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
@@ -1157,7 +1162,7 @@ static int bch2_gc_thread(void *arg)
last = atomic_long_read(&clock->now);
last_kick = atomic_read(&c->kick_gc);
- ret = bch2_gc(c, NULL, false);
+ ret = bch2_gc(c, NULL, false, false);
if (ret)
bch_err(c, "btree gc failed: %i", ret);
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index df51eb83ab49..9e067deb8c0f 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -4,7 +4,7 @@
#include "btree_types.h"
void bch2_coalesce(struct bch_fs *);
-int bch2_gc(struct bch_fs *, struct list_head *, bool);
+int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index ba2fe909cb0a..4fa131a113eb 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -131,6 +131,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
+ usage->btree += usage->replicas[i];
+ break;
case BCH_DATA_USER:
usage->data += usage->replicas[i];
break;
@@ -225,6 +227,7 @@ static u64 avail_factor(u64 r)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
return min(fs_usage->hidden +
+ fs_usage->btree +
fs_usage->data +
reserve_factor(fs_usage->reserved +
fs_usage->online_reserved),
@@ -240,7 +243,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
ret.capacity = c->capacity -
percpu_u64_get(&c->usage[0]->hidden);
- data = percpu_u64_get(&c->usage[0]->data);
+ data = percpu_u64_get(&c->usage[0]->data) +
+ percpu_u64_get(&c->usage[0]->btree);
reserved = percpu_u64_get(&c->usage[0]->reserved) +
percpu_u64_get(&c->usage[0]->online_reserved);
@@ -383,21 +387,32 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
-void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
+void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
+ struct bch_dev *ca;
struct bucket_mark old = { .v.counter = 0 };
struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
+ unsigned i;
+ int cpu;
- percpu_down_read_preempt_disable(&c->mark_lock);
- fs_usage = this_cpu_ptr(c->usage[0]);
- buckets = bucket_array(ca);
+ percpu_u64_set(&c->usage[0]->hidden, 0);
- for_each_bucket(g, buckets)
- if (g->mark.data_type)
- bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
- percpu_up_read_preempt_enable(&c->mark_lock);
+ for_each_member_device(ca, c, i) {
+ for_each_possible_cpu(cpu)
+ memset(per_cpu_ptr(ca->usage[0], cpu), 0,
+ sizeof(*ca->usage[0]));
+
+ preempt_disable();
+ fs_usage = this_cpu_ptr(c->usage[0]);
+ buckets = bucket_array(ca);
+
+ for_each_bucket(g, buckets)
+ bch2_dev_usage_update(c, ca, fs_usage,
+ old, g->mark, false);
+ preempt_enable();
+ }
}
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
@@ -418,10 +433,17 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(idx < 0);
BUG_ON(!sectors);
- if (r->data_type == BCH_DATA_CACHED)
- fs_usage->cached += sectors;
- else
+ switch (r->data_type) {
+ case BCH_DATA_BTREE:
+ fs_usage->btree += sectors;
+ break;
+ case BCH_DATA_USER:
fs_usage->data += sectors;
+ break;
+ case BCH_DATA_CACHED:
+ fs_usage->cached += sectors;
+ break;
+ }
fs_usage->replicas[idx] += sectors;
}
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 1fc02831f6ab..1033398e6c4b 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -173,7 +173,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
-void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+void bch2_dev_usage_from_buckets(struct bch_fs *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 869a13146c29..2a1fd7a7ec20 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -69,6 +69,7 @@ struct bch_fs_usage {
u64 gc_start[0];
u64 hidden;
+ u64 btree;
u64 data;
u64 cached;
u64 reserved;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 2da19fe2d920..1cb78b6fd4b0 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -360,7 +360,7 @@ int bch2_fs_recovery(struct bch_fs *c)
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
- ret = bch2_gc(c, &journal, true);
+ ret = bch2_gc(c, &journal, true, false);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index f97315139c71..7069bea565c2 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -496,7 +496,7 @@ STORE(__bch2_fs)
bch2_coalesce(c);
if (attr == &sysfs_trigger_gc)
- bch2_gc(c, NULL, false);
+ bch2_gc(c, NULL, false, false);
if (attr == &sysfs_trigger_alloc_write) {
bool wrote;