summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2019-02-10 19:34:47 -0500
committer: Kent Overstreet <kent.overstreet@gmail.com> 2019-04-17 16:55:43 -0400
commit: 073cdc0311f6be9afef05966a22747036e7da3bc (patch)
tree: 9711f17389ab57f18cd29ea17057a2a1bc37c0ca
parent: 03229fa3667ea960fc3a6e8878265792e84e2dac (diff)
bcachefs: Write out fs usage consistently
-rw-r--r-- fs/bcachefs/bcachefs.h 3
-rw-r--r-- fs/bcachefs/btree_gc.c 28
-rw-r--r-- fs/bcachefs/btree_update_interior.c 6
-rw-r--r-- fs/bcachefs/buckets.c 135
-rw-r--r-- fs/bcachefs/buckets.h 9
-rw-r--r-- fs/bcachefs/journal_io.c 3
-rw-r--r-- fs/bcachefs/recovery.c 8
-rw-r--r-- fs/bcachefs/replicas.c 107
-rw-r--r-- fs/bcachefs/super-io.c 29
-rw-r--r-- fs/bcachefs/super-io.h 2
-rw-r--r-- fs/bcachefs/super.c 11
-rw-r--r-- fs/bcachefs/sysfs.c 33
-rw-r--r-- fs/bcachefs/util.h 8
13 files changed, 240 insertions, 142 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index e675d22cfe4d..d6dc3bd457d3 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -642,7 +642,10 @@ struct bch_fs {
struct percpu_rw_semaphore mark_lock;
+ seqcount_t usage_lock;
+ struct bch_fs_usage *usage_base;
struct bch_fs_usage __percpu *usage[2];
+ struct bch_fs_usage __percpu *usage_gc;
/* single element mempool: */
struct mutex usage_scratch_lock;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2abf4d5baf60..9f0de5cd25ab 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -492,8 +492,8 @@ static void bch2_gc_free(struct bch_fs *c)
ca->usage[1] = NULL;
}
- free_percpu(c->usage[1]);
- c->usage[1] = NULL;
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
}
static int bch2_gc_done(struct bch_fs *c,
@@ -589,14 +589,16 @@ static int bch2_gc_done(struct bch_fs *c,
}
};
+ bch2_fs_usage_acc_to_base(c, 0);
+ bch2_fs_usage_acc_to_base(c, 1);
+
bch2_dev_usage_from_buckets(c);
{
unsigned nr = fs_usage_u64s(c);
- struct bch_fs_usage *dst = (void *)
- bch2_acc_percpu_u64s((void *) c->usage[0], nr);
+ struct bch_fs_usage *dst = c->usage_base;
struct bch_fs_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) c->usage[1], nr);
+ bch2_acc_percpu_u64s((void *) c->usage_gc, nr);
copy_fs_field(hidden, "hidden");
copy_fs_field(btree, "btree");
@@ -649,11 +651,11 @@ static int bch2_gc_start(struct bch_fs *c,
*/
gc_pos_set(c, gc_phase(GC_PHASE_START));
- BUG_ON(c->usage[1]);
+ BUG_ON(c->usage_gc);
- c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
sizeof(u64), GFP_KERNEL);
- if (!c->usage[1])
+ if (!c->usage_gc)
return -ENOMEM;
for_each_member_device(ca, c, i) {
@@ -772,11 +774,17 @@ out:
ret = -EINVAL;
}
- percpu_down_write(&c->mark_lock);
+ if (!ret) {
+ bch2_journal_block(&c->journal);
- if (!ret)
+ percpu_down_write(&c->mark_lock);
ret = bch2_gc_done(c, initial, metadata_only);
+ bch2_journal_unblock(&c->journal);
+ } else {
+ percpu_down_write(&c->mark_lock);
+ }
+
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 19ba667be9bc..fb6bf79a1a8a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1084,7 +1084,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&old->key),
fs_usage);
- bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
@@ -1189,7 +1189,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bkey_disassemble(b, k, &tmp),
fs_usage);
- bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
@@ -2003,7 +2003,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&b->key),
fs_usage);
- bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 3d019e75a9ec..3834b150a0d1 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -119,8 +119,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
unsigned i;
percpu_down_write(&c->mark_lock);
- usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0],
- fs_usage_u64s(c));
+ usage = c->usage_base;
+
+ bch2_fs_usage_acc_to_base(c, 0);
+ bch2_fs_usage_acc_to_base(c, 1);
for (i = 0; i < BCH_REPLICAS_MAX; i++)
usage->reserved += usage->persistent_reserved[i];
@@ -188,12 +190,40 @@ struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
return ret;
}
+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
+ unsigned journal_seq,
+ bool gc)
+{
+ return this_cpu_ptr(gc
+ ? c->usage_gc
+ : c->usage[journal_seq & 1]);
+}
+
+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
+{
+ ssize_t offset = v - (u64 *) c->usage_base;
+ unsigned seq;
+ u64 ret;
+
+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
+ percpu_rwsem_assert_held(&c->mark_lock);
+
+ do {
+ seq = read_seqcount_begin(&c->usage_lock);
+ ret = *v +
+ percpu_u64_get((u64 __percpu *) c->usage[0] + offset) +
+ percpu_u64_get((u64 __percpu *) c->usage[1] + offset);
+ } while (read_seqcount_retry(&c->usage_lock, seq));
+
+ return ret;
+}
+
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
{
struct bch_fs_usage *ret;
- unsigned v, u64s = fs_usage_u64s(c);
+ unsigned seq, v, u64s = fs_usage_u64s(c);
retry:
- ret = kzalloc(u64s * sizeof(u64), GFP_NOFS);
+ ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (unlikely(!ret))
return NULL;
@@ -207,11 +237,70 @@ retry:
goto retry;
}
- acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s);
+ do {
+ seq = read_seqcount_begin(&c->usage_lock);
+ memcpy(ret, c->usage_base, u64s * sizeof(u64));
+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s);
+ acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[1], u64s);
+ } while (read_seqcount_retry(&c->usage_lock, seq));
return ret;
}
+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
+{
+ unsigned u64s = fs_usage_u64s(c);
+
+ BUG_ON(idx >= 2);
+
+ write_seqcount_begin(&c->usage_lock);
+
+ acc_u64s_percpu((u64 *) c->usage_base,
+ (u64 __percpu *) c->usage[idx], u64s);
+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
+
+ write_seqcount_end(&c->usage_lock);
+}
+
+void bch2_fs_usage_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_fs_usage *fs_usage)
+{
+ unsigned i;
+
+ pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+
+ pr_buf(out, "hidden:\t\t\t\t%llu\n",
+ fs_usage->hidden);
+ pr_buf(out, "data:\t\t\t\t%llu\n",
+ fs_usage->data);
+ pr_buf(out, "cached:\t\t\t\t%llu\n",
+ fs_usage->cached);
+ pr_buf(out, "reserved:\t\t\t%llu\n",
+ fs_usage->reserved);
+ pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+ fs_usage->nr_inodes);
+ pr_buf(out, "online reserved:\t\t%llu\n",
+ fs_usage->online_reserved);
+
+ for (i = 0;
+ i < ARRAY_SIZE(fs_usage->persistent_reserved);
+ i++) {
+ pr_buf(out, "%u replicas:\n", i + 1);
+ pr_buf(out, "\treserved:\t\t%llu\n",
+ fs_usage->persistent_reserved[i]);
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ pr_buf(out, "\t");
+ bch2_replicas_entry_to_text(out, e);
+ pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]);
+ }
+}
+
#define RESERVE_FACTOR 6
static u64 reserve_factor(u64 r)
@@ -241,17 +330,17 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
u64 data, reserved;
ret.capacity = c->capacity -
- percpu_u64_get(&c->usage[0]->hidden);
+ bch2_fs_usage_read_one(c, &c->usage_base->hidden);
- data = percpu_u64_get(&c->usage[0]->data) +
- percpu_u64_get(&c->usage[0]->btree);
- reserved = percpu_u64_get(&c->usage[0]->reserved) +
- percpu_u64_get(&c->usage[0]->online_reserved);
+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
+ bch2_fs_usage_read_one(c, &c->usage_base->btree);
+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
+ bch2_fs_usage_read_one(c, &c->usage_base->online_reserved);
ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;
- ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
return ret;
}
@@ -300,7 +389,8 @@ static bool bucket_became_unavailable(struct bucket_mark old,
int bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
- struct disk_reservation *disk_res)
+ struct disk_reservation *disk_res,
+ unsigned journal_seq)
{
s64 added = fs_usage->data + fs_usage->reserved;
s64 should_not_have_added;
@@ -326,7 +416,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
}
preempt_disable();
- acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
+ acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false),
(u64 *) fs_usage, fs_usage_u64s(c));
preempt_enable();
@@ -391,27 +481,23 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
struct bch_dev *ca;
struct bucket_mark old = { .v.counter = 0 };
- struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
unsigned i;
int cpu;
- percpu_u64_set(&c->usage[0]->hidden, 0);
+ c->usage_base->hidden = 0;
for_each_member_device(ca, c, i) {
for_each_possible_cpu(cpu)
memset(per_cpu_ptr(ca->usage[0], cpu), 0,
sizeof(*ca->usage[0]));
- preempt_disable();
- fs_usage = this_cpu_ptr(c->usage[0]);
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
- bch2_dev_usage_update(c, ca, fs_usage,
+ bch2_dev_usage_update(c, ca, c->usage_base,
old, g->mark, false);
- preempt_enable();
}
}
@@ -475,7 +561,7 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark *ret,
bool gc)
{
- struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
@@ -514,7 +600,7 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator,
bool gc)
{
- struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
+ struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
@@ -622,7 +708,7 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
old.dirty_sectors, sectors);
if (c)
- bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]),
+ bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc),
old, new, gc);
return 0;
@@ -958,7 +1044,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
preempt_disable();
if (!fs_usage || gc)
- fs_usage = this_cpu_ptr(c->usage[gc]);
+ fs_usage = fs_usage_ptr(c, journal_seq, gc);
switch (k.k->type) {
case KEY_TYPE_alloc:
@@ -1125,7 +1211,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
char buf[200];
- if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res,
+ trans->journal_res.seq) ||
warned_disk_usage ||
xchg(&warned_disk_usage, 1))
return;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index e87ca1746d82..0aada931322f 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -221,8 +221,15 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
+
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
+
+void bch2_fs_usage_to_text(struct printbuf *,
+ struct bch_fs *, struct bch_fs_usage *);
+
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage_short
@@ -251,7 +258,7 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct bch_fs_usage *,
u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
- struct disk_reservation *);
+ struct disk_reservation *, unsigned);
int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
struct bkey_s_c, struct bkey_i *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 9e038a2b710c..73068c35b23d 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -993,7 +993,8 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock();
start = vstruct_last(jset);
- end = bch2_journal_super_entries_add_common(c, start);
+ end = bch2_journal_super_entries_add_common(c, start,
+ le64_to_cpu(jset->seq));
u64s = (u64 *) end - (u64 *) start;
BUG_ON(u64s > j->entry_u64s_reserved);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 5466e5e15eb7..3f60b91629b3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -403,13 +403,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
switch (entry->btree_id) {
case FS_USAGE_RESERVED:
if (entry->level < BCH_REPLICAS_MAX)
- percpu_u64_set(&c->usage[0]->
- persistent_reserved[entry->level],
- le64_to_cpu(u->v));
+ c->usage_base->persistent_reserved[entry->level] =
+ le64_to_cpu(u->v);
break;
case FS_USAGE_INODES:
- percpu_u64_set(&c->usage[0]->nr_inodes,
- le64_to_cpu(u->v));
+ c->usage_base->nr_inodes = le64_to_cpu(u->v);
break;
case FS_USAGE_KEY_VERSION:
atomic64_set(&c->key_version,
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 833fa14319d4..c2aa330c9932 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -1,5 +1,6 @@
#include "bcachefs.h"
+#include "buckets.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"
@@ -234,20 +235,13 @@ bool bch2_replicas_marked(struct bch_fs *c,
return marked;
}
-static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
+static void __replicas_table_update(struct bch_fs_usage *dst,
struct bch_replicas_cpu *dst_r,
- struct bch_fs_usage __percpu *src_p,
+ struct bch_fs_usage *src,
struct bch_replicas_cpu *src_r)
{
- unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
- struct bch_fs_usage *dst, *src = (void *)
- bch2_acc_percpu_u64s((void *) src_p, src_nr);
int src_idx, dst_idx;
- preempt_disable();
- dst = this_cpu_ptr(dst_p);
- preempt_enable();
-
*dst = *src;
for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
@@ -262,6 +256,22 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
}
}
+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
+ struct bch_replicas_cpu *dst_r,
+ struct bch_fs_usage __percpu *src_p,
+ struct bch_replicas_cpu *src_r)
+{
+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
+ struct bch_fs_usage *dst, *src = (void *)
+ bch2_acc_percpu_u64s((void *) src_p, src_nr);
+
+ preempt_disable();
+ dst = this_cpu_ptr(dst_p);
+ preempt_enable();
+
+ __replicas_table_update(dst, dst_r, src, src_r);
+}
+
/*
* Resize filesystem accounting:
*/
@@ -270,34 +280,48 @@ static int replicas_table_update(struct bch_fs *c,
{
struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
struct bch_fs_usage *new_scratch = NULL;
+ struct bch_fs_usage __percpu *new_gc = NULL;
+ struct bch_fs_usage *new_base = NULL;
unsigned bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
int ret = -ENOMEM;
- if (!(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64),
+ if (!(new_base = kzalloc(bytes, GFP_NOIO)) ||
+ !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64),
+ GFP_NOIO)) ||
+ !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
GFP_NOIO)) ||
- (c->usage[1] &&
- !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
- GFP_NOIO))) ||
- !(new_scratch = kmalloc(bytes, GFP_NOIO)))
+ !(new_scratch = kmalloc(bytes, GFP_NOIO)) ||
+ (c->usage_gc &&
+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
goto err;
+ if (c->usage_base)
+ __replicas_table_update(new_base, new_r,
+ c->usage_base, &c->replicas);
if (c->usage[0])
- __replicas_table_update(new_usage[0], new_r,
- c->usage[0], &c->replicas);
+ __replicas_table_update_pcpu(new_usage[0], new_r,
+ c->usage[0], &c->replicas);
if (c->usage[1])
- __replicas_table_update(new_usage[1], new_r,
- c->usage[1], &c->replicas);
+ __replicas_table_update_pcpu(new_usage[1], new_r,
+ c->usage[1], &c->replicas);
+ if (c->usage_gc)
+ __replicas_table_update_pcpu(new_gc, new_r,
+ c->usage_gc, &c->replicas);
+ swap(c->usage_base, new_base);
swap(c->usage[0], new_usage[0]);
swap(c->usage[1], new_usage[1]);
swap(c->usage_scratch, new_scratch);
+ swap(c->usage_gc, new_gc);
swap(c->replicas, *new_r);
ret = 0;
err:
+ free_percpu(new_gc);
kfree(new_scratch);
free_percpu(new_usage[1]);
free_percpu(new_usage[0]);
+ kfree(new_base);
return ret;
}
@@ -456,9 +480,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
lockdep_assert_held(&c->replicas_gc_lock);
mutex_lock(&c->sb_lock);
-
- if (ret)
- goto err;
+ percpu_down_write(&c->mark_lock);
/*
* this is kind of crappy; the replicas gc mechanism needs to be ripped
@@ -469,26 +491,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
struct bch_replicas_cpu n;
- u64 v;
-
- if (__replicas_has_entry(&c->replicas_gc, e))
- continue;
- v = percpu_u64_get(&c->usage[0]->replicas[i]);
- if (!v)
- continue;
+ if (!__replicas_has_entry(&c->replicas_gc, e) &&
+ (c->usage_base->replicas[i] ||
+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
+ percpu_u64_get(&c->usage[1]->replicas[i]))) {
+ n = cpu_replicas_add_entry(&c->replicas_gc, e);
+ if (!n.entries) {
+ ret = -ENOSPC;
+ goto err;
+ }
- n = cpu_replicas_add_entry(&c->replicas_gc, e);
- if (!n.entries) {
- ret = -ENOSPC;
- goto err;
+ swap(n, c->replicas_gc);
+ kfree(n.entries);
}
-
- percpu_down_write(&c->mark_lock);
- swap(n, c->replicas_gc);
- percpu_up_write(&c->mark_lock);
-
- kfree(n.entries);
}
if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
@@ -496,19 +512,18 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
goto err;
}
- bch2_write_super(c);
-
- /* don't update in memory replicas until changes are persistent */
+ ret = replicas_table_update(c, &c->replicas_gc);
err:
- percpu_down_write(&c->mark_lock);
- if (!ret)
- ret = replicas_table_update(c, &c->replicas_gc);
-
kfree(c->replicas_gc.entries);
c->replicas_gc.entries = NULL;
+
percpu_up_write(&c->mark_lock);
+ if (!ret)
+ bch2_write_super(c);
+
mutex_unlock(&c->sb_lock);
+
return ret;
}
@@ -575,7 +590,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
BUG_ON(ret < 0);
}
- percpu_u64_set(&c->usage[0]->replicas[idx], sectors);
+ c->usage_base->replicas[idx] = sectors;
return 0;
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index c0471c891efd..c675280cfe03 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1,5 +1,6 @@
#include "bcachefs.h"
+#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
@@ -953,7 +954,8 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
struct jset_entry *
bch2_journal_super_entries_add_common(struct bch_fs *c,
- struct jset_entry *entry)
+ struct jset_entry *entry,
+ u64 journal_seq)
{
struct btree_root *r;
unsigned i;
@@ -976,10 +978,16 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
mutex_unlock(&c->btree_root_lock);
- percpu_down_read_preempt_disable(&c->mark_lock);
+ percpu_down_write(&c->mark_lock);
+
+ if (!journal_seq) {
+ bch2_fs_usage_acc_to_base(c, 0);
+ bch2_fs_usage_acc_to_base(c, 1);
+ } else {
+ bch2_fs_usage_acc_to_base(c, journal_seq & 1);
+ }
{
- u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
@@ -987,7 +995,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
u->entry.type = BCH_JSET_ENTRY_usage;
u->entry.btree_id = FS_USAGE_INODES;
- u->v = cpu_to_le64(nr_inodes);
+ u->v = cpu_to_le64(c->usage_base->nr_inodes);
entry = vstruct_next(entry);
}
@@ -1008,17 +1016,13 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
- u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]);
-
- if (!sectors)
- continue;
memset(u, 0, sizeof(*u));
u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
u->entry.type = BCH_JSET_ENTRY_usage;
u->entry.btree_id = FS_USAGE_RESERVED;
u->entry.level = i;
- u->v = sectors;
+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]);
entry = vstruct_next(entry);
}
@@ -1026,7 +1030,6 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
- u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
@@ -1034,13 +1037,13 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
u->entry.u64s = DIV_ROUND_UP(sizeof(*u) + e->nr_devs,
sizeof(u64)) - 1;
u->entry.type = BCH_JSET_ENTRY_data_usage;
- u->v = cpu_to_le64(sectors);
+ u->v = cpu_to_le64(c->usage_base->replicas[i]);
memcpy(&u->r, e, replicas_entry_bytes(e));
entry = vstruct_next(entry);
}
- percpu_up_read_preempt_enable(&c->mark_lock);
+ percpu_up_write(&c->mark_lock);
return entry;
}
@@ -1076,7 +1079,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
entry = sb_clean->start;
- entry = bch2_journal_super_entries_add_common(c, entry);
+ entry = bch2_journal_super_entries_add_common(c, entry, 0);
BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
memset(entry, 0,
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index aa91b8216082..cf25b44a077e 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -136,7 +136,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
struct jset_entry *
bch2_journal_super_entries_add_common(struct bch_fs *,
- struct jset_entry *);
+ struct jset_entry *, u64);
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index dad012e5ee53..dd6440e4db17 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -496,7 +496,9 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->mark_lock);
kfree(c->usage_scratch);
+ free_percpu(c->usage[1]);
free_percpu(c->usage[0]);
+ kfree(c->usage_base);
free_percpu(c->pcpu);
mempool_exit(&c->btree_iters_pool);
mempool_exit(&c->btree_bounce_pool);
@@ -690,6 +692,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
seqcount_init(&c->gc_pos_lock);
+ seqcount_init(&c->usage_lock);
+
c->copy_gc_enabled = 1;
c->rebalance.enabled = 1;
c->promote_whole_extents = true;
@@ -1469,13 +1473,8 @@ err:
static void dev_usage_clear(struct bch_dev *ca)
{
struct bucket_array *buckets;
- int cpu;
- for_each_possible_cpu(cpu) {
- struct bch_dev_usage *p =
- per_cpu_ptr(ca->usage[0], cpu);
- memset(p, 0, sizeof(*p));
- }
+ percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0]));
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 817975a6b81b..c2744c7dd2ba 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -235,42 +235,11 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
{
struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
- unsigned i;
if (!fs_usage)
return -ENOMEM;
- pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
-
- pr_buf(&out, "hidden:\t\t\t\t%llu\n",
- fs_usage->hidden);
- pr_buf(&out, "data:\t\t\t\t%llu\n",
- fs_usage->data);
- pr_buf(&out, "cached:\t\t\t\t%llu\n",
- fs_usage->cached);
- pr_buf(&out, "reserved:\t\t\t%llu\n",
- fs_usage->reserved);
- pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
- fs_usage->nr_inodes);
- pr_buf(&out, "online reserved:\t\t%llu\n",
- fs_usage->online_reserved);
-
- for (i = 0;
- i < ARRAY_SIZE(fs_usage->persistent_reserved);
- i++) {
- pr_buf(&out, "%u replicas:\n", i + 1);
- pr_buf(&out, "\treserved:\t\t%llu\n",
- fs_usage->persistent_reserved[i]);
- }
-
- for (i = 0; i < c->replicas.nr; i++) {
- struct bch_replicas_entry *e =
- cpu_replicas_entry(&c->replicas, i);
-
- pr_buf(&out, "\t");
- bch2_replicas_entry_to_text(&out, e);
- pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
- }
+ bch2_fs_usage_to_text(&out, c, fs_usage);
percpu_up_read_preempt_enable(&c->mark_lock);
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 093eafa31274..f7a35880b0cf 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -738,6 +738,14 @@ static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
}
+static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ memset(per_cpu_ptr(p, cpu), c, bytes);
+}
+
u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
#define cmp_int(l, r) ((l > r) - (l < r))