summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2021-05-23 02:31:33 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2021-05-30 15:04:50 -0400
commit f1e0dadb42c9d2945aac4b4b0143b8e85179dbd8 (patch)
tree b35f890cb9ddd0dab18d6192eb3c425cdf19bec6
parent c5750de4be351e20afffcf10cf6e333c031c6aa9 (diff)
bcachefs: Fsck for reflink refcounts
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- fs/bcachefs/bcachefs.h 11
-rw-r--r-- fs/bcachefs/btree_gc.c 202
-rw-r--r-- fs/bcachefs/buckets.c 152
-rw-r--r-- fs/bcachefs/reflink.c 2
-rw-r--r-- fs/bcachefs/reflink.h 24
5 files changed, 359 insertions, 32 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 56c53d78a892..8be95d81180f 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -390,6 +390,14 @@ struct gc_pos {
unsigned level;
};
+struct reflink_gc { /* gc's record of one refcounted key in the reflink btree */
+ u64 offset; /* copied from the key's k.p.offset (its end position) when the table is built */
+ u32 size; /* copied from the key's k.size; offset - size is used as the start of its range */
+ u32 refcount; /* starts at 0 and is adjusted as gc marks reflink pointers */
+};
+
+typedef GENRADIX(struct reflink_gc) reflink_gc_table; /* generic radix tree holding struct reflink_gc entries */
+
struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
@@ -805,6 +813,9 @@ struct bch_fs {
/* REFLINK */
u64 reflink_hint;
+ reflink_gc_table reflink_gc_table;
+ size_t reflink_gc_nr;
+ size_t reflink_gc_idx;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index e28292e0aa89..b03432c13fbb 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -23,6 +23,7 @@
#include "keylist.h"
#include "move.h"
#include "recovery.h"
+#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
@@ -1282,6 +1283,201 @@ static int bch2_gc_start(struct bch_fs *c,
return 0;
}
+/*
+ * Callback for the initial pass of bch2_gc_reflink_done(): checks one reflink
+ * btree key against the entry gc recorded for it and, if the refcounts
+ * disagree and fsck_err_on() says to fix it, queues a corrected key.
+ *
+ * Keys with no refcount field are ignored. Table entries are consumed in
+ * order through c->reflink_gc_idx, so keys must arrive in the same order in
+ * which the table was filled.
+ *
+ * Returns 0 on success, -EINVAL when the key does not match the next table
+ * entry (or the table has run out of entries), -ENOMEM when the replacement
+ * key cannot be allocated, or the error from bch2_journal_key_insert().
+ */
+static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct reflink_gc *r;
+	const __le64 *refcount = bkey_refcount_c(k);
+	char buf[200];
+	int ret = 0;
+
+	if (!refcount)
+		return 0;
+
+	/*
+	 * genradix_ptr() only looks an entry up, it never allocates: NULL
+	 * means the table has no entry for this key, which is the same
+	 * inconsistency as a mismatched entry, not an out of memory condition.
+	 */
+	r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
+	if (!r ||
+	    r->offset != k.k->p.offset ||
+	    r->size != k.k->size) {
+		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+		return -EINVAL;
+	}
+
+	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+			"reflink key has wrong refcount:\n"
+			" %s\n"
+			" should be %u",
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+			r->refcount)) {
+		struct bkey_i *new;
+
+		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+		if (!new) {
+			ret = -ENOMEM;
+			goto fsck_err;
+		}
+
+		bkey_reassemble(new, k);
+
+		if (!r->refcount) {
+			/* gc found no references: replace the key with a deletion */
+			new->k.type = KEY_TYPE_deleted;
+			new->k.size = 0;
+		} else {
+			*bkey_refcount(new) = cpu_to_le64(r->refcount);
+		}
+
+		/*
+		 * new is freed here only on failure.
+		 * NOTE(review): presumably the journal keys take ownership of
+		 * it on success - confirm against bch2_journal_key_insert().
+		 */
+		ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
+		if (ret)
+			kfree(new);
+	}
+fsck_err:
+	return ret;
+}
+
+/*
+ * Finish the reflink part of gc: compare the refcount stored in every
+ * refcounted reflink btree key with the count gc accumulated in
+ * c->reflink_gc_table, repair keys that differ (subject to fsck_err_on()),
+ * then free the table and reset c->reflink_gc_nr.
+ *
+ * @initial:       walk the btree and journal keys, checking each key with
+ *                 bch2_gc_reflink_done_initial_fn(), instead of iterating
+ *                 with a btree transaction
+ * @metadata_only: nothing to do; returns 0 without touching the table
+ *
+ * Returns 0 on success, -EINVAL if the btree no longer lines up with the
+ * table, -ENOMEM if a replacement key cannot be allocated, or the error from
+ * the walk or the btree update.
+ */
+static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
+				bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	struct reflink_gc *r;
+	size_t idx = 0;
+	char buf[200];
+	int ret = 0;
+
+	if (metadata_only)
+		return 0;
+
+	if (initial) {
+		c->reflink_gc_idx = 0;
+
+		ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
+				bch2_gc_reflink_done_initial_fn);
+		goto out;
+	}
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		const __le64 *refcount = bkey_refcount_c(k);
+
+		if (!refcount)
+			continue;
+
+		/*
+		 * The table holds one entry per refcounted key, in walk
+		 * order, so step to the next entry for every such key;
+		 * without the increment every key would be checked against
+		 * entry 0.
+		 */
+		r = genradix_ptr(&c->reflink_gc_table, idx++);
+		if (!r ||
+		    r->offset != k.k->p.offset ||
+		    r->size != k.k->size) {
+			bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+				"reflink key has wrong refcount:\n"
+				" %s\n"
+				" should be %u",
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+				r->refcount)) {
+			struct bkey_i *new;
+
+			new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+			if (!new) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			bkey_reassemble(new, k);
+
+			/* no references found: delete the key, else fix its count */
+			if (!r->refcount)
+				new->k.type = KEY_TYPE_deleted;
+			else
+				*bkey_refcount(new) = cpu_to_le64(r->refcount);
+
+			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+				__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
+			/* unlike the initial path, new is freed whether or not the insert worked */
+			kfree(new);
+
+			if (ret)
+				break;
+		}
+	}
+fsck_err:
+	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_exit(&trans);
+out:
+	genradix_free(&c->reflink_gc_table);
+	c->reflink_gc_nr = 0;
+	return ret;
+}
+
+/*
+ * Callback for the initial pass of bch2_gc_reflink_start(): appends a table
+ * entry for every reflink btree key that carries a refcount.
+ *
+ * The entry records the key's end offset and size and starts with a refcount
+ * of zero. Returns 0, or -ENOMEM if the table cannot be grown.
+ */
+static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct reflink_gc *entry;
+
+	/* only keys with a refcount field get an entry */
+	if (!bkey_refcount_c(k))
+		return 0;
+
+	entry = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+				   GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->refcount	= 0;
+	entry->size	= k.k->size;
+	entry->offset	= k.k->p.offset;
+	return 0;
+}
+
+/*
+ * Start the reflink part of gc: throw away any previous table and rebuild
+ * c->reflink_gc_table with one zero-refcount entry per refcounted key in the
+ * reflink btree, in iteration order.
+ *
+ * @initial:       walk the btree and journal keys, adding entries with
+ *                 bch2_gc_reflink_start_initial_fn(), instead of iterating
+ *                 with a btree transaction
+ * @metadata_only: nothing to do; returns 0 without touching the table
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be grown, or the error
+ * reported by the walk or the btree iteration.
+ */
+static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
+				 bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	struct reflink_gc *r;
+	int ret;
+
+	if (metadata_only)
+		return 0;
+
+	genradix_free(&c->reflink_gc_table);
+	c->reflink_gc_nr = 0;
+
+	if (initial)
+		return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
+				bch2_gc_reflink_start_initial_fn);
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		const __le64 *refcount = bkey_refcount_c(k);
+
+		if (!refcount)
+			continue;
+
+		r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+				       GFP_KERNEL);
+		if (!r) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		r->offset	= k.k->p.offset;
+		r->size		= k.k->size;
+		r->refcount	= 0;
+	}
+	bch2_trans_iter_put(&trans, iter);
+
+	bch2_trans_exit(&trans);
+	/*
+	 * Propagate allocation and iteration failures: returning 0 here would
+	 * let gc carry on with a truncated table.
+	 */
+	return ret;
+}
+
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
@@ -1316,7 +1512,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
again:
- ret = bch2_gc_start(c, metadata_only);
+ ret = bch2_gc_start(c, metadata_only) ?:
+ bch2_gc_reflink_start(c, initial, metadata_only);
if (ret)
goto out;
@@ -1378,7 +1575,8 @@ out:
bch2_journal_block(&c->journal);
percpu_down_write(&c->mark_lock);
- ret = bch2_gc_done(c, initial, metadata_only);
+ ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
+ bch2_gc_done(c, initial, metadata_only);
bch2_journal_unblock(&c->journal);
} else {
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 282bca166b16..d07085a2fd1b 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -14,6 +14,7 @@
#include "ec.h"
#include "error.h"
#include "movinggc.h"
+#include "reflink.h"
#include "replicas.h"
#include <linux/preempt.h>
@@ -1072,6 +1073,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
return 0;
}
+/*
+ * Does the fragment [p_start, p_end) of reflink pointer @p overlap the range
+ * [v_start, v_end)?
+ *
+ * The fragment bounds are translated by p.v->idx before the comparison. An
+ * empty fragment never overlaps. Returns 1 on overlap, 0 otherwise.
+ */
+static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
+				       u64 p_start, u64 p_end,
+				       u64 v_start, u64 v_end)
+{
+	u64 base;
+
+	if (p_start == p_end)
+		return false;
+
+	base = le64_to_cpu(p.v->idx);
+
+	return p_end + base > v_start &&
+	       p_start + base < v_end;
+}
+
+/*
+ * Same test as __reflink_p_frag_references(), with the target range taken
+ * from key @k: from bkey_start_offset(k.k) up to k.k->p.offset.
+ */
+static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
+				     u64 start, u64 end,
+				     struct bkey_s_c k)
+{
+	u64 v_start = bkey_start_offset(k.k);
+	u64 v_end = k.k->p.offset;
+
+	return __reflink_p_frag_references(p, start, end, v_start, v_end);
+}
+
+/*
+ * Apply one step of marking reflink pointer @p to the gc reflink table.
+ *
+ * Advances *r_idx to the first table entry whose offset is greater than @idx
+ * and adjusts that entry's refcount: +1 normally, -1 when
+ * BTREE_TRIGGER_OVERWRITE is set. The adjustment is then modified by how many
+ * of the two fragments of @p, [0, front_frag) and [back_frag, p.k->size),
+ * still overlap the entry: with both overlapping the sign is flipped, with
+ * exactly one the refcount is left unchanged.
+ *
+ * Returns the number of sectors covered by this step (at most @sectors, and
+ * no further than the end of the entry), or -EIO after reporting an
+ * inconsistency if no entry lies beyond @idx.
+ */
+static int __bch2_mark_reflink_p(struct bch_fs *c,
+			struct bkey_s_c_reflink_p p,
+			u64 idx, unsigned sectors,
+			unsigned front_frag,
+			unsigned back_frag,
+			unsigned flags,
+			size_t *r_idx)
+{
+	struct reflink_gc *entry;
+	int delta = (flags & BTREE_TRIGGER_OVERWRITE) ? -1 : 1;
+	int nr_frags;
+	u64 v_start, v_end;
+
+	/* skip entries that end at or before idx */
+	for (;; (*r_idx)++) {
+		if (*r_idx >= c->reflink_gc_nr)
+			goto not_found;
+
+		entry = genradix_ptr(&c->reflink_gc_table, *r_idx);
+		BUG_ON(!entry);
+
+		if (entry->offset > idx)
+			break;
+	}
+
+	v_end	= entry->offset;
+	v_start	= entry->offset - entry->size;
+
+	nr_frags =
+		__reflink_p_frag_references(p, 0, front_frag,
+					    v_start, v_end) +
+		__reflink_p_frag_references(p, back_frag, p.k->size,
+					    v_start, v_end);
+
+	switch (nr_frags) {
+	case 2:
+		BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
+		delta = -delta;
+		break;
+	case 1:
+		BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
+		delta = 0;
+		break;
+	default:
+		break;
+	}
+
+	BUG_ON((s64) entry->refcount + delta < 0);
+
+	entry->refcount += delta;
+	return min_t(u64, sectors, entry->offset - idx);
+not_found:
+	bch2_fs_inconsistent(c,
+		"%llu:%llu len %u points to nonexistent indirect extent %llu",
+		p.k->p.inode, p.k->p.offset, p.k->size, idx);
+	bch2_inconsistent_error(c);
+	return -EIO;
+}
+
+/*
+ * Mark the part of reflink pointer @p that starts @offset sectors in and is
+ * |@sectors| long against the gc reflink table.
+ *
+ * A binary search finds the first table entry whose offset is greater than
+ * the starting index (p.v->idx + @offset); the range is then handed to
+ * __bch2_mark_reflink_p() one step at a time until it is used up.
+ *
+ * Returns 0 on success or the negative error from __bch2_mark_reflink_p().
+ */
+static int bch2_mark_reflink_p(struct bch_fs *c,
+			struct bkey_s_c_reflink_p p, unsigned offset,
+			s64 sectors, unsigned flags)
+{
+	u64 idx = le64_to_cpu(p.v->idx) + offset;
+	struct reflink_gc *ref;
+	size_t lo, hi, mid;
+	unsigned front_frag, back_frag;
+	s64 ret = 0;
+
+	/* only the magnitude of the sector count is used */
+	sectors = sectors < 0 ? -sectors : sectors;
+
+	BUG_ON(offset + sectors > p.k->size);
+
+	/* the marked range splits p into a front and a back fragment */
+	front_frag	= offset;
+	back_frag	= offset + sectors;
+
+	lo = 0;
+	hi = c->reflink_gc_nr;
+	while (lo < hi) {
+		mid = lo + (hi - lo) / 2;
+		ref = genradix_ptr(&c->reflink_gc_table, mid);
+
+		if (ref->offset > idx)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	for (; sectors; idx += ret, sectors -= ret) {
+		ret = __bch2_mark_reflink_p(c, p, idx, sectors,
+				front_frag, back_frag, flags, &lo);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
static int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c old,
struct bkey_s_c new,
@@ -1127,6 +1246,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
fs_usage->persistent_reserved[replicas - 1] += sectors;
break;
}
+ case KEY_TYPE_reflink_p:
+ ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
+ offset, sectors, flags);
+ break;
}
preempt_enable();
@@ -1689,35 +1812,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
return ret;
}
-static __le64 *bkey_refcount(struct bkey_i *k)
-{
- switch (k->k.type) {
- case KEY_TYPE_reflink_v:
- return &bkey_i_to_reflink_v(k)->v.refcount;
- case KEY_TYPE_indirect_inline_data:
- return &bkey_i_to_indirect_inline_data(k)->v.refcount;
- default:
- return NULL;
- }
-}
-
-static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
- u64 start, u64 end,
- struct bkey_s_c k)
-{
- if (start == end)
- return false;
-
- start += le64_to_cpu(p.v->idx);
- end += le64_to_cpu(p.v->idx);
-
- if (end <= bkey_start_offset(k.k))
- return false;
- if (start >= k.k->p.offset)
- return false;
- return true;
-}
-
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index c624fabe1e1c..e986b5284d37 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
- refcount = (void *) &r_v->v;
+ refcount = bkey_refcount(r_v);
*refcount = 0;
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
index 9d5e7dc58f2b..bfc785619ee8 100644
--- a/fs/bcachefs/reflink.h
+++ b/fs/bcachefs/reflink.h
@@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
.val_to_text = bch2_indirect_inline_data_to_text, \
}
+/*
+ * Read-only access to the refcount field of key @k.
+ *
+ * Returns a pointer to the refcount for KEY_TYPE_reflink_v and
+ * KEY_TYPE_indirect_inline_data keys, NULL for every other key type.
+ */
+static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
+{
+	if (k.k->type == KEY_TYPE_reflink_v)
+		return &bkey_s_c_to_reflink_v(k).v->refcount;
+
+	if (k.k->type == KEY_TYPE_indirect_inline_data)
+		return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
+
+	return NULL;
+}
+
+/*
+ * Writable access to the refcount field of key @k.
+ *
+ * Returns a pointer to the refcount for KEY_TYPE_reflink_v and
+ * KEY_TYPE_indirect_inline_data keys, NULL for every other key type.
+ */
+static inline __le64 *bkey_refcount(struct bkey_i *k)
+{
+	if (k->k.type == KEY_TYPE_reflink_v)
+		return &bkey_i_to_reflink_v(k)->v.refcount;
+
+	if (k->k.type == KEY_TYPE_indirect_inline_data)
+		return &bkey_i_to_indirect_inline_data(k)->v.refcount;
+
+	return NULL;
+}
+
s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
u64, u64 *, u64, s64 *);