Diffstat (limited to 'libbcachefs/alloc_background.c')
-rw-r--r--  libbcachefs/alloc_background.c | 237
1 file changed, 176 insertions(+), 61 deletions(-)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index df340ebb..688a53b4 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -38,6 +38,15 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#undef x
};
+struct bkey_alloc_buf {
+ struct bkey_i k;
+ struct bch_alloc_v3 v;
+
+#define x(_name, _bits) + _bits / 8
+ u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
+#undef x
+} __attribute__((packed, aligned(8)));
+
/* Persistent alloc info: */
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
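
Note on the _pad sizing above: BCH_ALLOC_FIELDS_V2() is an x-macro list, and with x temporarily defined as "+ _bits / 8" its expansion becomes a compile-time sum of every field's maximum encoded byte width, so the buffer is guaranteed large enough for a fully packed v3 key. A minimal standalone sketch of the idiom, with a made-up field list (not the real BCH_ALLOC_FIELDS_V2 entries):

#include <stdint.h>
#include <stdio.h>

/* hypothetical x-macro field list: x(name, bits) */
#define DEMO_FIELDS()		\
	x(read_time,	64)	\
	x(write_time,	64)	\
	x(gen,		8)

struct demo_buf {
	uint64_t	header;
#define x(_name, _bits)	+ _bits / 8
	uint8_t		_pad[0 DEMO_FIELDS()];	/* [0 + 8 + 8 + 1] */
#undef x
} __attribute__((packed, aligned(8)));

int main(void)
{
	printf("pad bytes: %zu\n",
	       sizeof(((struct demo_buf *)0)->_pad));	/* prints 17 */
	return 0;
}
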
@@ -244,25 +253,24 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
-struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
- const struct bkey_alloc_unpacked src)
+static void bch2_alloc_pack(struct bch_fs *c,
+ struct bkey_alloc_buf *dst,
+ const struct bkey_alloc_unpacked src)
{
- struct bkey_alloc_buf *dst;
-
- dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
- if (!IS_ERR(dst))
- bch2_alloc_pack_v3(dst, src);
-
- return dst;
+ bch2_alloc_pack_v3(dst, src);
}
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
{
- struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
+ struct bkey_alloc_buf *a;
+
+ a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
- return PTR_ERR_OR_ZERO(a) ?:
- bch2_trans_update(trans, iter, &a->k, trigger_flags);
+ bch2_alloc_pack(trans->c, a, *u);
+ return bch2_trans_update(trans, iter, &a->k, trigger_flags);
}
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
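
Note: the reworked bch2_alloc_write() surfaces allocation failure from bch2_trans_kmalloc() as an explicit IS_ERR()/PTR_ERR() check before packing, rather than folding it through PTR_ERR_OR_ZERO() at the end. A hedged sketch of a caller, using only helpers visible elsewhere in this diff (bucket_bump_gen() itself is hypothetical, not a function in this file):

static int bucket_bump_gen(struct btree_trans *trans, unsigned dev, u64 b)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_alloc_unpacked u;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, b),
			     BTREE_ITER_CACHED|BTREE_ITER_INTENT);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	u = bch2_alloc_unpack(k);
	u.gen++;

	/* transaction-scoped alloc + pack + queued update, or PTR_ERR: */
	ret = bch2_alloc_write(trans, &iter, &u, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
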
@@ -332,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
#undef x
}
-int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
+int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter;
@@ -343,43 +351,108 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
int ret;
bch2_trans_init(&trans, c, 0, 0);
+ down_read(&c->gc_lock);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
+ if (!bkey_is_alloc(k.k))
+ continue;
+
ca = bch_dev_bkey_exists(c, k.k->p.inode);
- g = __bucket(ca, k.k->p.offset, gc);
+ g = bucket(ca, k.k->p.offset);
u = bch2_alloc_unpack(k);
- if (!gc)
- *bucket_gen(ca, k.k->p.offset) = u.gen;
-
+ *bucket_gen(ca, k.k->p.offset) = u.gen;
g->_mark.gen = u.gen;
+ g->_mark.data_type = u.data_type;
+ g->_mark.dirty_sectors = u.dirty_sectors;
+ g->_mark.cached_sectors = u.cached_sectors;
+ g->_mark.stripe = u.stripe != 0;
+ g->stripe = u.stripe;
+ g->stripe_redundancy = u.stripe_redundancy;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
- g->oldest_gen = !gc ? u.oldest_gen : u.gen;
+ g->oldest_gen = u.oldest_gen;
g->gen_valid = 1;
-
- if (!gc ||
- (metadata_only &&
- (u.data_type == BCH_DATA_user ||
- u.data_type == BCH_DATA_cached ||
- u.data_type == BCH_DATA_parity))) {
- g->_mark.data_type = u.data_type;
- g->_mark.dirty_sectors = u.dirty_sectors;
- g->_mark.cached_sectors = u.cached_sectors;
- g->_mark.stripe = u.stripe != 0;
- g->stripe = u.stripe;
- g->stripe_redundancy = u.stripe_redundancy;
- }
-
}
bch2_trans_iter_exit(&trans, &iter);
+ up_read(&c->gc_lock);
bch2_trans_exit(&trans);
- if (ret)
+ if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ struct bkey_alloc_unpacked old_u, new_u;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+ ret = bch2_btree_key_cache_flush(trans,
+ BTREE_ID_alloc, iter->pos);
+ if (ret)
+ goto err;
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ old_u = bch2_alloc_unpack(k);
+ new_u = alloc_mem_to_key(c, iter);
+
+ if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+ return 0;
+
+ ret = bch2_alloc_write(trans, iter, &new_u,
+ BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|flags);
+err:
+ if (ret == -EINTR)
+ goto retry;
+ return ret;
+}
+
+int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bch_dev *ca;
+ unsigned i;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ for_each_member_device(ca, c, i) {
+ bch2_btree_iter_set_pos(&iter,
+ POS(ca->dev_idx, ca->mi.first_bucket));
+
+ while (iter.pos.offset < ca->mi.nbuckets) {
+ ret = bch2_alloc_write_key(&trans, &iter, flags);
+ if (ret) {
+ percpu_ref_put(&ca->ref);
+ goto err;
+ }
+ bch2_btree_iter_advance(&iter);
+ }
+ }
+err:
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
return ret;
}
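
Note: bch2_alloc_write_key() above follows the bcachefs transaction-restart convention: -EINTR means the transaction was restarted and everything read so far is stale, so the whole read-compare-write sequence re-runs from bch2_trans_begin(). The bare skeleton of that idiom, as a hedged sketch (do_one_update() is a placeholder):

static int run_with_retry(struct btree_trans *trans)
{
	int ret;
retry:
	bch2_trans_begin(trans);		/* reset for this attempt */

	ret = do_one_update(trans) ?:		/* placeholder body */
		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
	if (ret == -EINTR)			/* restarted: start over */
		goto retry;
	return ret;
}

Returning 0 early, as bch2_alloc_write_key() does when old_u and new_u compare equal, is safe here because nothing has been queued in the transaction at that point.
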
@@ -390,20 +463,19 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct bkey_s_c k;
struct bkey_alloc_unpacked u;
u64 *time, now;
int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
+ ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto out;
- u = bch2_alloc_unpack(k);
+ u = alloc_mem_to_key(c, &iter);
time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now);
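
Note: with BTREE_ITER_CACHED_NOFILL the key cache entry is not populated from the btree; bch2_btree_iter_traverse() only positions the iterator, and alloc_mem_to_key() then builds the unpacked state from the in-memory bucket marks. A hedged usage sketch, assuming bch2_trans_do() declares 'trans' locally as its use further down for bucket_invalidate_btree() suggests (touch_bucket() is hypothetical):

static int touch_bucket(struct bch_fs *c, unsigned dev, u64 bucket_nr)
{
	/* run in a fresh transaction, retrying on restart: */
	return bch2_trans_do(c, NULL, NULL, 0,
		bch2_bucket_io_time_reset(&trans, dev, bucket_nr, READ));
}
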
@@ -586,34 +658,56 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
return nr;
}
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+ if (m.journal_seq_valid) {
+ u64 journal_seq = atomic64_read(&c->journal.seq);
+ u64 bucket_seq = journal_seq;
+
+ bucket_seq &= ~((u64) U16_MAX);
+ bucket_seq |= m.journal_seq;
+
+ if (bucket_seq > journal_seq)
+ bucket_seq -= 1 << 16;
+
+ return bucket_seq;
+ } else {
+ return 0;
+ }
+}
+
static int bucket_invalidate_btree(struct btree_trans *trans,
- struct bch_dev *ca, u64 b,
- struct bkey_alloc_unpacked *u)
+ struct bch_dev *ca, u64 b)
{
struct bch_fs *c = trans->c;
+ struct bkey_alloc_unpacked u;
struct btree_iter iter;
- struct bkey_s_c k;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, b),
BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
+ ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
- *u = bch2_alloc_unpack(k);
- u->gen++;
- u->data_type = 0;
- u->dirty_sectors = 0;
- u->cached_sectors = 0;
- u->read_time = atomic64_read(&c->io_clock[READ].now);
- u->write_time = atomic64_read(&c->io_clock[WRITE].now);
+ u = alloc_mem_to_key(c, &iter);
+
+ u.gen++;
+ u.data_type = 0;
+ u.dirty_sectors = 0;
+ u.cached_sectors = 0;
+ u.read_time = atomic64_read(&c->io_clock[READ].now);
+ u.write_time = atomic64_read(&c->io_clock[WRITE].now);
- ret = bch2_alloc_write(trans, &iter, u,
+ ret = bch2_alloc_write(trans, &iter, &u,
BTREE_TRIGGER_BUCKET_INVALIDATE);
err:
bch2_trans_iter_exit(trans, &iter);
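
Worked example for bucket_journal_seq() above: only the low 16 bits of the journal sequence fit in the bucket mark, so the function splices them into the current 64-bit sequence; if the spliced value lands in the future, the saved bits must predate the last 16-bit wraparound, so it steps back one window (1 << 16). A self-contained check of the arithmetic (values are illustrative):

#include <assert.h>
#include <stdint.h>

static uint64_t splice_seq(uint64_t journal_seq, uint16_t lo16)
{
	uint64_t seq = (journal_seq & ~(uint64_t) UINT16_MAX) | lo16;

	if (seq > journal_seq)		/* from before the last wrap */
		seq -= 1 << 16;
	return seq;
}

int main(void)
{
	assert(splice_seq(0x12345, 0x2000) == 0x12000);	/* same window */
	assert(splice_seq(0x12345, 0x9abc) == 0x09abc);	/* previous window */
	return 0;
}
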
@@ -623,23 +717,21 @@ err:
static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
u64 *journal_seq, unsigned flags)
{
- struct bkey_alloc_unpacked u;
+ struct bucket *g;
+ struct bucket_mark m;
size_t b;
int ret = 0;
- /*
- * If the read-only path is trying to shut down, we can't be generating
- * new btree updates:
- */
- if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags))
- return 1;
-
BUG_ON(!ca->alloc_heap.used ||
!ca->alloc_heap.data[0].nr);
b = ca->alloc_heap.data[0].bucket;
/* first, put on free_inc and mark as owned by allocator: */
percpu_down_read(&c->mark_lock);
+ g = bucket(ca, b);
+ m = READ_ONCE(g->mark);
+
+ BUG_ON(m.dirty_sectors);
bch2_mark_alloc_bucket(c, ca, b, true);
@@ -648,15 +740,38 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(!fifo_push(&ca->free_inc, b));
spin_unlock(&c->freelist_lock);
+ /*
+ * If we're not invalidating cached data, we only increment the bucket
+ * gen in memory here, the incremented gen will be updated in the btree
+ * by bch2_trans_mark_pointer():
+ */
+ if (!m.cached_sectors &&
+ !bucket_needs_journal_commit(m, c->journal.last_seq_ondisk)) {
+ BUG_ON(m.data_type);
+ bucket_cmpxchg(g, m, m.gen++);
+ *bucket_gen(ca, b) = m.gen;
+ percpu_up_read(&c->mark_lock);
+ goto out;
+ }
+
percpu_up_read(&c->mark_lock);
+ /*
+ * If the read-only path is trying to shut down, we can't be generating
+ * new btree updates:
+ */
+ if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
+ ret = 1;
+ goto out;
+ }
+
ret = bch2_trans_do(c, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
flags,
- bucket_invalidate_btree(&trans, ca, b, &u));
-
+ bucket_invalidate_btree(&trans, ca, b));
+out:
if (!ret) {
/* remove from alloc_heap: */
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
@@ -672,7 +787,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
* bucket (i.e. deleting the last reference) before writing to
* this bucket again:
*/
- *journal_seq = max(*journal_seq, u.journal_seq);
+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
} else {
size_t b2;
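
Note: the new fast path in bch2_invalidate_one_bucket() bumps the gen only in memory when the bucket has no cached data and nothing awaiting a journal commit; per the comment above, the btree copy catches up later via bch2_trans_mark_pointer(), so the common case skips the btree transaction entirely. bucket_cmpxchg() is an atomic read-modify-write over the packed bucket_mark; conceptually it behaves like this hedged sketch over a plain 64-bit value (not the real bcachefs macro):

#include <stdatomic.h>
#include <stdint.h>

static uint64_t mark_bump_gen(_Atomic uint64_t *mark)
{
	uint64_t old = atomic_load(mark), new;

	do {
		new = old + 1;		/* e.g. m.gen++ on a local copy */
	} while (!atomic_compare_exchange_weak(mark, &old, new));
	/* on failure, 'old' is refreshed and the copy is rebuilt */

	return new;
}

The retry loop means a concurrent update to other fields of the mark is never silently overwritten.
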