summaryrefslogtreecommitdiff
path: root/libbcachefs/alloc_foreground.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/alloc_foreground.c')
-rw-r--r--libbcachefs/alloc_foreground.c211
1 files changed, 70 insertions, 141 deletions
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 32f2a241..095bfe7c 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -156,12 +156,24 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
return ob;
}
+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
+{
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs)
+ return false;
+
+ return bch2_is_superblock_bucket(ca, b);
+}
+
static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
{
BUG_ON(c->open_buckets_partial_nr >=
ARRAY_SIZE(c->open_buckets_partial));
spin_lock(&c->freelist_lock);
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
+ rcu_read_unlock();
+
ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] =
ob - c->open_buckets;
@@ -171,20 +183,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
closure_wake_up(&c->freelist_wait);
}
-/* _only_ for allocating the journal on a new device: */
-long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
-{
- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
- u64 b = ca->new_fs_bucket_idx++;
-
- if (!is_superblock_bucket(ca, b) &&
- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
- return b;
- }
-
- return -1;
-}
-
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
@@ -203,14 +201,16 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
}
static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
- u64 bucket,
+ u64 bucket, u8 gen,
enum bch_watermark watermark,
- const struct bch_alloc_v4 *a,
struct bucket_alloc_state *s,
struct closure *cl)
{
struct open_bucket *ob;
+ if (unlikely(is_superblock_bucket(c, ca, bucket)))
+ return NULL;
+
if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
s->skipped_nouse++;
return NULL;
@@ -257,7 +257,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ob->valid = true;
ob->sectors_free = ca->mi.bucket_size;
ob->dev = ca->dev_idx;
- ob->gen = a->gen;
+ ob->gen = gen;
ob->bucket = bucket;
spin_unlock(&ob->lock);
@@ -272,111 +272,26 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
}
static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
- enum bch_watermark watermark, u64 free_entry,
+ enum bch_watermark watermark,
struct bucket_alloc_state *s,
- struct bkey_s_c freespace_k,
+ struct btree_iter *freespace_iter,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter = { NULL };
- struct bkey_s_c k;
- struct open_bucket *ob;
- struct bch_alloc_v4 a_convert;
- const struct bch_alloc_v4 *a;
- u64 b = free_entry & ~(~0ULL << 56);
- unsigned genbits = free_entry >> 56;
- struct printbuf buf = PRINTBUF;
- int ret;
-
- if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
- prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
- " freespace key ",
- ca->mi.first_bucket, ca->mi.nbuckets);
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
-
- k = bch2_bkey_get_iter(trans, &iter,
- BTREE_ID_alloc, POS(ca->dev_idx, b),
- BTREE_ITER_cached);
- ret = bkey_err(k);
- if (ret) {
- ob = ERR_PTR(ret);
- goto err;
- }
+ u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
+ u8 gen;
- a = bch2_alloc_to_v4(k, &a_convert);
-
- if (a->data_type != BCH_DATA_free) {
- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
- ob = NULL;
- goto err;
- }
-
- prt_printf(&buf, "non free bucket in freespace btree\n"
- " freespace key ");
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- prt_printf(&buf, "\n ");
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
-
- if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
- " freespace key ",
- genbits, alloc_freespace_genbits(*a) >> 56);
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- prt_printf(&buf, "\n ");
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
-
- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) {
- struct bch_backpointer bp;
- struct bpos bp_pos = POS_MIN;
-
- ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1,
- &bp_pos, &bp,
- BTREE_ITER_nopreserve);
- if (ret) {
- ob = ERR_PTR(ret);
- goto err;
- }
-
- if (!bkey_eq(bp_pos, POS_MAX)) {
- /*
- * Bucket may have data in it - we don't call
- * bc2h_trans_inconnsistent() because fsck hasn't
- * finished yet
- */
- ob = NULL;
- goto err;
- }
- }
+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret)
+ return NULL;
- ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
- if (!ob)
- bch2_set_btree_iter_dontneed(&iter);
-err:
- if (iter.path)
- bch2_set_btree_iter_dontneed(&iter);
- bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
- return ob;
+ return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl);
}
/*
* This path is for before the freespace btree is initialized:
- *
- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
*/
static noinline struct open_bucket *
bch2_bucket_alloc_early(struct btree_trans *trans,
@@ -388,7 +303,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct btree_iter iter, citer;
struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+ u64 first_bucket = ca->mi.first_bucket;
u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
u64 alloc_start = max(first_bucket, *dev_alloc_cursor);
u64 alloc_cursor = alloc_start;
@@ -411,10 +326,6 @@ again:
if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
break;
- if (ca->new_fs_bucket_idx &&
- is_superblock_bucket(ca, k.k->p.offset))
- continue;
-
if (s->btree_bitmap != BTREE_BITMAP_ANY &&
s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -448,7 +359,7 @@ again:
s->buckets_seen++;
- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+ ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, a->gen, watermark, s, cl);
next:
bch2_set_btree_iter_dontneed(&citer);
bch2_trans_iter_exit(trans, &citer);
@@ -485,20 +396,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor));
u64 alloc_cursor = alloc_start;
int ret;
-
- BUG_ON(ca->new_fs_bucket_idx);
again:
- for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
- POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
- if (k.k->p.inode != ca->dev_idx)
- break;
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace,
+ POS(ca->dev_idx, alloc_cursor),
+ POS(ca->dev_idx, U64_MAX),
+ 0, k, ret) {
+ /*
+ * peek normally dosen't trim extents - they can span iter.pos,
+ * which is not what we want here:
+ */
+ iter.k.size = iter.k.p.offset - iter.pos.offset;
- for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
- alloc_cursor < k.k->p.offset;
- alloc_cursor++) {
+ while (iter.k.size) {
s->buckets_seen++;
- u64 bucket = alloc_cursor & ~(~0ULL << 56);
+ u64 bucket = iter.pos.offset & ~(~0ULL << 56);
if (s->btree_bitmap != BTREE_BITMAP_ANY &&
s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -507,32 +419,36 @@ again:
goto fail;
bucket = sector_to_bucket(ca,
- round_up(bucket_to_sector(ca, bucket) + 1,
+ round_up(bucket_to_sector(ca, bucket + 1),
1ULL << ca->mi.btree_bitmap_shift));
- u64 genbits = alloc_cursor >> 56;
- alloc_cursor = bucket | (genbits << 56);
+ alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56));
- if (alloc_cursor > k.k->p.offset)
- bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
s->skipped_mi_btree_bitmap++;
- continue;
+ goto next;
}
- ob = try_alloc_bucket(trans, ca, watermark,
- alloc_cursor, s, k, cl);
+ ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl);
if (ob) {
+ if (!IS_ERR(ob))
+ *dev_alloc_cursor = iter.pos.offset;
bch2_set_btree_iter_dontneed(&iter);
break;
}
- }
+ iter.k.size--;
+ iter.pos.offset++;
+ }
+next:
if (ob || ret)
break;
}
fail:
bch2_trans_iter_exit(trans, &iter);
- if (!ob && ret)
+ BUG_ON(ob && ret);
+
+ if (ret)
ob = ERR_PTR(ret);
if (!ob && alloc_start > ca->mi.first_bucket) {
@@ -540,8 +456,6 @@ fail:
goto again;
}
- *dev_alloc_cursor = alloc_cursor;
-
return ob;
}
@@ -591,6 +505,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca,
* @watermark: how important is this allocation?
* @data_type: BCH_DATA_journal, btree, user...
* @cl: if not NULL, closure to be used to wait if buckets not available
+ * @nowait: if true, do not wait for buckets to become available
* @usage: for secondarily also returning the current device usage
*
* Returns: an open_bucket on success, or an ERR_PTR() on failure.
@@ -625,6 +540,10 @@ again:
bch2_dev_do_invalidates(ca);
if (!avail) {
+ if (watermark > BCH_WATERMARK_normal &&
+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ goto alloc;
+
if (cl && !waiting) {
closure_wait(&c->freelist_wait, cl);
waiting = true;
@@ -972,7 +891,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
u64 avail;
bch2_dev_usage_read_fast(ca, &usage);
- avail = dev_buckets_free(ca, usage, watermark);
+ avail = dev_buckets_free(ca, usage, watermark) + ca->nr_partial_buckets;
if (!avail)
continue;
@@ -981,6 +900,10 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
i);
ob->on_partial_list = false;
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, ob);
@@ -1191,7 +1114,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
+
ob->on_partial_list = false;
+
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);