summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    Kent Overstreet <kent.overstreet@gmail.com>  2020-10-27 18:56:21 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>  2020-11-05 12:45:14 -0500
commit 8ded23970ab1c66ff0c6086987c092788b3e34a8 (patch)
tree   1e4d6df6763f95644323a2ee8fe99d2d157f6af4
parent 32d5b836d68773e49d65ad72c0bf8144ed27e478 (diff)
bcachefs: Inode create optimization
On workloads that do a lot of multithreaded creates all at once, lock contention on the inodes btree turns out to still be an issue. This patch adds a small buffer of inode numbers that are known to be free, so that we can avoid touching the btree on every create.

Also, this changes inode creates to update via the btree key cache for the initial create.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--  fs/bcachefs/bcachefs.h   |   4
-rw-r--r--  fs/bcachefs/fs-common.c  |   4
-rw-r--r--  fs/bcachefs/inode.c      | 137
-rw-r--r--  fs/bcachefs/inode.h      |   4
-rw-r--r--  fs/bcachefs/super.c      |   1

5 files changed, 100 insertions(+), 50 deletions(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 29f411635f29..8d775ec0bbdd 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -801,6 +801,10 @@ struct bch_fs {
struct mutex verify_lock;
#endif
+ struct mutex inode_create_lock;
+ unsigned unused_inodes_nr;
+ u64 unused_inodes[64];
+ u32 unused_inodes_gens[64];
u64 unused_inode_hint;
/*
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index 878419d40992..503ce1920f39 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -34,9 +34,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
if (!name)
new_inode->bi_flags |= BCH_INODE_UNLINKED;
- ret = bch2_inode_create(trans, new_inode,
- BLOCKDEV_INODE_MAX, 0,
- &c->unused_inode_hint);
+ ret = bch2_inode_create(trans, new_inode);
if (ret)
goto err;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index a988f0ea4eff..9a0991adf550 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -361,71 +361,120 @@ static inline u32 bkey_generation(struct bkey_s_c k)
}
}
-int bch2_inode_create(struct btree_trans *trans,
- struct bch_inode_unpacked *inode_u,
- u64 min, u64 max, u64 *hint)
+static int scan_free_inums(struct btree_trans *trans)
{
- struct bkey_inode_buf *inode_p;
+ struct bch_fs *c = trans->c;
struct btree_iter *iter = NULL;
struct bkey_s_c k;
- u64 start;
- int ret;
-
- if (!max)
- max = ULLONG_MAX;
-
- if (trans->c->opts.inodes_32bit)
- max = min_t(u64, max, U32_MAX);
+ u64 min = BLOCKDEV_INODE_MAX;
+ u64 max = c->opts.inodes_32bit
+ ? S32_MAX : S64_MAX;
+ u64 start = max(min, READ_ONCE(c->unused_inode_hint));
+ int ret = 0;
+
+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, start),
+ BTREE_ITER_SLOTS);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+again:
+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
+ if (bkey_cmp(iter->pos, POS(0, max)) > 0)
+ break;
- start = READ_ONCE(*hint);
+ /*
+ * This doesn't check the btree key cache, but we don't care:
+ * we have to recheck with an intent lock held on the slot we're
+ * inserting to anyways:
+ */
+ if (k.k->type != KEY_TYPE_inode) {
+ if (c->unused_inodes_nr < ARRAY_SIZE(c->unused_inodes)) {
+ c->unused_inodes[c->unused_inodes_nr] = k.k->p.offset;
+ c->unused_inodes_gens[c->unused_inodes_nr] = bkey_generation(k);
+ c->unused_inodes_nr++;
+ }
+
+ if (c->unused_inodes_nr == ARRAY_SIZE(c->unused_inodes))
+ goto out;
+ }
+ }
- if (start >= max || start < min)
+ if (!ret && start != min) {
+ max = start;
start = min;
+ bch2_btree_iter_set_pos(iter, POS(0, start));
+ goto again;
+ }
+out:
+ c->unused_inode_hint = iter->pos.offset;
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
+int bch2_inode_create(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode_u)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_inode_buf *inode_p;
+ struct btree_iter *iter = NULL;
+ struct bkey_s_c k;
+ u64 inum;
+ u32 generation;
+ int ret = 0;
inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
-again:
- for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- if (bkey_cmp(iter->pos, POS(0, max)) > 0)
- break;
- /*
- * There's a potential cache coherency issue with the btree key
- * cache code here - we're iterating over the btree, skipping
- * that cache. We should never see an empty slot that isn't
- * actually empty due to a pending update in the key cache
- * because the update that creates the inode isn't done with a
- * cached iterator, but - better safe than sorry, check the
- * cache before using a slot:
- */
- if (k.k->type != KEY_TYPE_inode &&
- !bch2_btree_key_cache_find(trans->c, BTREE_ID_INODES, iter->pos))
- goto found_slot;
+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS_MIN,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+retry:
+ if (!mutex_trylock(&c->inode_create_lock)) {
+ bch2_trans_unlock(trans);
+ mutex_lock(&c->inode_create_lock);
+ if (!bch2_trans_relock(trans)) {
+ mutex_unlock(&c->inode_create_lock);
+ ret = -EINTR;
+ goto err;
+ }
}
- bch2_trans_iter_put(trans, iter);
+ if (!c->unused_inodes_nr)
+ ret = scan_free_inums(trans);
+ if (!ret && !c->unused_inodes_nr)
+ ret = -ENOSPC;
+ if (!ret) {
+ --c->unused_inodes_nr;
+ inum = c->unused_inodes[c->unused_inodes_nr];
+ generation = c->unused_inodes_gens[c->unused_inodes_nr];
+ }
+
+ mutex_unlock(&c->inode_create_lock);
if (ret)
- return ret;
+ goto err;
- if (start != min) {
- /* Retry from start */
- start = min;
- goto again;
- }
+ bch2_btree_iter_set_pos(iter, POS(0, inum));
+
+ /* Recheck that the slot is free with an intent lock held: */
+ k = bch2_btree_iter_peek_cached(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type == KEY_TYPE_inode)
+ goto retry;
- return -ENOSPC;
-found_slot:
- *hint = k.k->p.offset;
- inode_u->bi_inum = k.k->p.offset;
- inode_u->bi_generation = bkey_generation(k);
+ inode_u->bi_inum = inum;
+ inode_u->bi_generation = generation;
bch2_inode_pack(inode_p, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
+err:
bch2_trans_iter_put(trans, iter);
- return 0;
+ return ret;
}
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index bb759a46dc41..5743be2307f3 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -60,9 +60,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
uid_t, gid_t, umode_t, dev_t,
struct bch_inode_unpacked *);
-int bch2_inode_create(struct btree_trans *,
- struct bch_inode_unpacked *,
- u64, u64, u64 *);
+int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
int bch2_inode_rm(struct bch_fs *, u64);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 015bbd9f21fd..6064eff12cd9 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -696,6 +696,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
seqcount_init(&c->usage_lock);
sema_init(&c->io_in_flight, 64);
+ mutex_init(&c->inode_create_lock);
c->copy_gc_enabled = 1;
c->rebalance.enabled = 1;