author    Kent Overstreet <kent.overstreet@gmail.com>    2018-06-02 17:04:20 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>    2018-06-03 21:01:03 -0400
commit    365f2957b1ba1375264dda9309cc9ea9ec5992d8 (patch)
tree      23908819d2a4d92d9a3d422fd35552a40984e547
parent    3756273fe2dd369f2a8ee7a3ba09df97510d02e3 (diff)
bcachefs: implement BTREE_INSERT_NOUNLOCK
BTREE_INSERT_NOUNLOCK means after a successful btree update, do not drop any locks (e.g. while merging nodes). This will be used to fix some locking issues, primarily related to bi_size in bch_inode_info.
-rw-r--r--  fs/bcachefs/btree_cache.c            |  51
-rw-r--r--  fs/bcachefs/btree_cache.h            |   2
-rw-r--r--  fs/bcachefs/btree_gc.c               |   2
-rw-r--r--  fs/bcachefs/btree_update.h           |  40
-rw-r--r--  fs/bcachefs/btree_update_interior.c  | 125
-rw-r--r--  fs/bcachefs/btree_update_interior.h  |  38
-rw-r--r--  fs/bcachefs/btree_update_leaf.c      | 299
7 files changed, 350 insertions(+), 207 deletions(-)
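To make the new flag's intent concrete, here is a hedged sketch of how a caller might combine BTREE_INSERT_NOUNLOCK with BTREE_INSERT_ATOMIC. Only bch2_btree_iter_init(), bch2_btree_insert_at(), BTREE_INSERT_ENTRY() and bch2_btree_iter_unlock() are taken from the code in the diff below; the wrapper function and the NULL disk-reservation/hook arguments are illustrative assumptions, not part of this commit.

/*
 * Hypothetical usage sketch (not from this commit): keep iterator locks held
 * across a successful update so state computed under the lock (e.g. bi_size
 * in bch_inode_info) can't change before the caller uses it.
 */
static int example_insert_keeping_locks(struct bch_fs *c, enum btree_id id,
					struct bkey_i *k, u64 *journal_seq)
{
	struct btree_iter iter;
	int ret;

	bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
			     BTREE_ITER_INTENT);

	/*
	 * BTREE_INSERT_ATOMIC: if locks had to be dropped, return -EINTR
	 * instead of retrying internally.
	 * BTREE_INSERT_NOUNLOCK: after a successful update, don't drop locks
	 * (e.g. for btree node merging) -- the leaf stays intent locked.
	 */
	ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
				   BTREE_INSERT_ATOMIC|
				   BTREE_INSERT_NOUNLOCK,
				   BTREE_INSERT_ENTRY(&iter, k));

	/* on success, further work can be done under the still-held lock */

	bch2_btree_iter_unlock(&iter);
	return ret;
}

The design point is that, with the flag set, a successful update leaves the caller holding its intent lock on the leaf, so whatever it peeked and decided to overwrite is guaranteed not to have changed underneath it.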
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index c950f2564f25..4292dd7fff8e 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -649,7 +649,14 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
struct btree *b;
struct bset_tree *t;
- /* btree_node_fill() requires parent to be locked: */
+ /*
+ * XXX: locking optimization
+ *
+ * we can make the locking looser here - caller can drop lock on parent
+ * node before locking child node (and potentially blocking): we just
+ * have to have bch2_btree_node_fill() call relock on the parent and
+ * return -EINTR if that fails
+ */
EBUG_ON(!btree_node_locked(iter, level + 1));
EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
@@ -749,6 +756,7 @@ retry:
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_iter *iter,
struct btree *b,
+ bool may_drop_locks,
enum btree_node_sibling sib)
{
struct btree *parent;
@@ -785,26 +793,47 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent);
- if (IS_ERR(ret) && PTR_ERR(ret) == -EINTR) {
- btree_node_unlock(iter, level);
-
+ if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
if (!bch2_btree_node_relock(iter, level + 1)) {
bch2_btree_iter_set_locks_want(iter, level + 2);
return ERR_PTR(-EINTR);
}
- ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent);
- }
+#if 0
+ /*
+ * we might have got -EINTR because trylock failed, and we're
+ * holding other locks that would cause us to deadlock:
+ *
+ * to do this correctly, we need a bch2_btree_iter_relock()
+ * that relocks all linked iters
+ */
+ struct btree_iter *linked;
- if (!bch2_btree_node_relock(iter, level)) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+ for_each_linked_btree_iter(iter, linked) {
+ if (btree_iter_cmp(iter, linked) < 0)
+ __bch2_btree_iter_unlock(linked);
- if (!IS_ERR(ret)) {
- six_unlock_intent(&ret->lock);
- ret = ERR_PTR(-EINTR);
+ }
+#endif
+ if (sib == btree_prev_sib)
+ btree_node_unlock(iter, level);
+
+ ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+ SIX_LOCK_intent);
+
+ if (!bch2_btree_node_relock(iter, level)) {
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+
+ if (!IS_ERR(ret)) {
+ six_unlock_intent(&ret->lock);
+ ret = ERR_PTR(-EINTR);
+ }
}
}
+ BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
+ !btree_node_locked(iter, level));
+
return ret;
}
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index e021d6e9422a..43109d086479 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -26,7 +26,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
enum six_lock_type);
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
- struct btree *,
+ struct btree *, bool,
enum btree_node_sibling);
void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 02b14e38ffda..0904f445e375 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -780,7 +780,7 @@ next:
bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key);
/* Insert the newly coalesced nodes */
- bch2_btree_insert_node(as, parent, iter, &keylist);
+ bch2_btree_insert_node(as, parent, iter, &keylist, 0);
BUG_ON(!bch2_keylist_empty(&keylist));
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index f357095d5b4f..aac97958cc3b 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -85,31 +85,49 @@ int __bch2_btree_insert_at(struct btree_insert *);
__VA_ARGS__ \
}})
+enum {
+ __BTREE_INSERT_ATOMIC,
+ __BTREE_INSERT_NOUNLOCK,
+ __BTREE_INSERT_NOFAIL,
+ __BTREE_INSERT_USE_RESERVE,
+ __BTREE_INSERT_USE_ALLOC_RESERVE,
+ __BTREE_INSERT_JOURNAL_REPLAY,
+ __BTREE_INSERT_NOWAIT,
+ __BTREE_INSERT_GC_LOCK_HELD,
+ __BCH_HASH_SET_MUST_CREATE,
+ __BCH_HASH_SET_MUST_REPLACE,
+};
+
+/*
+ * Don't drop/retake locks before doing btree update, instead return -EINTR if
+ * we had to drop locks for any reason
+ */
+#define BTREE_INSERT_ATOMIC (1 << __BTREE_INSERT_ATOMIC)
+
/*
- * Don't drop/retake locks: instead return -EINTR if need to upgrade to intent
- * locks, -EAGAIN if need to wait on btree reserve
+ * Don't drop locks _after_ successfully updating btree:
*/
-#define BTREE_INSERT_ATOMIC (1 << 0)
+#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK)
/* Don't check for -ENOSPC: */
-#define BTREE_INSERT_NOFAIL (1 << 1)
+#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
/* for copygc, or when merging btree nodes */
-#define BTREE_INSERT_USE_RESERVE (1 << 2)
-#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << 3)
+#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
+#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
/*
* Insert is for journal replay: don't get journal reservations, or mark extents
* (bch_mark_key)
*/
-#define BTREE_INSERT_JOURNAL_REPLAY (1 << 4)
+#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
/* Don't block on allocation failure (for new btree nodes: */
-#define BTREE_INSERT_NOWAIT (1 << 5)
-#define BTREE_INSERT_GC_LOCK_HELD (1 << 6)
+#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
+#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
-#define BCH_HASH_SET_MUST_CREATE (1 << 7)
-#define BCH_HASH_SET_MUST_REPLACE (1 << 8)
+#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE)
+#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE)
int bch2_btree_delete_at(struct btree_iter *, unsigned);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 92e19c4eae7e..65b345db2931 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1362,7 +1362,8 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
}
static void btree_split(struct btree_update *as, struct btree *b,
- struct btree_iter *iter, struct keylist *keys)
+ struct btree_iter *iter, struct keylist *keys,
+ unsigned flags)
{
struct bch_fs *c = as->c;
struct btree *parent = btree_node_parent(iter, b);
@@ -1425,7 +1426,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
if (parent) {
/* Split a non root node */
- bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
} else if (n3) {
bch2_btree_set_root(as, n3, iter);
} else {
@@ -1511,7 +1512,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
* for leaf nodes -- inserts into interior nodes have to be atomic.
*/
void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
- struct btree_iter *iter, struct keylist *keys)
+ struct btree_iter *iter, struct keylist *keys,
+ unsigned flags)
{
struct bch_fs *c = as->c;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
@@ -1551,14 +1553,14 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
btree_node_interior_verify(b);
- bch2_foreground_maybe_merge(c, iter, b->level);
+ bch2_foreground_maybe_merge(c, iter, b->level, flags);
return;
split:
- btree_split(as, b, iter, keys);
+ btree_split(as, b, iter, keys, flags);
}
int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
- unsigned btree_reserve_flags)
+ unsigned flags)
{
struct btree *b = iter->l[0].b;
struct btree_update *as;
@@ -1572,14 +1574,17 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
*/
for_each_linked_btree_iter(iter, linked)
if (linked->btree_id == BTREE_ID_EXTENTS)
- btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;
+ flags |= BTREE_INSERT_USE_RESERVE;
if (iter->btree_id == BTREE_ID_EXTENTS)
- btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;
+ flags |= BTREE_INSERT_USE_RESERVE;
closure_init_stack(&cl);
/* Hack, because gc and splitting nodes doesn't mix yet: */
if (!down_read_trylock(&c->gc_lock)) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ return -EINTR;
+
bch2_btree_iter_unlock(iter);
down_read(&c->gc_lock);
@@ -1597,20 +1602,19 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
}
as = bch2_btree_update_start(c, iter->btree_id,
- btree_update_reserve_required(c, b),
- btree_reserve_flags, &cl);
+ btree_update_reserve_required(c, b), flags,
+ !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
if (ret == -EAGAIN) {
+ BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_btree_iter_unlock(iter);
- up_read(&c->gc_lock);
- closure_sync(&cl);
- return -EINTR;
+ ret = -EINTR;
}
goto out;
}
- btree_split(as, b, iter, NULL);
+ btree_split(as, b, iter, NULL, flags);
bch2_btree_update_done(as);
bch2_btree_iter_set_locks_want(iter, 1);
@@ -1620,10 +1624,11 @@ out:
return ret;
}
-int __bch2_foreground_maybe_merge(struct bch_fs *c,
- struct btree_iter *iter,
- unsigned level,
- enum btree_node_sibling sib)
+void __bch2_foreground_maybe_merge(struct bch_fs *c,
+ struct btree_iter *iter,
+ unsigned level,
+ unsigned flags,
+ enum btree_node_sibling sib)
{
struct btree_update *as;
struct bkey_format_state new_s;
@@ -1636,29 +1641,29 @@ int __bch2_foreground_maybe_merge(struct bch_fs *c,
closure_init_stack(&cl);
retry:
- if (!bch2_btree_node_relock(iter, level))
- return 0;
+ BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b;
parent = btree_node_parent(iter, b);
if (!parent)
- return 0;
+ goto out;
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
- return 0;
+ goto out;
/* XXX: can't be holding read locks */
- m = bch2_btree_node_get_sibling(c, iter, b, sib);
+ m = bch2_btree_node_get_sibling(c, iter, b,
+ !(flags & BTREE_INSERT_NOUNLOCK), sib);
if (IS_ERR(m)) {
ret = PTR_ERR(m);
- goto out;
+ goto err;
}
/* NULL means no sibling: */
if (!m) {
b->sib_u64s[sib] = U16_MAX;
- return 0;
+ goto out;
}
if (sib == btree_prev_sib) {
@@ -1688,33 +1693,26 @@ retry:
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
six_unlock_intent(&m->lock);
- return 0;
+ goto out;
}
/* We're changing btree topology, doesn't mix with gc: */
- if (!down_read_trylock(&c->gc_lock)) {
- six_unlock_intent(&m->lock);
- bch2_btree_iter_unlock(iter);
-
- down_read(&c->gc_lock);
- up_read(&c->gc_lock);
- ret = -EINTR;
- goto out;
- }
+ if (!down_read_trylock(&c->gc_lock))
+ goto err_cycle_gc_lock;
if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) {
ret = -EINTR;
- goto out_unlock;
+ goto err_unlock;
}
as = bch2_btree_update_start(c, iter->btree_id,
btree_update_reserve_required(c, parent) + 1,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
- &cl);
+ !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
- goto out_unlock;
+ goto err_unlock;
}
trace_btree_merge(c, b);
@@ -1744,7 +1742,7 @@ retry:
bch2_btree_node_write(c, n, SIX_LOCK_intent);
- bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
bch2_btree_open_bucket_put(c, n);
bch2_btree_node_free_inmem(c, b, iter);
@@ -1754,26 +1752,47 @@ retry:
bch2_btree_iter_verify(iter, n);
bch2_btree_update_done(as);
-out_unlock:
- if (ret != -EINTR && ret != -EAGAIN)
- bch2_btree_iter_set_locks_want(iter, 1);
+
six_unlock_intent(&m->lock);
up_read(&c->gc_lock);
out:
- if (ret == -EAGAIN || ret == -EINTR) {
- bch2_btree_iter_unlock(iter);
- ret = -EINTR;
- }
-
+ bch2_btree_iter_set_locks_want(iter, 1);
closure_sync(&cl);
+ return;
- if (ret == -EINTR) {
+err_cycle_gc_lock:
+ six_unlock_intent(&m->lock);
+
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ goto out;
+
+ bch2_btree_iter_unlock(iter);
+
+ down_read(&c->gc_lock);
+ up_read(&c->gc_lock);
+ ret = -EINTR;
+ goto err;
+
+err_unlock:
+ if (ret != -EINTR && ret != -EAGAIN)
+ bch2_btree_iter_set_locks_want(iter, 1);
+ six_unlock_intent(&m->lock);
+ up_read(&c->gc_lock);
+err:
+ BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
+
+ if ((ret == -EAGAIN || ret == -EINTR) &&
+ !(flags & BTREE_INSERT_NOUNLOCK)) {
+ bch2_btree_iter_unlock(iter);
+ closure_sync(&cl);
ret = bch2_btree_iter_traverse(iter);
- if (!ret)
- goto retry;
+ if (ret)
+ goto out;
+
+ goto retry;
}
- return ret;
+ goto out;
}
static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
@@ -1806,7 +1825,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
- bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
} else {
bch2_btree_set_root(as, n, iter);
}
@@ -1920,7 +1939,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
}
bch2_keylist_add(&as->parent_keys, &new_key->k_i);
- bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+ bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0);
if (new_hash) {
mutex_lock(&c->btree_cache.lock);
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index abf14e4c41dc..3a17de5ca43e 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -146,35 +146,51 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *,
struct btree *);
void bch2_btree_insert_node(struct btree_update *, struct btree *,
- struct btree_iter *, struct keylist *);
+ struct btree_iter *, struct keylist *,
+ unsigned);
int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned);
-int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
- unsigned, enum btree_node_sibling);
+void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
+ unsigned, unsigned, enum btree_node_sibling);
-static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
+static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
struct btree_iter *iter,
- unsigned level,
+ unsigned level, unsigned flags,
enum btree_node_sibling sib)
{
struct btree *b;
+ /*
+ * iterators are inconsistent when they hit end of leaf, until
+ * traversed again
+ *
+ * XXX inconsistent how?
+ */
+ if (iter->flags & BTREE_ITER_AT_END_OF_LEAF)
+ return;
+
+ if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
+ return;
+
if (!bch2_btree_node_relock(iter, level))
- return 0;
+ return;
b = iter->l[level].b;
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
- return 0;
+ return;
- return __bch2_foreground_maybe_merge(c, iter, level, sib);
+ __bch2_foreground_maybe_merge(c, iter, level, flags, sib);
}
static inline void bch2_foreground_maybe_merge(struct bch_fs *c,
struct btree_iter *iter,
- unsigned level)
+ unsigned level,
+ unsigned flags)
{
- bch2_foreground_maybe_merge_sibling(c, iter, level, btree_prev_sib);
- bch2_foreground_maybe_merge_sibling(c, iter, level, btree_next_sib);
+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
+ btree_prev_sib);
+ bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
+ btree_next_sib);
}
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index cc41140fbe3a..10cb8947109c 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -227,19 +227,36 @@ btree_insert_key_leaf(struct btree_insert *trans,
return ret;
}
+#define trans_for_each_entry(trans, i) \
+ for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+
+/*
+ * We sort transaction entries so that if multiple iterators point to the same
+ * leaf node they'll be adjacent:
+ */
static bool same_leaf_as_prev(struct btree_insert *trans,
struct btree_insert_entry *i)
{
- /*
- * Because we sorted the transaction entries, if multiple iterators
- * point to the same leaf node they'll always be adjacent now:
- */
return i != trans->entries &&
i[0].iter->l[0].b == i[-1].iter->l[0].b;
}
-#define trans_for_each_entry(trans, i) \
- for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+static inline struct btree_insert_entry *trans_next_leaf(struct btree_insert *trans,
+ struct btree_insert_entry *i)
+{
+ struct btree *b = i->iter->l[0].b;
+
+ do {
+ i++;
+ } while (i < trans->entries + trans->nr && b == i->iter->l[0].b);
+
+ return i;
+}
+
+#define trans_for_each_leaf(trans, i) \
+ for ((i) = (trans)->entries; \
+ (i) < (trans)->entries + (trans)->nr; \
+ (i) = trans_next_leaf(trans, i))
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
@@ -262,19 +279,16 @@ static void multi_lock_write(struct bch_fs *c, struct btree_insert *trans)
{
struct btree_insert_entry *i;
- trans_for_each_entry(trans, i)
- if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_lock_for_insert(c, i->iter->l[0].b,
- i->iter);
+ trans_for_each_leaf(trans, i)
+ bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
}
static void multi_unlock_write(struct btree_insert *trans)
{
struct btree_insert_entry *i;
- trans_for_each_entry(trans, i)
- if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+ trans_for_each_leaf(trans, i)
+ bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
}
static inline int btree_trans_cmp(struct btree_insert_entry l,
@@ -285,56 +299,24 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
/* Normal update interface: */
-/**
- * __bch_btree_insert_at - insert keys at given iterator positions
- *
- * This is main entry point for btree updates.
- *
- * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- * if passed BTREE_INSERT_ATOMIC.
- * -EROFS: filesystem read only
- * -EIO: journal or btree node IO error
+/*
+ * Get journal reservation, take write locks, and attempt to do btree update(s):
*/
-int __bch2_btree_insert_at(struct btree_insert *trans)
+static inline int do_btree_insert_at(struct btree_insert *trans,
+ struct btree_iter **split,
+ bool *cycle_gc_lock)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
- struct btree_iter *split = NULL;
- bool cycle_gc_lock = false;
unsigned u64s;
int ret;
- trans_for_each_entry(trans, i) {
- BUG_ON(i->iter->level);
- BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
- BUG_ON(debug_check_bkeys(c) &&
- bch2_bkey_invalid(c, i->iter->btree_id,
- bkey_i_to_s_c(i->k)));
- }
-
- bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
-
- if (unlikely(!percpu_ref_tryget(&c->writes)))
- return -EROFS;
-retry_locks:
- ret = -EINTR;
- trans_for_each_entry(trans, i) {
- if (!bch2_btree_iter_set_locks_want(i->iter, 1))
- goto err;
+ trans_for_each_entry(trans, i)
+ BUG_ON(i->done);
- if (i->iter->uptodate == BTREE_ITER_NEED_TRAVERSE) {
- ret = bch2_btree_iter_traverse(i->iter);
- if (ret)
- goto err;
- }
- }
-retry:
- trans->did_work = false;
u64s = 0;
trans_for_each_entry(trans, i)
- if (!i->done)
- u64s += jset_u64s(i->k->k.u64s + i->extra_res);
+ u64s += jset_u64s(i->k->k.u64s + i->extra_res);
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
@@ -344,13 +326,13 @@ retry:
u64s, u64s)
: 0;
if (ret)
- goto err;
+ return ret;
multi_lock_write(c, trans);
if (race_fault()) {
ret = -EINTR;
- goto unlock;
+ goto out;
}
u64s = 0;
@@ -365,129 +347,203 @@ retry:
* bch2_btree_node_write(), converting an unwritten bset to a
* written one
*/
- if (!i->done) {
- u64s += i->k->k.u64s + i->extra_res;
- if (!bch2_btree_node_insert_fits(c,
- i->iter->l[0].b, u64s)) {
- split = i->iter;
- goto unlock;
- }
+ u64s += i->k->k.u64s + i->extra_res;
+ if (!bch2_btree_node_insert_fits(c,
+ i->iter->l[0].b, u64s)) {
+ ret = -EINTR;
+ *split = i->iter;
+ goto out;
}
}
- ret = 0;
- split = NULL;
- cycle_gc_lock = false;
-
trans_for_each_entry(trans, i) {
- if (i->done)
- continue;
-
switch (btree_insert_key_leaf(trans, i)) {
case BTREE_INSERT_OK:
i->done = true;
break;
case BTREE_INSERT_JOURNAL_RES_FULL:
case BTREE_INSERT_NEED_TRAVERSE:
- ret = -EINTR;
- break;
case BTREE_INSERT_NEED_RESCHED:
- ret = -EAGAIN;
+ ret = -EINTR;
break;
case BTREE_INSERT_BTREE_NODE_FULL:
- split = i->iter;
+ ret = -EINTR;
+ *split = i->iter;
break;
case BTREE_INSERT_ENOSPC:
ret = -ENOSPC;
break;
case BTREE_INSERT_NEED_GC_LOCK:
- cycle_gc_lock = true;
ret = -EINTR;
+ *cycle_gc_lock = true;
break;
default:
BUG();
}
- if (!trans->did_work && (ret || split))
+ /*
+ * If we did some work (i.e. inserted part of an extent),
+ * we have to do all the other updates as well:
+ */
+ if (!trans->did_work && (ret || *split))
break;
}
-unlock:
+out:
multi_unlock_write(trans);
bch2_journal_res_put(&c->journal, &trans->journal_res);
- if (split)
- goto split;
- if (ret)
- goto err;
+ return ret;
+}
- trans_for_each_entry(trans, i)
- if (i->iter->flags & BTREE_ITER_AT_END_OF_LEAF)
- goto out;
+/**
+ * __bch_btree_insert_at - insert keys at given iterator positions
+ *
+ * This is main entry point for btree updates.
+ *
+ * Return values:
+ * -EINTR: locking changed, this function should be called again. Only returned
+ * if passed BTREE_INSERT_ATOMIC.
+ * -EROFS: filesystem read only
+ * -EIO: journal or btree node IO error
+ */
+int __bch2_btree_insert_at(struct btree_insert *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ struct btree_iter *split = NULL;
+ bool cycle_gc_lock = false;
+ unsigned flags;
+ int ret;
trans_for_each_entry(trans, i) {
- /*
- * iterators are inconsistent when they hit end of leaf, until
- * traversed again
- */
- if (i->iter->uptodate < BTREE_ITER_NEED_TRAVERSE &&
- !same_leaf_as_prev(trans, i))
- bch2_foreground_maybe_merge(c, i->iter, 0);
+ BUG_ON(i->iter->level);
+ BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+ BUG_ON(debug_check_bkeys(c) &&
+ bch2_bkey_invalid(c, i->iter->btree_id,
+ bkey_i_to_s_c(i->k)));
+ BUG_ON(i->iter->uptodate == BTREE_ITER_END);
+ }
+
+ /* for sanity purposes: */
+ if (trans->nr > 1)
+ trans->flags |= BTREE_INSERT_ATOMIC;
+
+ bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
+
+ if (unlikely(!percpu_ref_tryget(&c->writes)))
+ return -EROFS;
+retry:
+ split = NULL;
+ cycle_gc_lock = false;
+
+ trans_for_each_entry(trans, i) {
+ if (i->iter->locks_want < 1 &&
+ !bch2_btree_iter_set_locks_want(i->iter, 1)) {
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (i->iter->uptodate > BTREE_ITER_NEED_PEEK) {
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (i->iter->flags & BTREE_ITER_ERROR) {
+ ret = -EIO;
+ goto err;
+ }
}
+
+ ret = do_btree_insert_at(trans, &split, &cycle_gc_lock);
+ if (unlikely(ret))
+ goto err;
+
+ trans_for_each_leaf(trans, i)
+ bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+
+ trans_for_each_entry(trans, i)
+ bch2_btree_iter_set_locks_want(i->iter, 1);
out:
+ percpu_ref_put(&c->writes);
+
+ if ((trans->flags & BTREE_INSERT_NOUNLOCK) && trans->did_work)
+ trans_for_each_entry(trans, i)
+ BUG_ON(!btree_node_locked(i->iter, 0));
+
/* make sure we didn't lose an error: */
if (!ret && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
trans_for_each_entry(trans, i)
BUG_ON(!i->done);
- percpu_ref_put(&c->writes);
+ BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
+
return ret;
-split:
- /*
- * have to drop journal res before splitting, because splitting means
- * allocating new btree nodes, and holding a journal reservation
- * potentially blocks the allocator:
- */
- ret = bch2_btree_split_leaf(c, split, trans->flags);
+err:
+ flags = trans->flags;
/*
- * This can happen when we insert part of an extent - with an update
- * with multiple keys, we don't want to redo the entire update - that's
- * just too confusing:
+ * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
+ * update; if we haven't done anything yet it doesn't apply
*/
- if (!ret &&
- (trans->flags & BTREE_INSERT_ATOMIC) &&
- trans->did_work)
- ret = -EINTR;
+ if (!trans->did_work)
+ flags &= ~BTREE_INSERT_NOUNLOCK;
- if (ret)
- goto err;
+ if (split) {
+ ret = bch2_btree_split_leaf(c, split, flags);
+
+ /*
+ * if the split succeeded without dropping locks the insert will
+ * still be atomic (in the BTREE_INSERT_ATOMIC sense, what the
+ * caller peeked() and is overwriting won't have changed)
+ */
+#if 0
+ /*
+ * XXX:
+ * split -> btree node merging (of parent node) might still drop
+ * locks when we're not passing it BTREE_INSERT_NOUNLOCK
+ */
+ if (!ret && !trans->did_work)
+ goto retry;
+#endif
+
+ /*
+ * don't care if we got ENOSPC because we told split it
+ * couldn't block:
+ */
+ if (!ret || (flags & BTREE_INSERT_NOUNLOCK))
+ ret = -EINTR;
+ }
- /*
- * if the split didn't have to drop locks the insert will still be
- * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked()
- * and is overwriting won't have changed)
- */
- goto retry_locks;
-err:
if (cycle_gc_lock) {
- down_read(&c->gc_lock);
+ if (!down_read_trylock(&c->gc_lock)) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ goto out;
+
+ bch2_btree_iter_unlock(trans->entries[0].iter);
+ down_read(&c->gc_lock);
+ }
up_read(&c->gc_lock);
}
if (ret == -EINTR) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ goto out;
+
trans_for_each_entry(trans, i) {
int ret2 = bch2_btree_iter_traverse(i->iter);
if (ret2) {
ret = ret2;
goto out;
}
+
+ BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
}
/*
* BTREE_ITER_ATOMIC means we have to return -EINTR if we
* dropped locks:
*/
- if (!(trans->flags & BTREE_INSERT_ATOMIC))
+ if (!(flags & BTREE_INSERT_ATOMIC))
goto retry;
}
@@ -549,7 +605,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, flags,
- BTREE_INSERT_ENTRY(&iter, k));
+ BTREE_INSERT_ENTRY(&iter, k));
bch2_btree_iter_unlock(&iter);
return ret;
@@ -584,6 +640,11 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
if (bkey_cmp(iter.pos, end) >= 0)
break;
+ if (k.k->type == KEY_TYPE_DISCARD) {
+ bch2_btree_iter_next(&iter);
+ continue;
+ }
+
bkey_init(&delete.k);
/*
@@ -615,8 +676,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
}
ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq,
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &delete));
+ BTREE_INSERT_NOFAIL,
+ BTREE_INSERT_ENTRY(&iter, &delete));
if (ret)
break;