author     Kent Overstreet <kent.overstreet@gmail.com>   2018-06-02 17:04:20 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2018-06-03 21:01:03 -0400
commit     365f2957b1ba1375264dda9309cc9ea9ec5992d8
tree       23908819d2a4d92d9a3d422fd35552a40984e547
parent     3756273fe2dd369f2a8ee7a3ba09df97510d02e3
bcachefs: implement BTREE_INSERT_NOUNLOCK
BTREE_INSERT_NOUNLOCK means that after a successful btree update we do not
drop any locks (e.g. while merging nodes).

This will be used to fix some locking issues, primarily around bi_size in
bch_inode_info.
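The intended caller pattern is to combine BTREE_INSERT_NOUNLOCK with
BTREE_INSERT_ATOMIC, so that a successful update returns with the leaf still
intent-locked and dependent in-memory state can be updated before the lock is
dropped. A minimal sketch of such a caller (the surrounding function, the
inode argument, and the in-memory update are hypothetical and not part of this
patch; only the bch2_btree_* calls and the flags come from this tree):

	/*
	 * Hypothetical caller sketch.  With BTREE_INSERT_NOUNLOCK, a
	 * successful insert returns with the leaf still intent-locked, so the
	 * in-memory size can be updated before anyone can observe the new
	 * btree state.
	 */
	static int set_inode_size(struct bch_fs *c, struct inode *vfs_inode,
				  struct bkey_i *inode_key, u64 new_size)
	{
		struct btree_iter iter;
		int ret;

		bch2_btree_iter_init(&iter, c, BTREE_ID_INODES,
				     bkey_start_pos(&inode_key->k),
				     BTREE_ITER_INTENT);

		ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
					   BTREE_INSERT_ATOMIC|
					   BTREE_INSERT_NOUNLOCK,
					   BTREE_INSERT_ENTRY(&iter, inode_key));
		if (!ret)
			i_size_write(vfs_inode, new_size); /* leaf still locked here */

		bch2_btree_iter_unlock(&iter);
		return ret;
	}

Because BTREE_INSERT_ATOMIC is set, the caller also has to handle -EINTR by
retrying the whole operation; that loop is omitted from the sketch.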
-rw-r--r--   fs/bcachefs/btree_cache.c           |  51
-rw-r--r--   fs/bcachefs/btree_cache.h           |   2
-rw-r--r--   fs/bcachefs/btree_gc.c              |   2
-rw-r--r--   fs/bcachefs/btree_update.h          |  40
-rw-r--r--   fs/bcachefs/btree_update_interior.c | 125
-rw-r--r--   fs/bcachefs/btree_update_interior.h |  38
-rw-r--r--   fs/bcachefs/btree_update_leaf.c     | 299

7 files changed, 350 insertions, 207 deletions
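Before the diff itself, one mechanical change in btree_update.h is worth
calling out: the BTREE_INSERT_* flags move from hand-numbered shifts to bit
positions taken from an anonymous enum, so a new flag such as
BTREE_INSERT_NOUNLOCK can be inserted without renumbering everything below it.
A standalone sketch of the pattern (illustrative flag names, not the patch's
code):

	#include <stdio.h>

	/* bit positions come from the enum; adding a flag renumbers the rest */
	enum {
		__FLAG_ATOMIC,
		__FLAG_NOUNLOCK,	/* new flag slots in without touching the others */
		__FLAG_NOFAIL,
	};

	#define FLAG_ATOMIC	(1 << __FLAG_ATOMIC)
	#define FLAG_NOUNLOCK	(1 << __FLAG_NOUNLOCK)
	#define FLAG_NOFAIL	(1 << __FLAG_NOFAIL)

	int main(void)
	{
		unsigned flags = FLAG_ATOMIC|FLAG_NOUNLOCK;

		printf("atomic=%d nounlock=%d nofail=%d\n",
		       !!(flags & FLAG_ATOMIC),
		       !!(flags & FLAG_NOUNLOCK),
		       !!(flags & FLAG_NOFAIL));
		return 0;
	}

The generated values are identical to writing the shifts by hand; the enum
only exists to keep the bit positions unique and ordered.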
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index c950f2564f25..4292dd7fff8e 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -649,7 +649,14 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, struct btree *b; struct bset_tree *t; - /* btree_node_fill() requires parent to be locked: */ + /* + * XXX: locking optimization + * + * we can make the locking looser here - caller can drop lock on parent + * node before locking child node (and potentially blocking): we just + * have to have bch2_btree_node_fill() call relock on the parent and + * return -EINTR if that fails + */ EBUG_ON(!btree_node_locked(iter, level + 1)); EBUG_ON(level >= BTREE_MAX_DEPTH); retry: @@ -749,6 +756,7 @@ retry: struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree_iter *iter, struct btree *b, + bool may_drop_locks, enum btree_node_sibling sib) { struct btree *parent; @@ -785,26 +793,47 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent); - if (IS_ERR(ret) && PTR_ERR(ret) == -EINTR) { - btree_node_unlock(iter, level); - + if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) { if (!bch2_btree_node_relock(iter, level + 1)) { bch2_btree_iter_set_locks_want(iter, level + 2); return ERR_PTR(-EINTR); } - ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent); - } +#if 0 + /* + * we might have got -EINTR because trylock failed, and we're + * holding other locks that would cause us to deadlock: + * + * to do this correctly, we need a bch2_btree_iter_relock() + * that relocks all linked iters + */ + struct btree_iter *linked; - if (!bch2_btree_node_relock(iter, level)) { - btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); + for_each_linked_btree_iter(iter, linked) { + if (btree_iter_cmp(iter, linked) < 0) + __bch2_btree_iter_unlock(linked); - if (!IS_ERR(ret)) { - six_unlock_intent(&ret->lock); - ret = ERR_PTR(-EINTR); + } +#endif + if (sib == btree_prev_sib) + btree_node_unlock(iter, level); + + ret = bch2_btree_node_get(c, iter, &tmp.k, level, + SIX_LOCK_intent); + + if (!bch2_btree_node_relock(iter, level)) { + btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); + + if (!IS_ERR(ret)) { + six_unlock_intent(&ret->lock); + ret = ERR_PTR(-EINTR); + } } } + BUG_ON((!may_drop_locks || !IS_ERR(ret)) && + !btree_node_locked(iter, level)); + return ret; } diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index e021d6e9422a..43109d086479 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -26,7 +26,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, enum six_lock_type); struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, - struct btree *, + struct btree *, bool, enum btree_node_sibling); void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *, diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 02b14e38ffda..0904f445e375 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -780,7 +780,7 @@ next: bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key); /* Insert the newly coalesced nodes */ - bch2_btree_insert_node(as, parent, iter, &keylist); + bch2_btree_insert_node(as, parent, iter, &keylist, 0); BUG_ON(!bch2_keylist_empty(&keylist)); diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index f357095d5b4f..aac97958cc3b 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -85,31 
+85,49 @@ int __bch2_btree_insert_at(struct btree_insert *); __VA_ARGS__ \ }}) +enum { + __BTREE_INSERT_ATOMIC, + __BTREE_INSERT_NOUNLOCK, + __BTREE_INSERT_NOFAIL, + __BTREE_INSERT_USE_RESERVE, + __BTREE_INSERT_USE_ALLOC_RESERVE, + __BTREE_INSERT_JOURNAL_REPLAY, + __BTREE_INSERT_NOWAIT, + __BTREE_INSERT_GC_LOCK_HELD, + __BCH_HASH_SET_MUST_CREATE, + __BCH_HASH_SET_MUST_REPLACE, +}; + +/* + * Don't drop/retake locks before doing btree update, instead return -EINTR if + * we had to drop locks for any reason + */ +#define BTREE_INSERT_ATOMIC (1 << __BTREE_INSERT_ATOMIC) + /* - * Don't drop/retake locks: instead return -EINTR if need to upgrade to intent - * locks, -EAGAIN if need to wait on btree reserve + * Don't drop locks _after_ successfully updating btree: */ -#define BTREE_INSERT_ATOMIC (1 << 0) +#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) /* Don't check for -ENOSPC: */ -#define BTREE_INSERT_NOFAIL (1 << 1) +#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) /* for copygc, or when merging btree nodes */ -#define BTREE_INSERT_USE_RESERVE (1 << 2) -#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << 3) +#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) +#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) /* * Insert is for journal replay: don't get journal reservations, or mark extents * (bch_mark_key) */ -#define BTREE_INSERT_JOURNAL_REPLAY (1 << 4) +#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) /* Don't block on allocation failure (for new btree nodes: */ -#define BTREE_INSERT_NOWAIT (1 << 5) -#define BTREE_INSERT_GC_LOCK_HELD (1 << 6) +#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT) +#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD) -#define BCH_HASH_SET_MUST_CREATE (1 << 7) -#define BCH_HASH_SET_MUST_REPLACE (1 << 8) +#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE) +#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE) int bch2_btree_delete_at(struct btree_iter *, unsigned); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 92e19c4eae7e..65b345db2931 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1362,7 +1362,8 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, } static void btree_split(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys) + struct btree_iter *iter, struct keylist *keys, + unsigned flags) { struct bch_fs *c = as->c; struct btree *parent = btree_node_parent(iter, b); @@ -1425,7 +1426,7 @@ static void btree_split(struct btree_update *as, struct btree *b, if (parent) { /* Split a non root node */ - bch2_btree_insert_node(as, parent, iter, &as->parent_keys); + bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); } else if (n3) { bch2_btree_set_root(as, n3, iter); } else { @@ -1511,7 +1512,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * for leaf nodes -- inserts into interior nodes have to be atomic. 
*/ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys) + struct btree_iter *iter, struct keylist *keys, + unsigned flags) { struct bch_fs *c = as->c; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); @@ -1551,14 +1553,14 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, btree_node_interior_verify(b); - bch2_foreground_maybe_merge(c, iter, b->level); + bch2_foreground_maybe_merge(c, iter, b->level, flags); return; split: - btree_split(as, b, iter, keys); + btree_split(as, b, iter, keys, flags); } int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, - unsigned btree_reserve_flags) + unsigned flags) { struct btree *b = iter->l[0].b; struct btree_update *as; @@ -1572,14 +1574,17 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, */ for_each_linked_btree_iter(iter, linked) if (linked->btree_id == BTREE_ID_EXTENTS) - btree_reserve_flags |= BTREE_INSERT_USE_RESERVE; + flags |= BTREE_INSERT_USE_RESERVE; if (iter->btree_id == BTREE_ID_EXTENTS) - btree_reserve_flags |= BTREE_INSERT_USE_RESERVE; + flags |= BTREE_INSERT_USE_RESERVE; closure_init_stack(&cl); /* Hack, because gc and splitting nodes doesn't mix yet: */ if (!down_read_trylock(&c->gc_lock)) { + if (flags & BTREE_INSERT_NOUNLOCK) + return -EINTR; + bch2_btree_iter_unlock(iter); down_read(&c->gc_lock); @@ -1597,20 +1602,19 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, } as = bch2_btree_update_start(c, iter->btree_id, - btree_update_reserve_required(c, b), - btree_reserve_flags, &cl); + btree_update_reserve_required(c, b), flags, + !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); if (IS_ERR(as)) { ret = PTR_ERR(as); if (ret == -EAGAIN) { + BUG_ON(flags & BTREE_INSERT_NOUNLOCK); bch2_btree_iter_unlock(iter); - up_read(&c->gc_lock); - closure_sync(&cl); - return -EINTR; + ret = -EINTR; } goto out; } - btree_split(as, b, iter, NULL); + btree_split(as, b, iter, NULL, flags); bch2_btree_update_done(as); bch2_btree_iter_set_locks_want(iter, 1); @@ -1620,10 +1624,11 @@ out: return ret; } -int __bch2_foreground_maybe_merge(struct bch_fs *c, - struct btree_iter *iter, - unsigned level, - enum btree_node_sibling sib) +void __bch2_foreground_maybe_merge(struct bch_fs *c, + struct btree_iter *iter, + unsigned level, + unsigned flags, + enum btree_node_sibling sib) { struct btree_update *as; struct bkey_format_state new_s; @@ -1636,29 +1641,29 @@ int __bch2_foreground_maybe_merge(struct bch_fs *c, closure_init_stack(&cl); retry: - if (!bch2_btree_node_relock(iter, level)) - return 0; + BUG_ON(!btree_node_locked(iter, level)); b = iter->l[level].b; parent = btree_node_parent(iter, b); if (!parent) - return 0; + goto out; if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) - return 0; + goto out; /* XXX: can't be holding read locks */ - m = bch2_btree_node_get_sibling(c, iter, b, sib); + m = bch2_btree_node_get_sibling(c, iter, b, + !(flags & BTREE_INSERT_NOUNLOCK), sib); if (IS_ERR(m)) { ret = PTR_ERR(m); - goto out; + goto err; } /* NULL means no sibling: */ if (!m) { b->sib_u64s[sib] = U16_MAX; - return 0; + goto out; } if (sib == btree_prev_sib) { @@ -1688,33 +1693,26 @@ retry: if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) { six_unlock_intent(&m->lock); - return 0; + goto out; } /* We're changing btree topology, doesn't mix with gc: */ - if (!down_read_trylock(&c->gc_lock)) { - six_unlock_intent(&m->lock); - bch2_btree_iter_unlock(iter); - - down_read(&c->gc_lock); - 
up_read(&c->gc_lock); - ret = -EINTR; - goto out; - } + if (!down_read_trylock(&c->gc_lock)) + goto err_cycle_gc_lock; if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) { ret = -EINTR; - goto out_unlock; + goto err_unlock; } as = bch2_btree_update_start(c, iter->btree_id, btree_update_reserve_required(c, parent) + 1, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE, - &cl); + !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); if (IS_ERR(as)) { ret = PTR_ERR(as); - goto out_unlock; + goto err_unlock; } trace_btree_merge(c, b); @@ -1744,7 +1742,7 @@ retry: bch2_btree_node_write(c, n, SIX_LOCK_intent); - bch2_btree_insert_node(as, parent, iter, &as->parent_keys); + bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); bch2_btree_open_bucket_put(c, n); bch2_btree_node_free_inmem(c, b, iter); @@ -1754,26 +1752,47 @@ retry: bch2_btree_iter_verify(iter, n); bch2_btree_update_done(as); -out_unlock: - if (ret != -EINTR && ret != -EAGAIN) - bch2_btree_iter_set_locks_want(iter, 1); + six_unlock_intent(&m->lock); up_read(&c->gc_lock); out: - if (ret == -EAGAIN || ret == -EINTR) { - bch2_btree_iter_unlock(iter); - ret = -EINTR; - } - + bch2_btree_iter_set_locks_want(iter, 1); closure_sync(&cl); + return; - if (ret == -EINTR) { +err_cycle_gc_lock: + six_unlock_intent(&m->lock); + + if (flags & BTREE_INSERT_NOUNLOCK) + goto out; + + bch2_btree_iter_unlock(iter); + + down_read(&c->gc_lock); + up_read(&c->gc_lock); + ret = -EINTR; + goto err; + +err_unlock: + if (ret != -EINTR && ret != -EAGAIN) + bch2_btree_iter_set_locks_want(iter, 1); + six_unlock_intent(&m->lock); + up_read(&c->gc_lock); +err: + BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK)); + + if ((ret == -EAGAIN || ret == -EINTR) && + !(flags & BTREE_INSERT_NOUNLOCK)) { + bch2_btree_iter_unlock(iter); + closure_sync(&cl); ret = bch2_btree_iter_traverse(iter); - if (!ret) - goto retry; + if (ret) + goto out; + + goto retry; } - return ret; + goto out; } static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, @@ -1806,7 +1825,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, parent, iter, &as->parent_keys); + bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); } else { bch2_btree_set_root(as, n, iter); } @@ -1920,7 +1939,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, } bch2_keylist_add(&as->parent_keys, &new_key->k_i); - bch2_btree_insert_node(as, parent, iter, &as->parent_keys); + bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0); if (new_hash) { mutex_lock(&c->btree_cache.lock); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index abf14e4c41dc..3a17de5ca43e 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -146,35 +146,51 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *, struct btree *); void bch2_btree_insert_node(struct btree_update *, struct btree *, - struct btree_iter *, struct keylist *); + struct btree_iter *, struct keylist *, + unsigned); int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned); -int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, - unsigned, enum btree_node_sibling); +void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, + unsigned, unsigned, enum btree_node_sibling); -static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c, +static inline void 
bch2_foreground_maybe_merge_sibling(struct bch_fs *c, struct btree_iter *iter, - unsigned level, + unsigned level, unsigned flags, enum btree_node_sibling sib) { struct btree *b; + /* + * iterators are inconsistent when they hit end of leaf, until + * traversed again + * + * XXX inconsistent how? + */ + if (iter->flags & BTREE_ITER_AT_END_OF_LEAF) + return; + + if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) + return; + if (!bch2_btree_node_relock(iter, level)) - return 0; + return; b = iter->l[level].b; if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) - return 0; + return; - return __bch2_foreground_maybe_merge(c, iter, level, sib); + __bch2_foreground_maybe_merge(c, iter, level, flags, sib); } static inline void bch2_foreground_maybe_merge(struct bch_fs *c, struct btree_iter *iter, - unsigned level) + unsigned level, + unsigned flags) { - bch2_foreground_maybe_merge_sibling(c, iter, level, btree_prev_sib); - bch2_foreground_maybe_merge_sibling(c, iter, level, btree_next_sib); + bch2_foreground_maybe_merge_sibling(c, iter, level, flags, + btree_prev_sib); + bch2_foreground_maybe_merge_sibling(c, iter, level, flags, + btree_next_sib); } void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index cc41140fbe3a..10cb8947109c 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -227,19 +227,36 @@ btree_insert_key_leaf(struct btree_insert *trans, return ret; } +#define trans_for_each_entry(trans, i) \ + for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++) + +/* + * We sort transaction entries so that if multiple iterators point to the same + * leaf node they'll be adjacent: + */ static bool same_leaf_as_prev(struct btree_insert *trans, struct btree_insert_entry *i) { - /* - * Because we sorted the transaction entries, if multiple iterators - * point to the same leaf node they'll always be adjacent now: - */ return i != trans->entries && i[0].iter->l[0].b == i[-1].iter->l[0].b; } -#define trans_for_each_entry(trans, i) \ - for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++) +static inline struct btree_insert_entry *trans_next_leaf(struct btree_insert *trans, + struct btree_insert_entry *i) +{ + struct btree *b = i->iter->l[0].b; + + do { + i++; + } while (i < trans->entries + trans->nr && b == i->iter->l[0].b); + + return i; +} + +#define trans_for_each_leaf(trans, i) \ + for ((i) = (trans)->entries; \ + (i) < (trans)->entries + (trans)->nr; \ + (i) = trans_next_leaf(trans, i)) inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, struct btree_iter *iter) @@ -262,19 +279,16 @@ static void multi_lock_write(struct bch_fs *c, struct btree_insert *trans) { struct btree_insert_entry *i; - trans_for_each_entry(trans, i) - if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, - i->iter); + trans_for_each_leaf(trans, i) + bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter); } static void multi_unlock_write(struct btree_insert *trans) { struct btree_insert_entry *i; - trans_for_each_entry(trans, i) - if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); + trans_for_each_leaf(trans, i) + bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); } static inline int btree_trans_cmp(struct btree_insert_entry l, @@ -285,56 +299,24 @@ static inline int btree_trans_cmp(struct btree_insert_entry l, /* Normal update interface: */ -/** 
- * __bch_btree_insert_at - insert keys at given iterator positions - * - * This is main entry point for btree updates. - * - * Return values: - * -EINTR: locking changed, this function should be called again. Only returned - * if passed BTREE_INSERT_ATOMIC. - * -EROFS: filesystem read only - * -EIO: journal or btree node IO error +/* + * Get journal reservation, take write locks, and attempt to do btree update(s): */ -int __bch2_btree_insert_at(struct btree_insert *trans) +static inline int do_btree_insert_at(struct btree_insert *trans, + struct btree_iter **split, + bool *cycle_gc_lock) { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - struct btree_iter *split = NULL; - bool cycle_gc_lock = false; unsigned u64s; int ret; - trans_for_each_entry(trans, i) { - BUG_ON(i->iter->level); - BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); - BUG_ON(debug_check_bkeys(c) && - bch2_bkey_invalid(c, i->iter->btree_id, - bkey_i_to_s_c(i->k))); - } - - bubble_sort(trans->entries, trans->nr, btree_trans_cmp); - - if (unlikely(!percpu_ref_tryget(&c->writes))) - return -EROFS; -retry_locks: - ret = -EINTR; - trans_for_each_entry(trans, i) { - if (!bch2_btree_iter_set_locks_want(i->iter, 1)) - goto err; + trans_for_each_entry(trans, i) + BUG_ON(i->done); - if (i->iter->uptodate == BTREE_ITER_NEED_TRAVERSE) { - ret = bch2_btree_iter_traverse(i->iter); - if (ret) - goto err; - } - } -retry: - trans->did_work = false; u64s = 0; trans_for_each_entry(trans, i) - if (!i->done) - u64s += jset_u64s(i->k->k.u64s + i->extra_res); + u64s += jset_u64s(i->k->k.u64s + i->extra_res); memset(&trans->journal_res, 0, sizeof(trans->journal_res)); @@ -344,13 +326,13 @@ retry: u64s, u64s) : 0; if (ret) - goto err; + return ret; multi_lock_write(c, trans); if (race_fault()) { ret = -EINTR; - goto unlock; + goto out; } u64s = 0; @@ -365,129 +347,203 @@ retry: * bch2_btree_node_write(), converting an unwritten bset to a * written one */ - if (!i->done) { - u64s += i->k->k.u64s + i->extra_res; - if (!bch2_btree_node_insert_fits(c, - i->iter->l[0].b, u64s)) { - split = i->iter; - goto unlock; - } + u64s += i->k->k.u64s + i->extra_res; + if (!bch2_btree_node_insert_fits(c, + i->iter->l[0].b, u64s)) { + ret = -EINTR; + *split = i->iter; + goto out; } } - ret = 0; - split = NULL; - cycle_gc_lock = false; - trans_for_each_entry(trans, i) { - if (i->done) - continue; - switch (btree_insert_key_leaf(trans, i)) { case BTREE_INSERT_OK: i->done = true; break; case BTREE_INSERT_JOURNAL_RES_FULL: case BTREE_INSERT_NEED_TRAVERSE: - ret = -EINTR; - break; case BTREE_INSERT_NEED_RESCHED: - ret = -EAGAIN; + ret = -EINTR; break; case BTREE_INSERT_BTREE_NODE_FULL: - split = i->iter; + ret = -EINTR; + *split = i->iter; break; case BTREE_INSERT_ENOSPC: ret = -ENOSPC; break; case BTREE_INSERT_NEED_GC_LOCK: - cycle_gc_lock = true; ret = -EINTR; + *cycle_gc_lock = true; break; default: BUG(); } - if (!trans->did_work && (ret || split)) + /* + * If we did some work (i.e. inserted part of an extent), + * we have to do all the other updates as well: + */ + if (!trans->did_work && (ret || *split)) break; } -unlock: +out: multi_unlock_write(trans); bch2_journal_res_put(&c->journal, &trans->journal_res); - if (split) - goto split; - if (ret) - goto err; + return ret; +} - trans_for_each_entry(trans, i) - if (i->iter->flags & BTREE_ITER_AT_END_OF_LEAF) - goto out; +/** + * __bch_btree_insert_at - insert keys at given iterator positions + * + * This is main entry point for btree updates. 
+ * + * Return values: + * -EINTR: locking changed, this function should be called again. Only returned + * if passed BTREE_INSERT_ATOMIC. + * -EROFS: filesystem read only + * -EIO: journal or btree node IO error + */ +int __bch2_btree_insert_at(struct btree_insert *trans) +{ + struct bch_fs *c = trans->c; + struct btree_insert_entry *i; + struct btree_iter *split = NULL; + bool cycle_gc_lock = false; + unsigned flags; + int ret; trans_for_each_entry(trans, i) { - /* - * iterators are inconsistent when they hit end of leaf, until - * traversed again - */ - if (i->iter->uptodate < BTREE_ITER_NEED_TRAVERSE && - !same_leaf_as_prev(trans, i)) - bch2_foreground_maybe_merge(c, i->iter, 0); + BUG_ON(i->iter->level); + BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); + BUG_ON(debug_check_bkeys(c) && + bch2_bkey_invalid(c, i->iter->btree_id, + bkey_i_to_s_c(i->k))); + BUG_ON(i->iter->uptodate == BTREE_ITER_END); + } + + /* for sanity purposes: */ + if (trans->nr > 1) + trans->flags |= BTREE_INSERT_ATOMIC; + + bubble_sort(trans->entries, trans->nr, btree_trans_cmp); + + if (unlikely(!percpu_ref_tryget(&c->writes))) + return -EROFS; +retry: + split = NULL; + cycle_gc_lock = false; + + trans_for_each_entry(trans, i) { + if (i->iter->locks_want < 1 && + !bch2_btree_iter_set_locks_want(i->iter, 1)) { + ret = -EINTR; + goto err; + } + + if (i->iter->uptodate > BTREE_ITER_NEED_PEEK) { + ret = -EINTR; + goto err; + } + + if (i->iter->flags & BTREE_ITER_ERROR) { + ret = -EIO; + goto err; + } } + + ret = do_btree_insert_at(trans, &split, &cycle_gc_lock); + if (unlikely(ret)) + goto err; + + trans_for_each_leaf(trans, i) + bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags); + + trans_for_each_entry(trans, i) + bch2_btree_iter_set_locks_want(i->iter, 1); out: + percpu_ref_put(&c->writes); + + if ((trans->flags & BTREE_INSERT_NOUNLOCK) && trans->did_work) + trans_for_each_entry(trans, i) + BUG_ON(!btree_node_locked(i->iter, 0)); + /* make sure we didn't lose an error: */ if (!ret && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) trans_for_each_entry(trans, i) BUG_ON(!i->done); - percpu_ref_put(&c->writes); + BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); + return ret; -split: - /* - * have to drop journal res before splitting, because splitting means - * allocating new btree nodes, and holding a journal reservation - * potentially blocks the allocator: - */ - ret = bch2_btree_split_leaf(c, split, trans->flags); +err: + flags = trans->flags; /* - * This can happen when we insert part of an extent - with an update - * with multiple keys, we don't want to redo the entire update - that's - * just too confusing: + * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree + * update; if we haven't done anything yet it doesn't apply */ - if (!ret && - (trans->flags & BTREE_INSERT_ATOMIC) && - trans->did_work) - ret = -EINTR; + if (!trans->did_work) + flags &= ~BTREE_INSERT_NOUNLOCK; - if (ret) - goto err; + if (split) { + ret = bch2_btree_split_leaf(c, split, flags); + + /* + * if the split succeeded without dropping locks the insert will + * still be atomic (in the BTREE_INSERT_ATOMIC sense, what the + * caller peeked() and is overwriting won't have changed) + */ +#if 0 + /* + * XXX: + * split -> btree node merging (of parent node) might still drop + * locks when we're not passing it BTREE_INSERT_NOUNLOCK + */ + if (!ret && !trans->did_work) + goto retry; +#endif + + /* + * don't care if we got ENOSPC because we told split it + * couldn't block: + */ + if (!ret || (flags & 
BTREE_INSERT_NOUNLOCK)) + ret = -EINTR; + } - /* - * if the split didn't have to drop locks the insert will still be - * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked() - * and is overwriting won't have changed) - */ - goto retry_locks; -err: if (cycle_gc_lock) { - down_read(&c->gc_lock); + if (!down_read_trylock(&c->gc_lock)) { + if (flags & BTREE_INSERT_NOUNLOCK) + goto out; + + bch2_btree_iter_unlock(trans->entries[0].iter); + down_read(&c->gc_lock); + } up_read(&c->gc_lock); } if (ret == -EINTR) { + if (flags & BTREE_INSERT_NOUNLOCK) + goto out; + trans_for_each_entry(trans, i) { int ret2 = bch2_btree_iter_traverse(i->iter); if (ret2) { ret = ret2; goto out; } + + BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); } /* * BTREE_ITER_ATOMIC means we have to return -EINTR if we * dropped locks: */ - if (!(trans->flags & BTREE_INSERT_ATOMIC)) + if (!(flags & BTREE_INSERT_ATOMIC)) goto retry; } @@ -549,7 +605,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, flags, - BTREE_INSERT_ENTRY(&iter, k)); + BTREE_INSERT_ENTRY(&iter, k)); bch2_btree_iter_unlock(&iter); return ret; @@ -584,6 +640,11 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, if (bkey_cmp(iter.pos, end) >= 0) break; + if (k.k->type == KEY_TYPE_DISCARD) { + bch2_btree_iter_next(&iter); + continue; + } + bkey_init(&delete.k); /* @@ -615,8 +676,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, } ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, - BTREE_INSERT_NOFAIL, - BTREE_INSERT_ENTRY(&iter, &delete)); + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(&iter, &delete)); if (ret) break; |
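One small pattern from the btree_update_leaf.c changes stands on its own:
rather than testing same_leaf_as_prev() at every entry, trans_for_each_leaf()
steps through the sorted entries one leaf at a time, so the write lock is
taken and released once per leaf. A user-space sketch of the same grouping
idea (illustrative types and names, not the kernel code):

	#include <stdio.h>

	struct entry {
		int leaf;	/* stands in for i->iter->l[0].b */
		int key;
	};

	/* advance to the first entry on a different leaf (cf. trans_next_leaf()) */
	static struct entry *next_leaf(struct entry *i, struct entry *end)
	{
		int leaf = i->leaf;

		do {
			i++;
		} while (i < end && i->leaf == leaf);

		return i;
	}

	#define for_each_leaf(i, start, end) \
		for ((i) = (start); (i) < (end); (i) = next_leaf(i, end))

	int main(void)
	{
		/* entries sorted so that entries sharing a leaf are adjacent */
		struct entry e[] = { {1, 10}, {1, 11}, {2, 20}, {3, 30}, {3, 31} };
		struct entry *end = e + sizeof(e) / sizeof(e[0]), *i;

		for_each_leaf(i, e, end)
			printf("lock/unlock leaf %d once (first key %d)\n",
			       i->leaf, i->key);
		return 0;
	}

This only works because the entries are sorted first, so entries sharing a
leaf are adjacent -- the same precondition the patch documents above
same_leaf_as_prev().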