diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2025-07-19 14:29:30 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2025-07-19 14:29:30 -0400 |
commit | e17d8841efa4fa17150025eac9dd32a4b4610fc7 (patch) | |
tree | 1b685d8a4eaad56874d899839a519c696e3dc8a4 | |
parent | ffe083c31d0a387e5092e1709daf717c21c2a98d (diff) |
Update bcachefs sources to c1ec91699f60 workqueue: Basic memory allocation profiling supportv1.25.3
34 files changed, 305 insertions, 196 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index 7abb0304..fad9022e 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -deeffbdc52f1092dadb3d523c4429e002c7fc485 +c1ec91699f6089754548cfc01391fff17a633516 diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 307824d6..8f970dc1 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -138,8 +138,8 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, acl = allocate_dropping_locks(trans, ret, posix_acl_alloc(count, _gfp)); - if (!acl) - return ERR_PTR(-ENOMEM); + if (!acl && !ret) + ret = bch_err_throw(trans->c, ENOMEM_acl); if (ret) { kfree(acl); return ERR_PTR(ret); diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 4c1604fd..f1d35b7f 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -337,9 +337,10 @@ void bch2_alloc_v4_swab(struct bkey_s k) } static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, - unsigned dev, const struct bch_alloc_v4 *a) + struct bkey_s_c k, + const struct bch_alloc_v4 *a) { - struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, dev) : NULL; + struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL; prt_newline(out); printbuf_indent_add(out, 2); @@ -348,11 +349,14 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs * bch2_prt_data_type(out, a->data_type); prt_newline(out); prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty); - prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); + if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, journal_seq_empty)) + prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); + prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); - prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); + if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, stripe_sectors)) + prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); prt_printf(out, "cached_sectors %u\n", a->cached_sectors); prt_printf(out, "stripe %u\n", a->stripe); prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); @@ -372,12 +376,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); - __bch2_alloc_v4_to_text(out, c, k.k->p.inode, a); + __bch2_alloc_v4_to_text(out, c, k, a); } void bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - __bch2_alloc_v4_to_text(out, c, k.k->p.inode, bkey_s_c_to_alloc_v4(k).v); + __bch2_alloc_v4_to_text(out, c, k, bkey_s_c_to_alloc_v4(k).v); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 34cb8a43..e95bb684 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -44,10 +44,6 @@ #include <linux/rcupdate.h> #include <linux/sched/task.h> -#define DROP_THIS_NODE 10 -#define DROP_PREV_NODE 11 -#define DID_FILL_FROM_SCAN 12 - /* * Returns true if it's a btree we can easily reconstruct, or otherwise won't * cause data loss if it's missing: @@ -252,7 +248,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * return ret; *pulled_from_scan = cur->data->min_key; - ret = DID_FILL_FROM_SCAN; + ret = bch_err_throw(c, topology_repair_did_fill_from_scan); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, "btree node with incorrect min_key%s", buf.buf)) @@ -263,7 +259,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_next_node, "btree node overwritten by next node%s", buf.buf)) - ret = DROP_PREV_NODE; + ret = bch_err_throw(c, topology_repair_drop_prev_node); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key, "btree node with incorrect max_key%s", buf.buf)) @@ -274,7 +270,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */ if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_prev_node, "btree node overwritten by prev node%s", buf.buf)) - ret = DROP_THIS_NODE; + ret = bch_err_throw(c, topology_repair_drop_this_node); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, "btree node with incorrect min_key%s", buf.buf)) @@ -314,7 +310,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, return ret; *pulled_from_scan = b->key.k.p; - ret = DID_FILL_FROM_SCAN; + ret = bch_err_throw(c, topology_repair_did_fill_from_scan); } else { ret = set_node_max(c, child, b->key.k.p); } @@ -391,15 +387,15 @@ again: ret = lockrestart_do(trans, btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); - if (ret < 0) + if (ret && !bch2_err_matches(ret, BCH_ERR_topology_repair)) goto err; - if (ret == DID_FILL_FROM_SCAN) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { new_pass = true; ret = 0; } - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { six_unlock_read(&cur->c.lock); bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, @@ -414,7 +410,7 @@ again: six_unlock_read(&prev->c.lock); prev = NULL; - if (ret == DROP_PREV_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_prev_node)) { bch_info(c, "dropped prev node"); bch2_btree_node_evict(trans, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, @@ -436,7 +432,7 @@ again: BUG_ON(cur); ret = lockrestart_do(trans, btree_repair_node_end(trans, b, prev, pulled_from_scan)); - if (ret == DID_FILL_FROM_SCAN) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { new_pass = true; ret = 0; } @@ -477,7 +473,7 @@ again: six_unlock_read(&cur->c.lock); cur = NULL; - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); @@ -504,7 +500,7 @@ again: if (mustfix_fsck_err_on(!have_child, c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf)) - ret = DROP_THIS_NODE; + ret = bch_err_throw(c, topology_repair_drop_this_node); err: fsck_err: if (!IS_ERR_OR_NULL(prev)) @@ -521,7 +517,8 @@ fsck_err: bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); - bch_err_fn(c, ret); + if (!bch2_err_matches(ret, BCH_ERR_topology_repair)) + bch_err_fn(c, ret); return ret; } @@ -592,7 +589,7 @@ recover: ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); six_unlock_read(&b->c.lock); - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { scoped_guard(mutex, &c->btree_cache.lock) bch2_btree_node_hash_remove(&c->btree_cache, b); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index cc771aff..a282c388 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2860,8 +2860,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre !bkey_deleted(k.k) && (k2 = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) { k = k2; - if (!bkey_err(k)) - iter->k = *k.k; + if (bkey_err(k)) + goto out; + iter->k = *k.k; } if (unlikely(k.k->type == KEY_TYPE_whiteout && diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index ebba14da..d61b7820 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -254,11 +254,13 @@ static int btree_key_cache_create(struct btree_trans *trans, struct bkey_i *new_k = allocate_dropping_locks(trans, ret, kmalloc(key_u64s * sizeof(u64), _gfp)); - if (unlikely(!new_k)) { + if (unlikely(!new_k && !ret)) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), key_u64s); ret = bch_err_throw(c, ENOMEM_btree_key_cache_fill); - } else if (ret) { + } + + if (unlikely(ret)) { kfree(new_k); goto err; } @@ -407,7 +409,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } - } else { + } else if (!(flags & BTREE_ITER_cached_nofill)) { BUG_ON(path->uptodate); BUG_ON(!path->nodes_locked); } diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c index d997e381..f4f958f4 100644 --- a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree_node_scan.c @@ -270,6 +270,9 @@ static int read_btree_nodes(struct find_btree_nodes *f) int ret = 0; closure_init_stack(&cl); + CLASS(printbuf, buf)(); + + prt_printf(&buf, "scanning for btree nodes on"); for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) { if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree))) @@ -295,10 +298,14 @@ static int read_btree_nodes(struct find_btree_nodes *f) break; } + prt_printf(&buf, " %s", ca->name); + closure_get(&cl); enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); wake_up_process(t); } + + bch_notice(c, "%s", buf.buf); err: while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2)) ; diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index 1f9965ae..58590ccc 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -772,12 +772,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans->journal_res.offset += trans->journal_entries.u64s; trans->journal_res.u64s -= trans->journal_entries.u64s; - memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, - BCH_JSET_ENTRY_write_buffer_keys, - BTREE_ID_accounting, 0, - trans->accounting.u64s)->_data, - btree_trans_subbuf_base(trans, &trans->accounting), - trans->accounting.u64s); + if (trans->accounting.u64s) + memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, + BCH_JSET_ENTRY_write_buffer_keys, + BTREE_ID_accounting, 0, + trans->accounting.u64s)->_data, + btree_trans_subbuf_base(trans, &trans->accounting), + trans->accounting.u64s); if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; @@ -1065,11 +1066,15 @@ int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags f EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - journal_u64s = jset_u64s(trans->accounting.u64s); + journal_u64s = 0; + trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + if (trans->accounting.u64s) + journal_u64s += jset_u64s(trans->accounting.u64s); + trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 312ef203..e4aa4fa7 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -14,6 +14,7 @@ #include "btree_locking.h" #include "buckets.h" #include "clock.h" +#include "disk_groups.h" #include "enumerated_ref.h" #include "error.h" #include "extents.h" @@ -277,6 +278,36 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, bch2_trans_node_drop(trans, b); } +static bool can_use_btree_node(struct bch_fs *c, + struct disk_reservation *res, + unsigned target, + struct bkey_s_c k) +{ + if (!bch2_bkey_devs_rw(c, k)) + return false; + + if (target && !bch2_bkey_in_target(c, k, target)) + return false; + + unsigned durability = bch2_bkey_durability(c, k); + + if (durability >= res->nr_replicas) + return true; + + struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_btree, target); + + guard(rcu)(); + + unsigned durability_available = 0, i; + for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) { + struct bch_dev *ca = bch2_dev_rcu_noerror(c, i); + if (ca) + durability_available += ca->mi.durability; + } + + return durability >= durability_available; +} + static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, @@ -303,10 +334,14 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { for (struct btree_alloc *a = c->btree_reserve_cache; - a < c->btree_reserve_cache + c->btree_reserve_cache_nr; - a++) { - if (target && !bch2_bkey_in_target(c, bkey_i_to_s_c(&a->k), target)) + a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { + /* check if it has sufficient durability */ + + if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { + bch2_open_buckets_put(c, &a->ob); + *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; continue; + } bkey_copy(&b->key, &a->k); b->ob = a->ob; diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c index 5185794f..1c6d0cdc 100644 --- a/libbcachefs/clock.c +++ b/libbcachefs/clock.c @@ -21,7 +21,7 @@ static const struct min_heap_callbacks callbacks = { void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) { - guard(spinlock)(&clock->timer_lock); + spin_lock(&clock->timer_lock); if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) { spin_unlock(&clock->timer_lock); @@ -31,9 +31,11 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) - return; + goto out; BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL)); +out: + spin_unlock(&clock->timer_lock); } void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 97d7655a..33cb94f7 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -462,7 +462,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * struct btree *b) { if (!out->nr_tabstops) - printbuf_tabstop_push(out, 32); + printbuf_tabstop_push(out, 36); prt_printf(out, "%px ", b); bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index 373d382b..efb58d2d 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -26,6 +26,13 @@ struct bch_inode_info; #if IS_ENABLED(CONFIG_UNICODE) int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, const struct qstr *, struct qstr *); +#else +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + return bch_err_throw(trans->c, no_casefolding_without_utf8); +} +#endif static inline int bch2_maybe_casefold(struct btree_trans *trans, const struct bch_hash_info *info, @@ -38,14 +45,6 @@ static inline int bch2_maybe_casefold(struct btree_trans *trans, return bch2_casefold(trans, info, str, out_cf); } } -#else -static inline int bch2_maybe_casefold(struct btree_trans *trans, - const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) -{ - return bch_err_throw(trans->c, no_casefolding_without_utf8); -} -#endif struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 62dda821..bea14f02 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -2060,6 +2060,9 @@ allocated: BUG_ON(trans->restarted); return h; err: + if (waiting && + !bch2_err_matches(ret, BCH_ERR_operation_blocked)) + closure_wake_up(&c->freelist_wait); bch2_ec_stripe_head_put(c, h); return ERR_PTR(ret); } diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 2de0dc91..cec8b0f4 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -90,6 +90,8 @@ x(ENOMEM, ENOMEM_disk_accounting) \ x(ENOMEM, ENOMEM_stripe_head_alloc) \ x(ENOMEM, ENOMEM_journal_read_bucket) \ + x(ENOMEM, ENOMEM_acl) \ + x(ENOMEM, ENOMEM_move_extent) \ x(ENOSPC, ENOSPC_disk_reservation) \ x(ENOSPC, ENOSPC_bucket_alloc) \ x(ENOSPC, ENOSPC_disk_label_add) \ @@ -216,9 +218,13 @@ x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ x(EINVAL, option_negative) \ + x(EINVAL, topology_repair) \ + x(BCH_ERR_topology_repair, topology_repair_drop_this_node) \ + x(BCH_ERR_topology_repair, topology_repair_drop_prev_node) \ + x(BCH_ERR_topology_repair, topology_repair_did_fill_from_scan) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ x(EOPNOTSUPP, no_casefolding_without_utf8) \ - x(EOPNOTSUPP, casefolding_disabled) \ + x(EOPNOTSUPP, casefolding_disabled) \ x(EOPNOTSUPP, casefold_opt_is_dir_only) \ x(EOPNOTSUPP, unsupported_fsx_flag) \ x(EOPNOTSUPP, unsupported_fa_flag) \ diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index b36ecfc0..8152ef1c 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -1006,6 +1006,20 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned return NULL; } +bool bch2_bkey_devs_rw(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + guard(rcu)(); + bkey_for_each_ptr(ptrs, ptr) { + CLASS(bch2_dev_tryget, ca)(c, ptr->dev); + if (!ca || ca->mi.state != BCH_MEMBER_STATE_rw) + return false; + } + + return true; +} + bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index f212f91c..35ee03cd 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -614,6 +614,8 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig return (void *) bch2_bkey_has_device_c(k.s_c, dev); } +bool bch2_bkey_devs_rw(struct bch_fs *, struct bkey_s_c); + bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned); diff --git a/libbcachefs/fs-io-direct.c b/libbcachefs/fs-io-direct.c index 73d44875..e53fee05 100644 --- a/libbcachefs/fs-io-direct.c +++ b/libbcachefs/fs-io-direct.c @@ -127,7 +127,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) * the dirtying of requests that are internal from the kernel (i.e. from * loopback), because we'll deadlock on page_lock. */ - dio->should_dirty = iter_is_iovec(iter); + dio->should_dirty = user_backed_iter(iter); blk_start_plug(&plug); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index cc203752..93ad33f0 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -223,9 +223,8 @@ static int bch2_flush_inode(struct bch_fs *c, if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync)) return -EROFS; - CLASS(btree_trans, trans)(c); u64 seq; - int ret = commit_do(trans, NULL, NULL, 0, + int ret = bch2_trans_commit_do(c, NULL, NULL, 0, bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: bch2_inode_flush_nocow_writes(c, inode); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 2789b30a..56b7126b 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1295,8 +1295,14 @@ static int bch2_fill_extent(struct bch_fs *c, flags| FIEMAP_EXTENT_DELALLOC| FIEMAP_EXTENT_UNWRITTEN); + } else if (k.k->type == KEY_TYPE_error) { + return 0; } else { - BUG(); + WARN_ONCE(1, "unhandled key type %s", + k.k->type < KEY_TYPE_MAX + ? bch2_bkey_types[k.k->type] + : "(unknown)"); + return 0; } } diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index df0aa252..183b88bb 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1975,6 +1975,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, } } + ret = check_extent_overbig(trans, iter, k); + if (ret) + goto err; + ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) goto err; @@ -2021,8 +2025,7 @@ int bch2_check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: - check_extent_overbig(trans, &iter, k); + check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); })) ?: check_i_sectors_notnested(trans, &w); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 3ba1f9fd..97760e89 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -182,6 +182,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) void bch2_journal_do_writes(struct journal *j) { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + for (u64 seq = journal_last_unwritten_seq(j); seq <= journal_cur_seq(j); seq++) { @@ -196,6 +198,7 @@ void bch2_journal_do_writes(struct journal *j) if (!journal_state_seq_count(j, j->reservations, seq)) { j->seq_write_started = seq; w->write_started = true; + closure_get(&c->cl); closure_call(&w->io, bch2_journal_write, j->wq, NULL); } @@ -1063,6 +1066,8 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open ? ERR_PTR(-EAGAIN) : buf; + if (!ret) + smp_mb(); break; } } diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index b46b9718..c05aa942 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -267,7 +267,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u { union journal_res_state s; - s.v = atomic64_sub_return(((union journal_res_state) { + s.v = atomic64_sub_return_release(((union journal_res_state) { .buf0_count = idx == 0, .buf1_count = idx == 1, .buf2_count = idx == 2, diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 2835250a..47224666 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1820,6 +1820,8 @@ static CLOSURE_CALLBACK(journal_write_done) if (do_discards) bch2_do_discards(c); + + closure_put(&c->cl); } static void journal_write_endio(struct bio *bio) diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index be50455c..f23e5ee9 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -874,7 +874,34 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, --type) if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { *did_work = true; - return ret; + + /* + * Question from Dan Carpenter, on the early return: + * + * If journal_flush_pins_or_still_flushing() returns + * true, then the flush hasn't complete and we must + * return 0; we want the outer closure_wait_event() in + * journal_flush_pins() to continue. + * + * The early return is there because we don't want to + * call journal_entry_close() until we've finished + * flushing all outstanding journal pins - otherwise + * seq_to_flush can be U64_MAX, and we'll close a bunch + * of journal entries and write tiny ones completely + * unnecessarily. + * + * Having the early return be in the loop where we loop + * over types is important, because flushing one journal + * pin can cause new journal pins to be added (even of + * the same type, btree node writes may generate more + * btree node writes, when updating the parent pointer + * has a full node and has to trigger a split/compact). + * + * This is part of our shutdown sequence, where order of + * flushing is important in order to make sure that it + * terminates... + */ + return 0; } if (seq_to_flush > journal_cur_seq(j)) diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 3f44bb54..84a228c4 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -330,7 +330,7 @@ int bch2_move_extent(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; - int ret = -ENOMEM; + int ret = 0; if (trace_io_move_enabled()) trace_io_move2(c, k, &io_opts, &data_opts); @@ -351,11 +351,10 @@ int bch2_move_extent(struct moving_context *ctxt, struct moving_io *io = allocate_dropping_locks(trans, ret, kzalloc(sizeof(struct moving_io), _gfp)); - if (!io) - goto err; - + if (!io && !ret) + ret = bch_err_throw(c, ENOMEM_move_extent); if (ret) - goto err_free; + goto err; INIT_LIST_HEAD(&io->io_list); io->write.ctxt = ctxt; @@ -366,7 +365,7 @@ int bch2_move_extent(struct moving_context *ctxt, ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, &io_opts, data_opts, iter->btree_id, k); if (ret) - goto err_free; + goto err; io->write.op.end_io = move_write_done; } else { @@ -380,7 +379,7 @@ int bch2_move_extent(struct moving_context *ctxt, ret = bch2_data_update_bios_init(&io->write, c, &io_opts); if (ret) - goto err_free; + goto err; } io->write.rbio.bio.bi_end_io = move_read_endio; @@ -423,9 +422,8 @@ int bch2_move_extent(struct moving_context *ctxt, BCH_READ_last_fragment, data_opts.scrub ? data_opts.read_dev : -1); return 0; -err_free: - kfree(io); err: + kfree(io); if (bch2_err_matches(ret, EROFS) || bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; @@ -795,50 +793,50 @@ out: return ret; } -int __bch2_move_data(struct moving_context *ctxt, - struct bbpos start, - struct bbpos end, - move_pred_fn pred, void *arg) +static int bch2_move_data(struct bch_fs *c, + struct bbpos start, + struct bbpos end, + unsigned min_depth, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) { - struct bch_fs *c = ctxt->trans->c; - enum btree_id id; int ret = 0; - for (id = start.btree; + struct moving_context ctxt; + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + + for (enum btree_id id = start.btree; id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); id++) { - ctxt->stats->pos = BBPOS(id, POS_MIN); + ctxt.stats->pos = BBPOS(id, POS_MIN); - if (!btree_type_has_ptrs(id) || - !bch2_btree_id_root(c, id)->b) + if (!bch2_btree_id_root(c, id)->b) continue; - ret = bch2_move_data_btree(ctxt, - id == start.btree ? start.pos : POS_MIN, - id == end.btree ? end.pos : POS_MAX, - pred, arg, id, 0); + unsigned min_depth_this_btree = min_depth; + + if (!btree_type_has_ptrs(id)) + min_depth_this_btree = max(min_depth_this_btree, 1); + + for (unsigned level = min_depth_this_btree; + level < BTREE_MAX_DEPTH; + level++) { + ret = bch2_move_data_btree(&ctxt, + id == start.btree ? start.pos : POS_MIN, + id == end.btree ? end.pos : POS_MAX, + pred, arg, id, level); + if (ret) + break; + } + if (ret) break; } - return ret; -} - -int bch2_move_data(struct bch_fs *c, - struct bbpos start, - struct bbpos end, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) -{ - struct moving_context ctxt; - - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - int ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); - return ret; } @@ -1206,14 +1204,6 @@ static bool migrate_pred(struct bch_fs *c, void *arg, return data_opts->rewrite_ptrs != 0; } -static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, - struct btree *b, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts); -} - /* * Ancient versions of bcachefs produced packed formats which could represent * keys that the in memory format cannot represent; this checks for those @@ -1293,15 +1283,6 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, return data_opts->kill_ptrs != 0; } -static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, - struct btree *b, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), - io_opts, data_opts); -} - static bool scrub_pred(struct bch_fs *c, void *_arg, enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, @@ -1359,14 +1340,11 @@ int bch2_data_job(struct bch_fs *c, case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, start, end, - rereplicate_btree_pred, c, stats) ?: ret; - ret = bch2_move_data(c, start, end, - NULL, - stats, + ret = bch2_move_data(c, start, end, 0, NULL, stats, writepoint_hashed((unsigned long) current), true, rereplicate_pred, c) ?: ret; + bch2_btree_interior_updates_flush(c); ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_migrate: @@ -1389,12 +1367,10 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_scan_old_btree_nodes(c, stats); break; case BCH_DATA_OP_drop_extra_replicas: - ret = bch2_move_btree(c, start, end, - drop_extra_replicas_btree_pred, c, stats) ?: ret; - ret = bch2_move_data(c, start, end, NULL, stats, - writepoint_hashed((unsigned long) current), - true, - drop_extra_replicas_pred, c) ?: ret; + ret = bch2_move_data(c, start, end, 0, NULL, stats, + writepoint_hashed((unsigned long) current), + true, + drop_extra_replicas_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; default: diff --git a/libbcachefs/move.h b/libbcachefs/move.h index fe92ca6d..481026ff 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -128,18 +128,6 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, move_pred_fn, void *, enum btree_id, unsigned); -int __bch2_move_data(struct moving_context *, - struct bbpos, - struct bbpos, - move_pred_fn, void *); -int bch2_move_data(struct bch_fs *, - struct bbpos start, - struct bbpos end, - struct bch_ratelimit *, - struct bch_move_stats *, - struct write_point_specifier, - bool, - move_pred_fn, void *); int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned, struct bch_ratelimit *, struct bch_move_stats *, diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index a8eea478..58c159e5 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -37,78 +37,79 @@ int bch2_btree_lost_data(struct bch_fs *c, struct printbuf *msg, enum btree_id btree) { - u64 b = BIT_ULL(btree); int ret = 0; guard(mutex)(&c->sb_lock); + bool write_sb = false; struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - if (!(c->sb.btrees_lost_data & b)) { + if (!(c->sb.btrees_lost_data & BIT_ULL(btree))) { prt_printf(msg, "flagging btree "); bch2_btree_id_to_text(msg, btree); prt_printf(msg, " lost data\n"); - ext->btrees_lost_data |= cpu_to_le64(b); + write_sb |= !__test_and_set_bit_le64(btree, &ext->btrees_lost_data); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; /* Btree node accounting will be off: */ - __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; - - __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; + + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0, &write_sb) ?: ret; goto out; case BTREE_ID_need_discard: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_freespace: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_lru: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_accounting: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; goto out; case BTREE_ID_snapshots: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; goto out; default: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; goto out; } out: - bch2_write_super(c); + if (write_sb) + bch2_write_super(c); return ret; } diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c index f9d1c492..b2cdd111 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/recovery_passes.c @@ -340,7 +340,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c, int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct printbuf *out, enum bch_recovery_pass pass, - enum bch_run_recovery_pass_flags flags) + enum bch_run_recovery_pass_flags flags, + bool *write_sb) { struct bch_fs_recovery *r = &c->recovery; int ret = 0; @@ -362,7 +363,8 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, if (!(flags & RUN_RECOVERY_PASS_nopersistent)) { struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + *write_sb |= !__test_and_set_bit_le64(bch2_recovery_pass_to_stable(pass), + ext->recovery_passes_required); } if (pass < BCH_RECOVERY_PASS_set_may_go_rw && @@ -408,14 +410,19 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass, enum bch_run_recovery_pass_flags flags) { - int ret = 0; + /* + * With RUN_RECOVERY_PASS_ratelimit, recovery_pass_needs_set needs + * sb_lock + */ + if (!(flags & RUN_RECOVERY_PASS_ratelimit) && + !recovery_pass_needs_set(c, pass, &flags)) + return 0; - if (recovery_pass_needs_set(c, pass, &flags)) { - guard(mutex)(&c->sb_lock); - ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); + guard(mutex)(&c->sb_lock); + bool write_sb = false; + int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb); + if (write_sb) bch2_write_super(c); - } - return ret; } @@ -438,14 +445,13 @@ int bch2_require_recovery_pass(struct bch_fs *c, return 0; enum bch_run_recovery_pass_flags flags = 0; - int ret = 0; - if (recovery_pass_needs_set(c, pass, &flags)) { - ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); + bool write_sb = false; + int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb) ?: + bch_err_throw(c, recovery_pass_will_run); + if (write_sb) bch2_write_super(c); - } - - return ret ?: bch_err_throw(c, recovery_pass_will_run); + return ret; } int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) @@ -459,8 +465,10 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pa bch2_log_msg_start(c, &buf); guard(mutex)(&c->sb_lock); + bool write_sb = false; int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass, - RUN_RECOVERY_PASS_nopersistent); + RUN_RECOVERY_PASS_nopersistent, + &write_sb); bch2_print_str(c, KERN_NOTICE, buf.buf); return ret; diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h index 2117f0ce..4f2c2f81 100644 --- a/libbcachefs/recovery_passes.h +++ b/libbcachefs/recovery_passes.h @@ -30,7 +30,8 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pas int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, enum bch_recovery_pass, - enum bch_run_recovery_pass_flags); + enum bch_run_recovery_pass_flags, + bool *); int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, enum bch_recovery_pass, enum bch_run_recovery_pass_flags); diff --git a/libbcachefs/str_hash.c b/libbcachefs/str_hash.c index dfe4b6ae..3e08e55d 100644 --- a/libbcachefs/str_hash.c +++ b/libbcachefs/str_hash.c @@ -329,7 +329,6 @@ duplicate_entries: out: fsck_err: bch2_trans_iter_exit(trans, dup_iter); - printbuf_exit(&buf); if (free_snapshots_seen) darray_exit(&s->ids); return ret; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 4e038f65..0fc0b222 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -514,6 +514,10 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (ret) return ret; + ret = bch2_fs_mark_dirty(c); + if (ret) + return ret; + clear_bit(BCH_FS_clean_shutdown, &c->flags); scoped_guard(rcu) @@ -537,10 +541,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_journal_space_available(&c->journal); } - ret = bch2_fs_mark_dirty(c); - if (ret) - return ret; - /* * Don't jump to our error path, and call bch2_fs_read_only(), unless we * successfully marked the filesystem dirty diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 158f526e..bd3fa9c3 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -18,6 +18,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "btree_update_interior.h" +#include "btree_write_buffer.h" #include "btree_gc.h" #include "buckets.h" #include "clock.h" @@ -150,6 +151,7 @@ write_attribute(trigger_journal_flush); write_attribute(trigger_journal_writes); write_attribute(trigger_btree_cache_shrink); write_attribute(trigger_btree_key_cache_shrink); +write_attribute(trigger_btree_write_buffer_flush); write_attribute(trigger_btree_updates); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_recalc_capacity); @@ -539,6 +541,11 @@ STORE(bch2_fs) c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc); } + if (attr == &sysfs_trigger_btree_write_buffer_flush) + bch2_trans_do(c, + (bch2_btree_write_buffer_flush_sync(trans), + bch2_trans_begin(trans))); + if (attr == &sysfs_trigger_gc) bch2_gc_gens(c); @@ -709,6 +716,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_journal_writes, &sysfs_trigger_btree_cache_shrink, &sysfs_trigger_btree_key_cache_shrink, + &sysfs_trigger_btree_write_buffer_flush, &sysfs_trigger_btree_updates, &sysfs_trigger_freelist_wakeup, &sysfs_trigger_recalc_capacity, diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 768528c2..52ac8230 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -733,6 +733,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr) return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; } +static inline bool __test_and_set_bit_le64(size_t bit, __le64 *addr) +{ + bool ret = test_bit_le64(bit, addr); + __set_bit_le64(bit, addr); + return ret; +} + static inline void memcpy_swab(void *_dst, void *_src, size_t len) { u8 *dst = _dst + len; diff --git a/linux/closure.c b/linux/closure.c index 2bfe7d2a..4fb78d18 100644 --- a/linux/closure.c +++ b/linux/closure.c @@ -13,23 +13,25 @@ #include <linux/seq_file.h> #include <linux/sched/debug.h> -static inline void closure_put_after_sub_checks(int flags) +static inline void closure_put_after_sub_checks(struct closure *cl, int flags) { int r = flags & CLOSURE_REMAINING_MASK; if (WARN(flags & CLOSURE_GUARD_MASK, - "closure has guard bits set: %x (%u)", + "closure %ps has guard bits set: %x (%u)", + cl->fn, flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) r &= ~CLOSURE_GUARD_MASK; WARN(!r && (flags & ~CLOSURE_DESTRUCTOR), - "closure ref hit 0 with incorrect flags set: %x (%u)", + "closure %ps ref hit 0 with incorrect flags set: %x (%u)", + cl->fn, flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); } static inline void closure_put_after_sub(struct closure *cl, int flags) { - closure_put_after_sub_checks(flags); + closure_put_after_sub_checks(cl, flags); if (!(flags & CLOSURE_REMAINING_MASK)) { smp_acquire__after_ctrl_dep(); @@ -167,7 +169,7 @@ void __sched closure_return_sync(struct closure *cl) unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR, &cl->remaining); - closure_put_after_sub_checks(flags); + closure_put_after_sub_checks(cl, flags); if (unlikely(flags & CLOSURE_REMAINING_MASK)) { while (1) { |