diff options
Diffstat (limited to 'fs/bcachefs')
28 files changed, 233 insertions, 139 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index f1d35b7f3fc5..55c21e9b4c52 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -20,6 +20,7 @@ #include "enumerated_ref.h" #include "error.h" #include "lru.h" +#include "progress.h" #include "recovery.h" #include "varint.h" @@ -389,7 +390,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) if (k.k->type == KEY_TYPE_alloc_v4) { void *src, *dst; - *out = *bkey_s_c_to_alloc_v4(k).v; + bkey_val_copy(out, bkey_s_c_to_alloc_v4(k)); src = alloc_v4_backpointers(out); SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); @@ -1553,6 +1554,9 @@ int bch2_check_alloc_info(struct bch_fs *c) struct bkey_s_c k; int ret = 0; + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch); @@ -1576,6 +1580,8 @@ int bch2_check_alloc_info(struct bch_fs *c) if (!k.k) break; + progress_update_iter(trans, &progress, &iter); + if (k.k->type) { next = bpos_nosnap_successor(k.k->p); @@ -1736,12 +1742,16 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); + CLASS(btree_trans, trans)(c); int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)) ?: - bch2_check_stripe_to_lru_refs(trans); + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed); + }))?: bch2_check_stripe_to_lru_refs(trans); bch2_bkey_buf_exit(&last_flushed, c); return ret; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 8a6f886b5bf2..45c15bdaa6f4 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1277,4 +1277,11 @@ static inline int bch2_fs_casefold_enabled(struct bch_fs *c) return 0; } +static inline const char *strip_bch2(const char *msg) +{ + if (!strncmp("bch2_", msg, 5)) + return msg + 5; + return msg; +} + #endif /* _BCACHEFS_H */ diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 23ed7393f07f..25b01e750880 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -511,7 +511,7 @@ restart: if (btree_node_accessed(b)) { clear_btree_node_accessed(b); bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; - --touched;; + --touched; } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); __btree_node_data_free(b); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index bd86dd7151a1..8a03cd75a64f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1405,10 +1405,8 @@ static void btree_node_read_work(struct work_struct *work) ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); - if (ret <= 0) { - set_btree_node_read_error(b); + if (ret <= 0) break; - } ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; @@ -1442,27 +1440,21 @@ start: bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); - if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) - continue; - - if (ret) - set_btree_node_read_error(b); - - break; + if (ret != -BCH_ERR_btree_node_read_err_want_retry && + ret != -BCH_ERR_btree_node_read_err_must_retry) + break; } bch2_io_failures_to_text(&buf, c, &failed); - if (btree_node_read_error(b)) - bch2_btree_lost_data(c, &buf, b->c.btree_id); - /* * only print retry success if we read from a replica with no errors */ - if (btree_node_read_error(b)) + if (ret) { + set_btree_node_read_error(b); + bch2_btree_lost_data(c, &buf, b->c.btree_id); prt_printf(&buf, "ret %s", bch2_err_str(ret)); - else if (failed.nr) { + } else if (failed.nr) { if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) prt_printf(&buf, "retry success"); else @@ -2019,7 +2011,7 @@ static void btree_node_scrub_work(struct work_struct *work) bch_err_fn_ratelimited(c, ret); } - bch2_bkey_buf_exit(&scrub->key, c);; + bch2_bkey_buf_exit(&scrub->key, c); btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); kfree(scrub); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index a282c3886168..dd716b35a11d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2744,7 +2744,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct } /* Extents can straddle iter->pos: */ - iter->pos = bpos_min(iter->pos, k.k->p);; + iter->pos = bpos_min(iter->pos, k.k->p); if (iter->flags & BTREE_ITER_filter_snapshots) iter->pos.snapshot = iter->snapshot; diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 682898fb82b1..4b7b5ca74ba1 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -155,13 +155,8 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) return; - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); - bio->bi_iter.bi_sector = offset; - bch2_bio_map(bio, b->data, c->opts.btree_node_size); - - submit_time = local_clock(); - submit_bio_wait(bio); - bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX) + return; rcu_read_lock(); struct found_btree_node n = { @@ -179,17 +174,18 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); - found_btree_node_to_key(&b->key, &n); - - CLASS(printbuf, buf)(); - bool can_read = !bch2_btree_node_read_done(c, ca, b, NULL, &buf); + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); - printbuf_reset(&buf); - found_btree_node_to_text(&buf, c, &n); + submit_time = local_clock(); + submit_bio_wait(bio); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); - bch_verbose(ca, "found %sreadable node %s", can_read ? "" : "un", buf.buf); + found_btree_node_to_key(&b->key, &n); - if (can_read) { + CLASS(printbuf, buf)(); + if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { /* read_done will swap out b->data for another buffer */ bn = b->data; /* diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 58590ccc26bd..8b94a8156fbf 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -969,7 +969,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans, BUG_ON(current != c->recovery_task); struct bkey_i *accounting; - +retry: percpu_down_read(&c->mark_lock); for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); accounting != btree_trans_subbuf_top(trans, &trans->accounting); @@ -1025,13 +1025,17 @@ fatal_err: bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); percpu_down_read(&c->mark_lock); revert_fs_usage: - BUG(); - /* error path not handled by __bch2_trans_commit() */ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); i != accounting; i = bkey_next(i)) bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); percpu_up_read(&c->mark_lock); + + if (bch2_err_matches(ret, BCH_ERR_btree_insert_need_mark_replicas)) { + ret = drop_locks_do(trans, bch2_accounting_update_sb(trans)); + if (!ret) + goto retry; + } return ret; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 76adf75617aa..e4870fbc11d0 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -485,7 +485,7 @@ typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace; struct btree_trans_subbuf { u16 base; u16 u64s; - u16 size;; + u16 size; }; struct btree_trans { @@ -854,15 +854,15 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type) return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1); } -static inline bool btree_type_has_snapshots(enum btree_id btree) -{ - const u64 mask = 0 +static const u64 btree_has_snapshots_mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr) - BCH_BTREE_IDS() +BCH_BTREE_IDS() #undef x - ; +; - return BIT_ULL(btree) & mask; +static inline bool btree_type_has_snapshots(enum btree_id btree) +{ + return BIT_ULL(btree) & btree_has_snapshots_mask; } static inline bool btree_type_has_snapshot_field(enum btree_id btree) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index dd60c47528da..1b891ac43053 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -214,11 +214,13 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); - prt_printf(out, "%.*s", d_name.len, d_name.name); + prt_bytes(out, d_name.name, d_name.len); if (d.v->d_casefold) { + prt_str(out, " (casefold "); struct qstr d_name = bch2_dirent_get_lookup_name(d); - prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name); + prt_bytes(out, d_name.name, d_name.len); + prt_char(out, ')'); } prt_str(out, " ->"); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 8152ef1cbbcd..b879a586b7f6 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -282,9 +282,9 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (have_pick) return 1; - if (!have_dirty_ptrs) + if (!have_dirty_ptrs && !bkey_is_btree_ptr(k.k)) return 0; - if (have_missing_devs) + if (have_missing_devs || !have_dirty_ptrs) return bch_err_throw(c, no_device_to_read_from); if (have_csum_errors) return bch_err_throw(c, data_read_csum_err); diff --git a/fs/bcachefs/fast_list.h b/fs/bcachefs/fast_list.h index 73c9bf591fd6..f67df3f72ee2 100644 --- a/fs/bcachefs/fast_list.h +++ b/fs/bcachefs/fast_list.h @@ -9,7 +9,7 @@ struct fast_list_pcpu; struct fast_list { GENRADIX(void *) items; - struct ida slots_allocated;; + struct ida slots_allocated; struct fast_list_pcpu __percpu *buffer; }; diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index 2a6705186c44..469492f6264a 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -635,6 +635,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) goto out; } + inode->ei_last_dirtied = (unsigned long) current; + bch2_set_folio_dirty(c, inode, folio, &res, offset, len); bch2_folio_reservation_put(c, inode, &res); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 183b88bbd402..f5a9da40c647 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -15,6 +15,7 @@ #include "io_misc.h" #include "keylist.h" #include "namei.h" +#include "progress.h" #include "recovery_passes.h" #include "snapshot.h" #include "super.h" @@ -1331,11 +1332,16 @@ int bch2_check_inodes(struct bch_fs *c) CLASS(btree_trans, trans)(c); CLASS(snapshots_seen, s)(); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_inodes)); + return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &snapshot_root, &s)); + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + check_inode(trans, &iter, k, &snapshot_root, &s); + })); } static int find_oldest_inode_needs_reattach(struct btree_trans *trans, @@ -1422,12 +1428,17 @@ fsck_err: */ int bch2_check_unreachable_inodes(struct bch_fs *c) { + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_inodes)); + CLASS(btree_trans, trans)(c); return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_unreachable_inode(trans, &iter, k)); + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + check_unreachable_inode(trans, &iter, k); + })); } static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) @@ -1961,11 +1972,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch2_fpunch_snapshot(trans, - SPOS(i->inode.bi_inum, - last_block, - i->inode.bi_snapshot), - POS(i->inode.bi_inum, U64_MAX)); + ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?: + bch2_fpunch_snapshot(trans, + SPOS(i->inode.bi_inum, + last_block, + i->inode.bi_snapshot), + POS(i->inode.bi_inum, U64_MAX)); if (ret) goto err; @@ -2021,9 +2033,13 @@ int bch2_check_extents(struct bch_fs *c) CLASS(inode_walker, w)(); CLASS(extent_ends, extent_ends)(); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents)); + int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ + progress_update_iter(trans, &progress, &iter); bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); })) ?: @@ -2038,11 +2054,15 @@ int bch2_check_indirect_extents(struct bch_fs *c) CLASS(btree_trans, trans)(c); struct disk_reservation res = { 0 }; + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink)); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); bch2_disk_reservation_put(c, &res); check_extent_overbig(trans, &iter, k); })); @@ -2451,15 +2471,20 @@ int bch2_check_dirents(struct bch_fs *c) CLASS(snapshots_seen, s)(); CLASS(inode_walker, dir)(); CLASS(inode_walker, target)(); + struct progress_indicator_state progress; bool need_second_pass = false, did_second_pass = false; int ret; again: + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_dirents)); + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s, - &need_second_pass)) ?: + &need_second_pass); + })) ?: check_subdir_count_notnested(trans, &dir); if (!ret && need_second_pass && !did_second_pass) { @@ -2519,13 +2544,18 @@ int bch2_check_xattrs(struct bch_fs *c) CLASS(btree_trans, trans)(c); CLASS(inode_walker, inode)(); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_xattrs)); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - check_xattr(trans, &iter, k, &hash_info, &inode)); + BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + check_xattr(trans, &iter, k, &hash_info, &inode); + })); return ret; } @@ -2667,10 +2697,16 @@ err: int bch2_check_subvolume_structure(struct bch_fs *c) { CLASS(btree_trans, trans)(c); + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_subvolumes)); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol_path(trans, &iter, k)); + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + check_subvol_path(trans, &iter, k); + })); } static int bch2_bi_depth_renumber_one(struct btree_trans *trans, diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d7620138e038..44b02d4b6502 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -89,7 +89,12 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) new = ewma_add(old, io_latency, 5); } while (!atomic64_try_cmpxchg(latency, &old, new)); - bch2_congested_acct(ca, io_latency, now, rw); + /* + * Only track read latency for congestion accounting: writes are subject + * to heavy queuing delays from page cache writeback: + */ + if (rw == READ) + bch2_congested_acct(ca, io_latency, now, rw); __bch2_time_stats_update(&ca->io_latency[rw].stats, submit_time, now); } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 97760e89e5a3..07869436a964 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1060,14 +1060,13 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou if (open && !*blocked) { __bch2_journal_block(j); + s.v = atomic64_read_acquire(&j->reservations.counter); *blocked = true; } ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open ? ERR_PTR(-EAGAIN) : buf; - if (!ret) - smp_mb(); break; } } @@ -1297,7 +1296,7 @@ int bch2_dev_journal_bucket_delete(struct bch_dev *ca, u64 b) return -EINVAL; } - u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);; + u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); if (!new_buckets) return bch_err_throw(c, ENOMEM_set_nr_journal_buckets); @@ -1472,6 +1471,10 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) last_seq = cur_seq; u64 nr = cur_seq - last_seq; + if (nr * sizeof(struct journal_entry_pin_list) > 1U << 30) { + bch_err(c, "too many ntjournal fifo (%llu open entries)", nr); + return bch_err_throw(c, ENOMEM_journal_pin_fifo); + } /* * Extra fudge factor, in case we crashed when the journal pin fifo was @@ -1484,7 +1487,7 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) nr = max(nr, JOURNAL_PIN); init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL); if (!j->pin.data) { - bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); + bch_err(c, "error allocating journal fifo (%llu open entries)", nr); return bch_err_throw(c, ENOMEM_journal_pin_fifo); } diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index c05aa94237f8..b46b9718d841 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -267,7 +267,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u { union journal_res_state s; - s.v = atomic64_sub_return_release(((union journal_res_state) { + s.v = atomic64_sub_return(((union journal_res_state) { .buf0_count = idx == 0, .buf1_count = idx == 1, .buf2_count = idx == 2, diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 47224666d07e..093e4acad085 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -428,15 +428,22 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs bool first = true; jset_entry_for_each_key(entry, k) { - /* We may be called on entries that haven't been validated: */ - if (!k->k.u64s) - break; - if (!first) { prt_newline(out); bch2_prt_jset_entry_type(out, entry->type); prt_str(out, ": "); } + /* We may be called on entries that haven't been validated: */ + if (!k->k.u64s) { + prt_str(out, "(invalid, k->u64s 0)"); + break; + } + + if (bkey_next(k) > vstruct_last(entry)) { + prt_str(out, "(invalid, bkey overruns jset_entry)"); + break; + } + bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); prt_char(out, ' '); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index ee14656c3fdd..76109b37d681 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -9,6 +9,7 @@ #include "ec.h" #include "error.h" #include "lru.h" +#include "progress.h" #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ @@ -207,11 +208,16 @@ int bch2_check_lrus(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_lru)); + CLASS(btree_trans, trans)(c); int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_lru_key(trans, &iter, k, &last_flushed)); + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + bch2_check_lru_key(trans, &iter, k, &last_flushed); + })); bch2_bkey_buf_exit(&last_flushed, c); return ret; diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index 42353067ba28..792fc6fef270 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -52,7 +52,8 @@ void bch2_progress_update_iter(struct btree_trans *trans, : 0; prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ", - msg, percent, s->nodes_seen, s->nodes_total); + strip_bch2(msg), + percent, s->nodes_seen, s->nodes_total); bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); bch_info(c, "%s", buf.buf); diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h index 23fb1811f943..972a73087ffe 100644 --- a/fs/bcachefs/progress.h +++ b/fs/bcachefs/progress.h @@ -26,4 +26,7 @@ void bch2_progress_update_iter(struct btree_trans *, struct btree_iter *, const char *); +#define progress_update_iter(trans, p, iter) \ + bch2_progress_update_iter(trans, p, iter, __func__) + #endif /* _BCACHEFS_PROGRESS_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 32fa7cf90b63..c7e7f508fd0b 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -15,6 +15,7 @@ #include "inode.h" #include "io_write.h" #include "move.h" +#include "progress.h" #include "rebalance.h" #include "subvolume.h" #include "super-io.h" @@ -858,7 +859,12 @@ int bch2_check_rebalance_work(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_rebalance_work)); + while (!ret) { + progress_update_iter(trans, &progress, &rebalance_iter); + bch2_trans_begin(trans); ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 58c159e5f10d..304473dac268 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -67,13 +67,16 @@ int bch2_btree_lost_data(struct bch_fs *c, ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + switch (btree) { case BTREE_ID_alloc: ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; - write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index b2cdd111fd0e..bd442652d0f5 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -639,6 +639,8 @@ void bch2_recovery_pass_status_to_text(struct printbuf *out, struct bch_fs *c) prt_printf(out, "Current pass:\t%s\n", bch2_recovery_passes[r->curr_pass]); prt_passes(out, "Current passes", r->passes_to_run); } + + prt_printf(out, "Pass done:\t%s\n", bch2_recovery_passes[r->pass_done]); } void bch2_fs_recovery_passes_init(struct bch_fs *c) diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 4f2c2f811d5e..95e3612bb96c 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -26,6 +26,12 @@ static inline bool go_rw_in_recovery(struct bch_fs *c) (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))); } +static inline bool recovery_pass_will_run(struct bch_fs *c, enum bch_recovery_pass pass) +{ + return unlikely(test_bit(BCH_FS_in_recovery, &c->flags) && + c->recovery.passes_to_run & BIT_ULL(pass)); +} + int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index fb72ad730518..b2b892687cdd 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -17,7 +17,7 @@ UUID_INIT(0xffffffff, 0xffff, 0xffff, \ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) -#define BCH_MIN_NR_NBUCKETS (1 << 6) +#define BCH_MIN_NR_NBUCKETS (1 << 9) #define BCH_IOPS_MEASUREMENTS() \ x(seqread, 0) \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 7a801513b134..5370ccb85d2d 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -11,6 +11,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "progress.h" #include "recovery_passes.h" #include "snapshot.h" @@ -142,7 +143,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) guard(rcu)(); struct snapshot_table *t = rcu_dereference(c->snapshots); - if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots)) + if (unlikely(recovery_pass_will_run(c, BCH_RECOVERY_PASS_check_snapshots))) return __bch2_snapshot_is_ancestor_early(t, id, ancestor); if (likely(ancestor >= IS_ANCESTOR_BITMAP)) @@ -364,31 +365,32 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, /* fsck: */ -static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) +static u32 bch2_snapshot_child(struct snapshot_table *t, + u32 id, unsigned child) { - return snapshot_t(c, id)->children[child]; + return __snapshot_t(t, id)->children[child]; } -static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) +static u32 bch2_snapshot_left_child(struct snapshot_table *t, u32 id) { - return bch2_snapshot_child(c, id, 0); + return bch2_snapshot_child(t, id, 0); } -static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) +static u32 bch2_snapshot_right_child(struct snapshot_table *t, u32 id) { - return bch2_snapshot_child(c, id, 1); + return bch2_snapshot_child(t, id, 1); } -static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) +static u32 bch2_snapshot_tree_next(struct snapshot_table *t, u32 id) { u32 n, parent; - n = bch2_snapshot_left_child(c, id); + n = bch2_snapshot_left_child(t, id); if (n) return n; - while ((parent = bch2_snapshot_parent(c, id))) { - n = bch2_snapshot_right_child(c, parent); + while ((parent = __bch2_snapshot_parent(t, id))) { + n = bch2_snapshot_right_child(t, parent); if (n && n != id) return n; id = parent; @@ -401,17 +403,18 @@ u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root, snapshot_id_list *skip) { guard(rcu)(); + struct snapshot_table *t = rcu_dereference(c->snapshots); u32 id, subvol = 0, s; retry: id = snapshot_root; - while (id && bch2_snapshot_exists(c, id)) { + while (id && __bch2_snapshot_exists(t, id)) { if (!(skip && snapshot_list_has_id(skip, id))) { - s = snapshot_t(c, id)->subvol; + s = __snapshot_t(t, id)->subvol; if (s && (!subvol || s < subvol)) subvol = s; } - id = bch2_snapshot_tree_next(c, id); + id = bch2_snapshot_tree_next(t, id); if (id == snapshot_root) break; } @@ -973,12 +976,16 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) struct snapshot_tree_reconstruct r = {}; int ret = 0; + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, btree_has_snapshots_mask); + for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { if (btree_type_has_snapshots(btree)) { r.btree = btree; ret = for_each_btree_key(trans, iter, btree, POS_MIN, BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({ + progress_update_iter(trans, &progress, &iter); get_snapshot_trees(c, &r, k.k->p); })); if (ret) @@ -1424,38 +1431,22 @@ static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id) return i ? i->live_child : 0; } -static unsigned __live_child(struct snapshot_table *t, u32 id, - snapshot_id_list *delete_leaves, - interior_delete_list *delete_interior) -{ - struct snapshot_t *s = __snapshot_t(t, id); - if (!s) - return 0; - - for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) - if (s->children[i] && - !snapshot_list_has_id(delete_leaves, s->children[i]) && - !interior_delete_has_id(delete_interior, s->children[i])) - return s->children[i]; - - for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) { - u32 live_child = s->children[i] - ? __live_child(t, s->children[i], delete_leaves, delete_interior) - : 0; - if (live_child) - return live_child; - } - - return 0; -} - -static unsigned live_child(struct bch_fs *c, u32 id) +static unsigned live_child(struct bch_fs *c, u32 start) { struct snapshot_delete *d = &c->snapshot_delete; guard(rcu)(); - return __live_child(rcu_dereference(c->snapshots), id, - &d->delete_leaves, &d->delete_interior); + struct snapshot_table *t = rcu_dereference(c->snapshots); + + for (u32 id = bch2_snapshot_tree_next(t, start); + id && id != start; + id = bch2_snapshot_tree_next(t, id)) + if (bch2_snapshot_is_leaf(c, id) && + !snapshot_list_has_id(&d->delete_leaves, id) && + !interior_delete_has_id(&d->delete_interior, id)) + return id; + + return 0; } static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id) @@ -1712,12 +1703,14 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, interior_delete_list *skip) { guard(rcu)(); + struct snapshot_table *t = rcu_dereference(c->snapshots); + while (interior_delete_has_id(skip, id)) - id = __bch2_snapshot_parent(c, id); + id = __bch2_snapshot_parent(t, id); while (n--) { do { - id = __bch2_snapshot_parent(c, id); + id = __bch2_snapshot_parent(t, id); } while (interior_delete_has_id(skip, id)); } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 6dcb118b0fbd..fef32a0118c4 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -63,19 +63,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) return __bch2_snapshot_parent_early(c, id); } -static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) +static inline u32 __bch2_snapshot_parent(struct snapshot_table *t, u32 id) { - const struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s = __snapshot_t(t, id); if (!s) return 0; u32 parent = s->parent; if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && parent && - s->depth != snapshot_t(c, parent)->depth + 1) + s->depth != __snapshot_t(t, parent)->depth + 1) panic("id %u depth=%u parent %u depth=%u\n", - id, snapshot_t(c, id)->depth, - parent, snapshot_t(c, parent)->depth); + id, __snapshot_t(t, id)->depth, + parent, __snapshot_t(t, parent)->depth); return parent; } @@ -83,14 +83,16 @@ static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) { guard(rcu)(); - return __bch2_snapshot_parent(c, id); + return __bch2_snapshot_parent(rcu_dereference(c->snapshots), id); } static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) { guard(rcu)(); + struct snapshot_table *t = rcu_dereference(c->snapshots); + while (n--) - id = __bch2_snapshot_parent(c, id); + id = __bch2_snapshot_parent(t, id); return id; } @@ -100,23 +102,29 @@ u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32); static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) { guard(rcu)(); + struct snapshot_table *t = rcu_dereference(c->snapshots); u32 parent; - while ((parent = __bch2_snapshot_parent(c, id))) + while ((parent = __bch2_snapshot_parent(t, id))) id = parent; return id; } -static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c, u32 id) +static inline enum snapshot_id_state __bch2_snapshot_id_state(struct snapshot_table *t, u32 id) { - const struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s = __snapshot_t(t, id); return s ? s->state : SNAPSHOT_ID_empty; } static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id) { guard(rcu)(); - return __bch2_snapshot_id_state(c, id); + return __bch2_snapshot_id_state(rcu_dereference(c->snapshots), id); +} + +static inline bool __bch2_snapshot_exists(struct snapshot_table *t, u32 id) +{ + return __bch2_snapshot_id_state(t, id) == SNAPSHOT_ID_live; } static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0fc0b2221036..b3b2d8353a36 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -729,6 +729,8 @@ void __bch2_fs_stop(struct bch_fs *c) cancel_work_sync(&ca->io_error_work); cancel_work_sync(&c->read_only_work); + + flush_work(&c->btree_interior_update_work); } void bch2_fs_free(struct bch_fs *c) |