summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-01-05 17:47:10 -0800
committerDarrick J. Wong <djwong@kernel.org>2021-03-25 17:08:52 -0700
commit27669f806cc7dd75329bc93751bb58298e7e02eb (patch)
tree2c8c5613cdfc6f9ab0918c45e326de48e378abbb
parent68982df17e98c68896294470d757c128015b82b1 (diff)
xfs: online repair of the realtime refcount btreerealtime-reflink_2021-03-25
Port the data device's refcount btree repair code to the realtime refcount btree. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/bmap_repair.c3
-rw-r--r--fs/xfs/scrub/repair.c14
-rw-r--r--fs/xfs/scrub/repair.h5
-rw-r--r--fs/xfs/scrub/rtrefcount_repair.c706
-rw-r--r--fs/xfs/scrub/rtrmap_repair.c3
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/trace.h22
8 files changed, 748 insertions, 8 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 62b7cd6a8417..fe2ee79e8e93 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -197,6 +197,7 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rtbitmap_repair.o \
+ rtrefcount_repair.o \
rtrmap_repair.o \
rtsummary_repair.o \
)
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 0eb1a3de12a9..428e8d1a3322 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -287,7 +287,8 @@ xrep_bmap_check_rtfork_rmap(
return -EFSCORRUPTED;
/* Make sure this isn't free space. */
- return xrep_rtext_is_free(sc, rec->rm_startblock, rec->rm_blockcount);
+ return xrep_rtext_is_free(sc, rec->rm_startblock, rec->rm_blockcount,
+ false);
}
/* Record realtime extents that belong to this inode's fork. */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 740d86ec2555..4352511c9431 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -2263,14 +2263,15 @@ xrep_swapext_prep(
}
/*
- * Check if any part of this range of rt blocks is free, so that we don't
- * rebuild things with bad records. Returns -EFSCORRUPTED if bad.
+ * Check if any part of this range of rt blocks is free or misaligned, so that
+ * we don't rebuild things with bad records. Returns -EFSCORRUPTED if bad.
*/
int
xrep_rtext_is_free(
struct xfs_scrub *sc,
xfs_rtblock_t rtbno,
- xfs_filblks_t len)
+ xfs_filblks_t len,
+ bool must_align)
{
struct xfs_mount *mp = sc->mp;
xfs_rtblock_t startext;
@@ -2280,9 +2281,13 @@ xrep_rtext_is_free(
bool is_free = false;
int error;
- /* Convert rt blocks to rt extents. */
+ /* Convert rt blocks to rt extents, and enforce alignment. */
startext = div_u64_rem(rtbno, mp->m_sb.sb_rextsize, &mod);
+ if (mod != 0 && must_align)
+ return -EFSCORRUPTED;
endext = div_u64_rem(rtbno + len - 1, mp->m_sb.sb_rextsize, &mod);
+ if (mod != 0 && must_align)
+ return -EFSCORRUPTED;
/* Make sure this isn't free space. */
extcount = endext - startext + 1;
@@ -2304,6 +2309,7 @@ xrep_is_rtmeta_ino(
{
return ino == sc->mp->m_rbmip->i_ino ||
ino == sc->mp->m_rsumip->i_ino ||
+ ino == sc->mp->m_rrefcountip->i_ino ||
ino == sc->mp->m_rrmapip->i_ino;
}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 9a8d9195b7ae..c8a4daf67fb2 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -88,7 +88,7 @@ int xrep_check_ino_btree_mapping(struct xfs_scrub *sc,
const struct xfs_rmap_irec *rec);
int xrep_rtext_is_free(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
- xfs_filblks_t len);
+ xfs_filblks_t len, bool must_align);
bool xrep_is_rtmeta_ino(struct xfs_scrub *sc, xfs_ino_t ino);
static inline int
@@ -144,10 +144,12 @@ int xrep_quotacheck(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtrmapbt(struct xfs_scrub *sc);
+int xrep_rtrefcountbt(struct xfs_scrub *sc);
#else
# define xrep_rtsummary xrep_notsupported
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtrmapbt xrep_notsupported
+# define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_RT */
struct xrep_newbt_resv {
@@ -274,6 +276,7 @@ xrep_rmapbt_setup(
#define xrep_parent xrep_notsupported
#define xrep_rtbitmap xrep_notsupported
#define xrep_rtrmapbt xrep_notsupported
+#define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c
new file mode 100644
index 000000000000..d3a3e6dbf2c1
--- /dev/null
+++ b/fs/xfs/scrub/rtrefcount_repair.c
@@ -0,0 +1,706 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_error.h"
+#include "xfs_health.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/array.h"
+
+/*
+ * Rebuilding the Reference Count Btree
+ * ====================================
+ *
+ * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
+ * entries as rectangles representing extents of physical blocks, and
+ * that the rectangles can be laid down to allow them to overlap each
+ * other; then we know that we must emit a refcnt btree entry wherever
+ * the amount of overlap changes, i.e. the emission stimulus is
+ * level-triggered:
+ *
+ * - ---
+ * -- ----- ---- --- ------
+ * -- ---- ----------- ---- ---------
+ * -------------------------------- -----------
+ * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
+ * 2 1 23 21 3 43 234 2123 1 01 2 3 0
+ *
+ * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the bnobt tells us which blocks are free; single-use
+ * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time. Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * rt refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ * - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ * - Add to the bag all rmaps in the array where startblock == sp.
+ * - Set np to the physical block where the bag size will change. This
+ * is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ * - Record the bag size as old_bag_size.
+ *
+ * - While the bag isn't empty,
+ * - Remove from the bag all rmaps where startblock + len == np.
+ * - Add to the bag all rmaps in the array where startblock == np.
+ * - If the bag size isn't old_bag_size, store the refcount entry
+ * (sp, np - sp, bag_size) in the refcnt btree.
+ * - If the bag is empty, break out of the inner loop.
+ * - Set old_bag_size to the bag size
+ * - Set sp = np.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap)
+ * and (startblock + len of each rmap in the bag).
+ *
+ * Like all the other repairers, we make a list of all the refcount
+ * records we need, then reinitialize the rt refcount btree root and
+ * insert all the records.
+ */
+
+/*
+ * The only parts of the rmap that we care about for computing refcounts.
+ * These abbreviated records are the items stored in the rmap bag.
+ */
+struct xrep_rtrefc_rmap {
+ xfs_rtblock_t startblock; /* copied from rm_startblock */
+ xfs_filblks_t blockcount; /* copied from rm_blockcount */
+} __packed;
+
+struct xrep_rtrefc {
+ /* refcount records stashed so far; sorted before the bulk load */
+ struct xfbma *refcount_records;
+
+ /* new refcountbt information */
+ struct xrep_newbt new_btree_info;
+ struct xfs_btree_bload rtrefc_bload;
+
+ /* old refcountbt blocks, reaped once the new tree is in place */
+ struct xbitmap old_rtrefcountbt_blocks;
+
+ struct xfs_scrub *sc;
+
+ /* # of refcountbt blocks */
+ xfs_filblks_t btblocks;
+
+ /* get_record()'s position in the refcount record array. */
+ uint64_t iter;
+};
+
+/*
+ * Check for any obvious conflicts with this shared/CoW staging extent.
+ * Returns -EFSCORRUPTED if the extent fails xfs_verify_rtext(); otherwise
+ * returns the result of xrep_rtext_is_free() with alignment enforced.
+ */
+STATIC int
+xrep_rtrefc_check_ext(
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *rec)
+{
+ /* The extent must pass the realtime extent verifier. */
+ if (!xfs_verify_rtext(sc->mp, rec->rc_startblock, rec->rc_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space or misaligned. */
+ return xrep_rtext_is_free(sc, rec->rc_startblock, rec->rc_blockcount,
+ true);
+}
+
+/*
+ * Record a reference count extent. The extent is validated with
+ * xrep_rtrefc_check_ext() and traced before it is appended to the refcount
+ * record array. A pending termination request or a failed check is returned
+ * to the caller without recording anything.
+ */
+STATIC int
+xrep_rtrefc_stash(
+ struct xrep_rtrefc *rr,
+ xfs_rtblock_t bno,
+ xfs_filblks_t len,
+ xfs_nlink_t refcount)
+{
+ struct xfs_refcount_irec irec = {
+ .rc_startblock = bno,
+ .rc_blockcount = len,
+ .rc_refcount = refcount,
+ };
+ struct xfs_mount *mp = rr->sc->mp;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ error = xrep_rtrefc_check_ext(rr->sc, &irec);
+ if (error)
+ return error;
+
+ trace_xrep_rtrefc_found(mp, &irec);
+
+ return xfbma_append(rr->refcount_records, &irec);
+}
+
+/*
+ * Record a CoW staging extent as a refcount record with a refcount of 1 and
+ * a start block biased by XFS_RTREFC_COW_START.
+ *
+ * NOTE(review): the biased start block is what xrep_rtrefc_stash() hands to
+ * xrep_rtrefc_check_ext(); confirm that the rt extent verifier and the free
+ * space check accept a start block carrying that bias.
+ */
+STATIC int
+xrep_rtrefc_stash_cow(
+ struct xrep_rtrefc *rr,
+ xfs_rtblock_t bno,
+ xfs_filblks_t len)
+{
+ return xrep_rtrefc_stash(rr, bno + XFS_RTREFC_COW_START, len, 1);
+}
+
+/*
+ * Grab the next (abbreviated) rmap record from the rmapbt. On success,
+ * *have_rec is true if @rrm was filled out, or false if the cursor ran out
+ * of records first. Errors from the btree cursor, from stashing a CoW extent,
+ * or from a termination request are returned directly.
+ */
+STATIC int
+xrep_rtrefc_next_rrm(
+ struct xfs_btree_cur *cur,
+ struct xrep_rtrefc *rr,
+ struct xrep_rtrefc_rmap *rrm,
+ bool *have_rec)
+{
+ struct xfs_rmap_irec rmap;
+ struct xfs_mount *mp = cur->bc_mp;
+ int have_gt;
+ int error = 0;
+
+ *have_rec = false;
+ /*
+ * Loop through the remaining rmaps, stashing any CoW staging
+ * extents that we pass along the way. We can only share written
+ * data fork extents, so keep looping until we find an rmap for
+ * one: non-inode owners, internal inodes, attr fork mappings,
+ * bmbt blocks and unwritten extents are all skipped.
+ */
+ do {
+ if (xchk_should_terminate(rr->sc, &error))
+ goto out_error;
+
+ error = xfs_btree_increment(cur, 0, &have_gt);
+ if (error)
+ goto out_error;
+ if (!have_gt)
+ return 0;
+
+ error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
+ if (error)
+ goto out_error;
+ if (XFS_IS_CORRUPT(mp, !have_gt)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
+
+ if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
+ error = xrep_rtrefc_stash_cow(rr, rmap.rm_startblock,
+ rmap.rm_blockcount);
+ if (error)
+ goto out_error;
+ }
+ } while (XFS_RMAP_NON_INODE_OWNER(rmap.rm_owner) ||
+ xfs_internal_inum(mp, rmap.rm_owner) ||
+ (rmap.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
+ XFS_RMAP_UNWRITTEN)));
+
+ rrm->startblock = rmap.rm_startblock;
+ rrm->blockcount = rmap.rm_blockcount;
+ *have_rec = true;
+ return 0;
+
+out_error:
+ return error;
+}
+
+/*
+ * Compare two refcount records by rc_startblock. Returns 1 if @a starts
+ * after @b, -1 if it starts before @b, and 0 if the start blocks are equal.
+ * Used as the comparison function when sorting the refcount record array.
+ */
+static int
+xrep_rtrefc_extent_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xfs_refcount_irec *ap = a;
+ const struct xfs_refcount_irec *bp = b;
+
+ if (ap->rc_startblock > bp->rc_startblock)
+ return 1;
+ else if (ap->rc_startblock < bp->rc_startblock)
+ return -1;
+ return 0;
+}
+
+#define RRM_NEXT(r) ((r).startblock + (r).blockcount)
+/*
+ * Find the next block where the refcount changes, given the next rmap we
+ * looked at and the ones we're already tracking. The result is the smallest
+ * of the next rmap's start block (only if @next_valid) and RRM_NEXT() of
+ * each rmap visited in the bag. If @next_valid is false and no bag item
+ * lowers the value, NULLFSBLOCK is returned.
+ */
+static inline xfs_rtblock_t
+xrep_rtrefc_next_edge(
+ struct xfbma *rmap_bag,
+ struct xrep_rtrefc_rmap *next_rrm,
+ bool next_valid)
+{
+ struct xrep_rtrefc_rmap rrm;
+ uint64_t i;
+ xfs_rtblock_t nbno;
+
+ nbno = next_valid ? next_rrm->startblock : NULLFSBLOCK;
+ foreach_xfbma_item(rmap_bag, i, rrm)
+ nbno = min_t(xfs_rtblock_t, nbno, RRM_NEXT(rrm));
+ return nbno;
+}
+
+/*
+ * Record extents that belong to the realtime refcount inode. This is the
+ * per-record callback for the rmap walk in xrep_rtrefc_scan_ag(); matching
+ * extents are converted to fs block numbers and set in the bitmap of old
+ * rt refcount btree blocks.
+ */
+STATIC int
+xrep_rtrefc_walk_rmap(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_fsblock_t fsbno;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by the rt refcount inode. */
+ if (rec->rm_owner != mp->m_rrefcountip->i_ino)
+ return 0;
+
+ error = xrep_check_ino_btree_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.agno, rec->rm_startblock);
+
+ return xbitmap_set(&rr->old_rtrefcountbt_blocks, fsbno,
+ rec->rm_blockcount);
+}
+
+/*
+ * Scan one AG for reverse mappings for the realtime refcount btree. The AG
+ * scrub context is initialized for the walk and freed again afterwards,
+ * whether or not the walk succeeded; the walk's status is returned.
+ */
+STATIC int
+xrep_rtrefc_scan_ag(
+ struct xrep_rtrefc *rr,
+ xfs_agnumber_t agno)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ error = xrep_ag_init(sc, agno, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
+/*
+ * Iterate all the rmap records to generate reference count data. First every
+ * AG is scanned for blocks owned by the rt refcount inode. Then the rtrmapbt
+ * is walked with a bag of overlapping rmaps to find each block where the
+ * amount of overlap changes; ranges covered by more than one rmap are stashed
+ * as refcount records. The bag and the rt btree cursors are released on all
+ * exit paths after the cursors have been set up.
+ */
+STATIC int
+xrep_rtrefc_find_refcounts(
+ struct xrep_rtrefc *rr)
+{
+ struct xrep_rtrefc_rmap rrm;
+ struct xfs_scrub *sc = rr->sc;
+ struct xfbma *rmap_bag;
+ xfs_rtblock_t sbno;
+ xfs_rtblock_t cbno;
+ xfs_rtblock_t nbno;
+ size_t old_stack_sz;
+ size_t stack_sz = 0;
+ xfs_agnumber_t agno;
+ bool have;
+ int have_gt;
+ int error;
+
+ /* Scan for old rtrefc btree blocks. */
+ for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
+ error = xrep_rtrefc_scan_ag(rr, agno);
+ if (error)
+ return error;
+ }
+
+ xrep_rt_btcur_init(sc, &sc->sr);
+
+ /* Set up some storage for the bag of rmaps covering the current block */
+ rmap_bag = xfbma_init("rtrmap bag", sizeof(struct xrep_rtrefc_rmap));
+ if (IS_ERR(rmap_bag)) {
+ error = PTR_ERR(rmap_bag);
+ goto out_cur;
+ }
+
+ /* Start the rtrmapbt cursor to the left of all records. */
+ error = xfs_rmap_lookup_le(sc->sr.rmap_cur, 0, 0, 0, 0, 0, &have_gt);
+ if (error)
+ goto out_bag;
+ ASSERT(have_gt == 0);
+
+ /* Process reverse mappings into refcount data. */
+ while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
+ /*
+ * Push all rmaps with pblk == sbno onto the stack. The push
+ * loop reads one record past the last match, so the cursor
+ * is stepped back by one record afterwards.
+ */
+ error = xrep_rtrefc_next_rrm(sc->sr.rmap_cur, rr, &rrm, &have);
+ if (error)
+ goto out_bag;
+ if (!have)
+ break;
+ sbno = cbno = rrm.startblock;
+ while (have && rrm.startblock == sbno) {
+ error = xfbma_insert_anywhere(rmap_bag, &rrm);
+ if (error)
+ goto out_bag;
+ stack_sz++;
+ error = xrep_rtrefc_next_rrm(sc->sr.rmap_cur, rr, &rrm,
+ &have);
+ if (error)
+ goto out_bag;
+ }
+ error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
+ if (error)
+ goto out_bag;
+ if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
+ xfs_btree_mark_sick(sc->sr.rmap_cur);
+ error = -EFSCORRUPTED;
+ goto out_bag;
+ }
+
+ /* Set nbno to the bno of the next refcount change */
+ nbno = xrep_rtrefc_next_edge(rmap_bag, &rrm, have);
+ if (nbno == NULLFSBLOCK) {
+ error = -EFSCORRUPTED;
+ goto out_bag;
+ }
+
+ ASSERT(nbno > sbno);
+ old_stack_sz = stack_sz;
+
+ /* While stack isn't empty... */
+ while (stack_sz) {
+ uint64_t i;
+
+ /* Pop all rmaps that end at nbno */
+ foreach_xfbma_item(rmap_bag, i, rrm) {
+ if (RRM_NEXT(rrm) != nbno)
+ continue;
+ error = xfbma_nullify(rmap_bag, i);
+ if (error)
+ goto out_bag;
+ stack_sz--;
+ }
+
+ /* Push array items that start at nbno */
+ error = xrep_rtrefc_next_rrm(sc->sr.rmap_cur, rr, &rrm,
+ &have);
+ if (error)
+ goto out_bag;
+ while (have && rrm.startblock == nbno) {
+ error = xfbma_insert_anywhere(rmap_bag,
+ &rrm);
+ if (error)
+ goto out_bag;
+ stack_sz++;
+ error = xrep_rtrefc_next_rrm(sc->sr.rmap_cur,
+ rr, &rrm, &have);
+ if (error)
+ goto out_bag;
+ }
+ error = xfs_btree_decrement(sc->sr.rmap_cur, 0,
+ &have_gt);
+ if (error)
+ goto out_bag;
+ if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
+ xfs_btree_mark_sick(sc->sr.rmap_cur);
+ error = -EFSCORRUPTED;
+ goto out_bag;
+ }
+
+ /*
+ * Emit refcount if necessary: only when the bag size
+ * changed, and only for ranges that more than one
+ * rmap covered.
+ */
+ ASSERT(nbno > cbno);
+ if (stack_sz != old_stack_sz) {
+ if (old_stack_sz > 1) {
+ error = xrep_rtrefc_stash(rr, cbno,
+ nbno - cbno,
+ old_stack_sz);
+ if (error)
+ goto out_bag;
+ }
+ cbno = nbno;
+ }
+
+ /* Stack empty, go find the next rmap */
+ if (stack_sz == 0)
+ break;
+ old_stack_sz = stack_sz;
+ sbno = nbno;
+
+ /* Set nbno to the bno of the next refcount change */
+ nbno = xrep_rtrefc_next_edge(rmap_bag, &rrm, have);
+ if (nbno == NULLFSBLOCK) {
+ error = -EFSCORRUPTED;
+ goto out_bag;
+ }
+
+ ASSERT(nbno > sbno);
+ }
+ }
+
+ ASSERT(stack_sz == 0);
+out_bag:
+ xfbma_destroy(rmap_bag);
+out_cur:
+ xchk_rt_btcur_free(&sc->sr);
+ return error;
+}
+#undef RRM_NEXT
+
+/*
+ * Retrieve refcountbt data for bulk load. Fetches a record from the refcount
+ * record array through xfbma_iter_get(), using rr->iter as the position, and
+ * stores it in the cursor's bc_rec.rc.
+ */
+STATIC int
+xrep_rtrefc_get_record(
+ struct xfs_btree_cur *cur,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+
+ return xfbma_iter_get(rr->refcount_records, &rr->iter,
+ &cur->bc_rec.rc);
+}
+
+/*
+ * Feed one of the new btree blocks to the bulk loader.
+ * xrep_newbt_relog_efis() is called first; if it fails, that error is
+ * returned and no block is claimed.
+ */
+STATIC int
+xrep_rtrefc_claim_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ int error;
+
+ error = xrep_newbt_relog_efis(&rr->new_btree_info);
+ if (error)
+ return error;
+
+ return xrep_newbt_claim_block(cur, &rr->new_btree_info, ptr);
+}
+
+/*
+ * Update the inode counters. Sets the rt refcount inode's block count to the
+ * block count of the staged btree root and, if quotas are on and the count
+ * changed, adjusts the dquot block count by the difference.
+ */
+STATIC int
+xrep_rtrefc_reset_counters(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xbtree_ifakeroot *ifake = &rr->new_btree_info.ifake;
+ int64_t delta;
+ int error;
+
+ /*
+ * Update the inode block count to match the block count recorded in
+ * the new btree's fake root, and log the inode core.
+ */
+ delta = ifake->if_blocks - mp->m_rrefcountip->i_d.di_nblocks;
+ mp->m_rrefcountip->i_d.di_nblocks = ifake->if_blocks;
+ xfs_trans_log_inode(sc->tp, mp->m_rrefcountip, XFS_ILOG_CORE);
+
+ /*
+ * Adjust the quota counts by the difference in size between the old
+ * and new rt refcount btree.
+ */
+ if (delta == 0 || !XFS_IS_QUOTA_ON(sc->mp))
+ return 0;
+
+ error = xrep_ino_dqattach(sc);
+ if (error)
+ return error;
+
+ xfs_trans_mod_dquot_byino(sc->tp, mp->m_rrefcountip,
+ XFS_TRANS_DQ_BCOUNT, delta);
+ return 0;
+}
+
+/*
+ * Figure out how much space we need to create the incore btree root block.
+ * This defers to xfs_rtrefcount_broot_space_calc() for the given level and
+ * record count; @cur only supplies the mount and @priv is unused.
+ */
+STATIC size_t
+xrep_rtrefc_iroot_size(
+ struct xfs_btree_cur *cur,
+ unsigned int level,
+ unsigned int nr_this_level,
+ void *priv)
+{
+ return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
+ nr_this_level);
+}
+
+/*
+ * Use the collected refcount information to stage a new rt refcount btree. If
+ * this is successful we'll return with the new btree root information logged
+ * to the repair transaction and the transaction rolled. On failure the
+ * staging cursor (if still live) and the new btree reservation are torn down
+ * with the error code.
+ */
+STATIC int
+xrep_rtrefc_build_new_tree(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_btree_cur *refc_cur;
+ int error;
+
+ rr->rtrefc_bload.get_record = xrep_rtrefc_get_record;
+ rr->rtrefc_bload.claim_block = xrep_rtrefc_claim_block;
+ rr->rtrefc_bload.iroot_size = xrep_rtrefc_iroot_size;
+ xrep_bload_estimate_slack(sc, &rr->rtrefc_bload);
+
+ /*
+ * Sort the refcount extents by startblock or else the btree records
+ * will be in the wrong order.
+ */
+ error = xfbma_sort(rr->refcount_records, xrep_rtrefc_extent_cmp);
+ if (error)
+ return error;
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the realtime refcount inode.
+ */
+ xfs_rmap_ino_bmbt_owner(&oinfo, mp->m_rrefcountip->i_ino,
+ XFS_DATA_FORK);
+ xrep_newbt_init_inode(&rr->new_btree_info, sc, XFS_DATA_FORK, &oinfo);
+ refc_cur = xfs_rtrefcountbt_stage_cursor(mp, mp->m_rrefcountip,
+ &rr->new_btree_info.ifake);
+
+ /* Compute how many blocks we'll need. */
+ error = xfs_btree_bload_compute_geometry(refc_cur, &rr->rtrefc_bload,
+ xfbma_length(rr->refcount_records));
+ if (error)
+ goto err_cur;
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an entire
+ * rtrefcountbt from the number of extents we found, and pump up our
+ * transaction to have sufficient block reservation.
+ */
+ error = xfs_trans_reserve_more(sc->tp, rr->rtrefc_bload.nr_blocks, 0);
+ if (error)
+ goto err_cur;
+
+ /* Reserve the space we'll need for the new btree. */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree_info,
+ rr->rtrefc_bload.nr_blocks);
+ if (error)
+ goto err_cur;
+
+ /* Add all observed refcount records. */
+ rr->new_btree_info.ifake.if_fork->if_format = XFS_DINODE_FMT_REFCOUNT;
+ rr->iter = 0;
+ error = xfs_btree_bload(refc_cur, &rr->rtrefc_bload, rr);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Install the new rtrefc btree in the inode. After this point the old
+ * btree is no longer accessible and the new tree is live and we can
+ * delete the cursor.
+ */
+ xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
+ xfs_btree_del_cursor(refc_cur, 0);
+
+ /*
+ * Reset the inode counters now that we've changed the btree shape.
+ * The cursor is already gone, so failures skip straight to err_newbt.
+ */
+ error = xrep_rtrefc_reset_counters(rr);
+ if (error)
+ goto err_newbt;
+
+ /* Dispose of any unused blocks and the accounting information. */
+ xrep_newbt_destroy(&rr->new_btree_info, error);
+ return xrep_roll_trans(sc);
+err_cur:
+ xfs_btree_del_cursor(refc_cur, error);
+err_newbt:
+ xrep_newbt_destroy(&rr->new_btree_info, error);
+ return error;
+}
+
+/*
+ * Now that we've logged the root of the new btree, invalidate all of the
+ * old blocks and free them. The blocks to reap are the ones collected in
+ * the old_rtrefcountbt_blocks bitmap.
+ */
+STATIC int
+xrep_rtrefc_remove_old_tree(
+ struct xrep_rtrefc *rr)
+{
+ /* Free the old refcountbt blocks if they're not in use. */
+ return xrep_reap_extents(rr->sc, &rr->old_rtrefcountbt_blocks,
+ &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RTMETADATA);
+}
+
+/*
+ * Rebuild the rt refcount btree. Collects refcount records from the rmap
+ * data, stages and installs a new btree, and then reaps the old btree's
+ * blocks. Returns -EOPNOTSUPP if the rt rmapbt feature is absent and -ENOMEM
+ * if the repair context cannot be allocated.
+ */
+int
+xrep_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrefc *rr;
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ /* We require the rt rmapbt to rebuild anything. */
+ if (!xfs_sb_version_hasrtrmapbt(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ rr = kmem_zalloc(sizeof(struct xrep_rtrefc), KM_NOFS | KM_MAYFAIL);
+ if (!rr)
+ return -ENOMEM;
+ rr->sc = sc;
+
+ xchk_perag_get(sc->mp, &sc->sa);
+
+ /* Set up some storage for the refcount records we find */
+ rr->refcount_records = xfbma_init("rtrefcount records",
+ sizeof(struct xfs_refcount_irec));
+ if (IS_ERR(rr->refcount_records)) {
+ error = PTR_ERR(rr->refcount_records);
+ goto out_rr;
+ }
+
+ /* Collect all reference counts. */
+ xbitmap_init(&rr->old_rtrefcountbt_blocks);
+ error = xrep_rtrefc_find_refcounts(rr);
+ if (error)
+ goto out_bitmap;
+
+ /* Rebuild the refcount information. */
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xrep_rtrefc_build_new_tree(rr);
+ if (error)
+ goto out_bitmap;
+
+ /* Kill the old tree. */
+ error = xrep_rtrefc_remove_old_tree(rr);
+
+out_bitmap:
+ xbitmap_destroy(&rr->old_rtrefcountbt_blocks);
+ xfbma_destroy(rr->refcount_records);
+out_rr:
+ kmem_free(rr);
+ return error;
+}
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
index c67d60012e4c..34b1ed463cdd 100644
--- a/fs/xfs/scrub/rtrmap_repair.c
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -133,7 +133,8 @@ xrep_rtrmap_check_mapping(
return -EFSCORRUPTED;
/* Make sure this isn't free space. */
- return xrep_rtext_is_free(sc, rec->rm_startblock, rec->rm_blockcount);
+ return xrep_rtext_is_free(sc, rec->rm_startblock, rec->rm_blockcount,
+ false);
}
/* Store a reverse-mapping record. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9da1efd55102..5f8ef9861460 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -402,7 +402,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtrefcountbt,
.scrub = xchk_rtrefcountbt,
.has = xfs_sb_version_hasrtreflink,
- .repair = xrep_notsupported,
+ .repair = xrep_rtrefcountbt,
},
};
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 2058290e8b83..f3232daa0a5d 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1649,6 +1649,28 @@ TRACE_EVENT(xrep_rtrmap_found,
__entry->flags)
);
+/* Trace a refcount record that the rt refcount btree repair is stashing. */
+TRACE_EVENT(xrep_rtrefc_found,
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_refcount_irec *rec),
+ TP_ARGS(mp, rec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ __field(xfs_nlink_t, refcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->startblock = rec->rc_startblock;
+ __entry->blockcount = rec->rc_blockcount;
+ __entry->refcount = rec->rc_refcount;
+ ),
+ TP_printk("dev %d:%d bno %llu len %llu refcount %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->startblock,
+ __entry->blockcount,
+ __entry->refcount)
+);
+
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */