author	Darrick J. Wong <darrick.wong@oracle.com>	2019-01-16 10:12:14 -0800
committer	Darrick J. Wong <darrick.wong@oracle.com>	2019-04-15 17:02:48 -0700
commit	be8ac34e89c0cb3c9a5959dd6f8b393d22cfac73 (patch)
tree	ce7440ee274b85f9000ee9440a69351396cebec5
parent	f7c78f9ef1453a700a41e2d205fc61239e8f994a (diff)
xfs: repair the rmapbt
Rebuild the reverse mapping btree from all primary metadata.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
 fs/xfs/Makefile            |    1 +
 fs/xfs/scrub/bitmap.c      |   11 ++
 fs/xfs/scrub/bitmap.h      |    2 ++
 fs/xfs/scrub/repair.h      |   13 +++
 fs/xfs/scrub/rmap.c        |    6 +++++-
 fs/xfs/scrub/rmap_repair.c | 1095 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/scrub.c       |    2 +-
 7 files changed, 1128 insertions(+), 2 deletions(-)
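The new xrep_rmbt_extent record in the patch below hides the ATTR/BMBT/UNWRITTEN flags in the upper bits of its offset field, just like the on-disk rmap record. A minimal standalone sketch of that packing scheme; the constants mirror the XFS_RMAP_OFF_* definitions in xfs_format.h, and the helper name rmap_offset_pack is illustrative, not the kernel's xfs_rmap_irec_offset_pack:

/* Illustrative stand-in for the on-disk rmap offset packing. */
#include <stdint.h>

#define RMAP_OFF_ATTR_FORK	(1ULL << 63)	/* mirrors XFS_RMAP_OFF_ATTR_FORK */
#define RMAP_OFF_BMBT_BLOCK	(1ULL << 62)	/* mirrors XFS_RMAP_OFF_BMBT_BLOCK */
#define RMAP_OFF_UNWRITTEN	(1ULL << 61)	/* mirrors XFS_RMAP_OFF_UNWRITTEN */
#define RMAP_OFF_MASK		((1ULL << 54) - 1)	/* 54-bit logical offset */

static uint64_t
rmap_offset_pack(uint64_t offset, int attr_fork, int bmbt_block, int unwritten)
{
	uint64_t x = offset & RMAP_OFF_MASK;

	if (attr_fork)
		x |= RMAP_OFF_ATTR_FORK;
	if (bmbt_block)
		x |= RMAP_OFF_BMBT_BLOCK;
	if (unwritten)
		x |= RMAP_OFF_UNWRITTEN;
	return x;
}

int
main(void)
{
	/* An attr-fork bmbt block at logical offset 0. */
	uint64_t off = rmap_offset_pack(0, 1, 1, 0);

	return (off & RMAP_OFF_ATTR_FORK) ? 0 : 1;
}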
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b67d196a33cb..0655ee8c1a87 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -170,6 +170,7 @@ xfs-y += $(addprefix scrub/, \
inode_repair.o \
refcount_repair.o \
repair.o \
+ rmap_repair.o \
symlink_repair.o \
xfile.o \
)
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index fdadc9e1dc49..2c0450c39fba 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -38,6 +38,7 @@ xfs_bitmap_set(
bmr->start = start;
bmr->len = len;
list_add_tail(&bmr->list, &bitmap->list);
+ bitmap->weight += len;
return 0;
}
@@ -62,6 +63,7 @@ xfs_bitmap_init(
struct xfs_bitmap *bitmap)
{
INIT_LIST_HEAD(&bitmap->list);
+ bitmap->weight = 0;
}
/* Compare two btree extents. */
@@ -164,6 +166,7 @@ xfs_bitmap_disunion(
state |= LEFT_ALIGNED;
if (sub_start + sub_len == br->start + br->len)
state |= RIGHT_ALIGNED;
+ bitmap->weight -= sub_len;
switch (state) {
case LEFT_ALIGNED:
/* Coincides with only the left. */
@@ -301,3 +304,11 @@ xfs_bitmap_set_btblocks(
{
return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, bitmap);
}
+
+/* Compute the weight of this bitmap. */
+uint64_t
+xfs_bitmap_hweight(
+ struct xfs_bitmap *bitmap)
+{
+ return bitmap->weight;
+}
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index ae8ecbce6fa6..f75494b16cb7 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -14,6 +14,7 @@ struct xfs_bitmap_range {
struct xfs_bitmap {
struct list_head list;
+ xfs_fsblock_t weight;
};
void xfs_bitmap_init(struct xfs_bitmap *bitmap);
@@ -32,5 +33,6 @@ int xfs_bitmap_set_btcur_path(struct xfs_bitmap *bitmap,
struct xfs_btree_cur *cur);
int xfs_bitmap_set_btblocks(struct xfs_bitmap *bitmap,
struct xfs_btree_cur *cur);
+uint64_t xfs_bitmap_hweight(struct xfs_bitmap *bitmap);
#endif /* __XFS_SCRUB_BITMAP_H__ */
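The bitmap changes above cache a running weight so that xfs_bitmap_hweight() is O(1): xfs_bitmap_set() adds the extent length and xfs_bitmap_disunion() subtracts whatever it removes. A standalone sketch of the same bookkeeping pattern, assuming a hand-rolled singly linked list in place of the kernel's list_head:

/* Illustrative only: cached-weight extent list, not the kernel code. */
#include <stdint.h>
#include <stdlib.h>

struct range {
	uint64_t	start;
	uint64_t	len;
	struct range	*next;
};

struct bitmap {
	struct range	*head;
	uint64_t	weight;	/* sum of all range lengths */
};

/* Add a range; the cached weight grows by the range length. */
static int
bitmap_set(struct bitmap *bm, uint64_t start, uint64_t len)
{
	struct range *r = malloc(sizeof(*r));

	if (!r)
		return -1;
	r->start = start;
	r->len = len;
	r->next = bm->head;
	bm->head = r;
	bm->weight += len;	/* mirrors the bitmap->weight += len hunk */
	return 0;
}

/*
 * Remove a whole range; disunion in the kernel patch likewise subtracts
 * sub_len from the cached weight for partial overlaps.
 */
static void
bitmap_remove_head(struct bitmap *bm)
{
	struct range *r = bm->head;

	if (!r)
		return;
	bm->head = r->next;
	bm->weight -= r->len;
	free(r);
}

int
main(void)
{
	struct bitmap bm = { NULL, 0 };

	bitmap_set(&bm, 100, 16);
	bitmap_set(&bm, 200, 8);
	bitmap_remove_head(&bm);
	/* The weight query never walks the list. */
	return bm.weight == 16 ? 0 : 1;
}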
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 9d72166749b8..f0f811ca3e51 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -54,6 +54,7 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_xattr_reset_btree(struct xfs_scrub *sc);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
+int xrep_rmapbt_setup(struct xfs_scrub *sc, struct xfs_inode *ip);
/* Metadata revalidators */
@@ -69,6 +70,7 @@ int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
int xrep_allocbt(struct xfs_scrub *sc);
int xrep_iallocbt(struct xfs_scrub *sc);
+int xrep_rmapbt(struct xfs_scrub *sc);
int xrep_refcountbt(struct xfs_scrub *sc);
int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
@@ -111,6 +113,16 @@ xrep_reset_perag_resv(
return -EOPNOTSUPP;
}
+/* rmap setup function for CONFIG_XFS_ONLINE_REPAIR=n */
+static inline int
+xrep_rmapbt_setup(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ /* We don't support rmap repair, but we can still do a scan. */
+ return xchk_setup_ag_btree(sc, ip, false);
+}
+
#define xrep_revalidate_allocbt (NULL)
#define xrep_revalidate_iallocbt (NULL)
@@ -121,6 +133,7 @@ xrep_reset_perag_resv(
#define xrep_agi xrep_notsupported
#define xrep_allocbt xrep_notsupported
#define xrep_iallocbt xrep_notsupported
+#define xrep_rmapbt xrep_notsupported
#define xrep_refcountbt xrep_notsupported
#define xrep_inode xrep_notsupported
#define xrep_bmap_data xrep_notsupported
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 92a140c5b55e..c7a2401ad36f 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -24,6 +24,7 @@
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
+#include "scrub/repair.h"
/*
* Set us up to scrub reverse mapping btrees.
@@ -33,7 +34,10 @@ xchk_setup_ag_rmapbt(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- return xchk_setup_ag_btree(sc, ip, false);
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ return xrep_rmapbt_setup(sc, ip);
+ else
+ return xchk_setup_ag_btree(sc, ip, false);
}
/* Reverse-mapping scrubber. */
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
new file mode 100644
index 000000000000..5b93df08612d
--- /dev/null
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/array.h"
+
+/*
+ * Reverse Mapping Btree Repair
+ * ============================
+ *
+ * This is the most involved of all the AG space btree rebuilds. Everywhere
+ * else in XFS we lock inodes and then AG data structures, but generating the
+ * list of rmap records requires that we be able to scan both block mapping
+ * btrees of every inode in the filesystem to see if it owns any extents in
+ * this AG. We can't tolerate any inode updates while we do this, so we
+ * freeze the filesystem to lock everyone else out, and grant ourselves
+ * special privileges to run transactions with regular background reclamation
+ * turned off.
+ *
+ * We also have to be very careful not to allow inode reclaim to start a
+ * transaction because all transactions (other than our own) will block.
+ *
+ * So basically we scan all primary per-AG metadata and all block maps of all
+ * inodes to generate a huge list of reverse map records. Next we look for
+ * gaps in the rmap records to calculate all the unclaimed free space (1).
+ * Next, we scan all other OWN_AG metadata (bnobt, cntbt, agfl) and subtract
+ * the space used by those btrees from (1), and also subtract the free space
+ * listed in the bnobt from (1). What's left are the gaps in assigned space
+ * that the new rmapbt knows about but the existing bnobt doesn't; these are
+ * the blocks from the old rmapbt and they can be freed.
+ *
+ * We use the 'xrep_rmbt' prefix for all the rmap functions.
+ */
+
+/* Set us up to repair reverse mapping btrees. */
+int
+xrep_rmapbt_setup(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ int error;
+
+ /*
+ * Freeze out anything that can lock an inode. We reconstruct
+ * the rmapbt by reading inode bmaps with the AGF held, which is
+ * only safe w.r.t. ABBA deadlocks if we're the only ones locking
+ * inodes.
+ */
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+
+ /* Check the AG number and set up the scrub context. */
+ error = xchk_setup_fs(sc, ip);
+ if (error)
+ return error;
+
+ return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
+}
+
+/*
+ * Packed rmap record. The ATTR/BMBT/UNWRITTEN flags are hidden in the upper
+ * bits of offset, just like the on-disk record.
+ */
+struct xrep_rmbt_extent {
+ xfs_agblock_t startblock;
+ xfs_extlen_t blockcount;
+ uint64_t owner;
+ uint64_t offset;
+} __packed;
+
+/* Context for collecting rmaps */
+struct xrep_rmbt {
+ /* Bitmap of inobt blocks, for generating rmaps later. */
+ struct xfs_bitmap inobt_blocks;
+
+ /* New rmap records generated from primary metadata. */
+ struct xfbma *rmap_records;
+
+ struct xfs_scrub *sc;
+
+ /*
+ * rmap owner for whatever we're iterating to generate new rmap
+ * records.
+ */
+ uint64_t owner;
+
+ /* New AGF btreeblks value, which won't include old rmapbt blocks. */
+ xfs_agblock_t btblocks;
+
+ /* Number of new rmap records. */
+ uint64_t nr_records;
+};
+
+/* Context for calculating old rmapbt blocks */
+struct xrep_rmbt_freesp {
+ /* Unclaimed (free) space, according to the new rmap. */
+ struct xfs_bitmap rmap_freelist;
+
+ /* Free space accounted for by the free space btrees. */
+ struct xfs_bitmap bno_freelist;
+
+ struct xfs_scrub *sc;
+
+ /*
+ * Next block we expect to find while scanning the new rmap for
+ * claimed space.
+ */
+ xfs_agblock_t next_bno;
+};
+
+/* Stash a new rmap record for later insertion into the rmapbt. */
+static inline int
+xrep_rmbt_new_rec(
+ struct xrep_rmbt *rr,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount,
+ uint64_t owner,
+ uint64_t offset,
+ unsigned int flags)
+{
+ struct xrep_rmbt_extent rre = {
+ .startblock = startblock,
+ .blockcount = blockcount,
+ .owner = owner,
+ };
+ struct xfs_rmap_irec rmap = {
+ .rm_offset = offset,
+ .rm_flags = flags,
+ };
+ int error = 0;
+
+ trace_xrep_rmap_extent_fn(rr->sc->mp, rr->sc->sa.agno, startblock,
+ blockcount, owner, offset, flags);
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ rre.offset = xfs_rmap_irec_offset_pack(&rmap);
+ error = xfbma_append(rr->rmap_records, &rre);
+ if (error)
+ return error;
+ /* Count the records so we can estimate the new rmapbt size. */
+ rr->nr_records++;
+ return 0;
+}
+
+/* Add an AGFL block to the rmap list. */
+STATIC int
+xrep_rmbt_walk_agfl(
+ struct xfs_mount *mp,
+ xfs_agblock_t bno,
+ void *priv)
+{
+ struct xrep_rmbt *rr = priv;
+
+ return xrep_rmbt_new_rec(rr, bno, 1, XFS_RMAP_OWN_AG, 0, 0);
+}
+
+/* Add a btree block to the rmap list. */
+STATIC int
+xrep_rmbt_visit_btblock(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xrep_rmbt *rr = priv;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsb;
+
+ xfs_btree_get_block(cur, level, &bp);
+ if (!bp)
+ return 0;
+
+ rr->btblocks++;
+ fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+ return xrep_rmbt_new_rec(rr, XFS_FSB_TO_AGBNO(cur->bc_mp, fsb), 1,
+ rr->owner, 0, 0);
+}
+
+/* Record inode btree rmaps. */
+STATIC int
+xrep_rmbt_walk_inobt(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xrep_rmbt *rr = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_agino_t agino;
+ xfs_agino_t iperhole;
+ unsigned int i;
+ int error;
+
+ /* Record the inobt blocks. */
+ error = xfs_bitmap_set_btcur_path(&rr->inobt_blocks, cur);
+ if (error)
+ return error;
+
+ xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+ /* Record a non-sparse inode chunk. */
+ if (irec.ir_holemask == XFS_INOBT_HOLEMASK_FULL)
+ return xrep_rmbt_new_rec(rr,
+ XFS_AGINO_TO_AGBNO(mp, irec.ir_startino),
+ XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock,
+ XFS_RMAP_OWN_INODES, 0, 0);
+
+ /* Iterate the sparse chunk in holemask units. */
+ iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
+ XFS_INODES_PER_HOLEMASK_BIT);
+ for (i = 0, agino = irec.ir_startino;
+ i < XFS_INOBT_HOLEMASK_BITS;
+ i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
+ /* Skip holes. */
+ if (irec.ir_holemask & (1 << i))
+ continue;
+
+ /* Record the inode chunk otherwise. */
+ error = xrep_rmbt_new_rec(rr, XFS_AGINO_TO_AGBNO(mp, agino),
+ iperhole / mp->m_sb.sb_inopblock,
+ XFS_RMAP_OWN_INODES, 0, 0);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+/* Record a CoW staging extent. */
+STATIC int
+xrep_rmbt_walk_cowblocks(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xrep_rmbt *rr = priv;
+ struct xfs_refcount_irec refc;
+
+ xfs_refcount_btrec_to_irec(rec, &refc);
+ if (refc.rc_refcount != 1)
+ return -EFSCORRUPTED;
+
+ return xrep_rmbt_new_rec(rr, refc.rc_startblock - XFS_REFC_COW_START,
+ refc.rc_blockcount, XFS_RMAP_OWN_COW, 0, 0);
+}
+
+/* Add a bmbt block to the rmap list. */
+STATIC int
+xrep_rmbt_visit_bmbt(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xrep_rmbt *rr = priv;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsb;
+ unsigned int flags = XFS_RMAP_BMBT_BLOCK;
+
+ xfs_btree_get_block(cur, level, &bp);
+ if (!bp)
+ return 0;
+
+ fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+ if (XFS_FSB_TO_AGNO(cur->bc_mp, fsb) != rr->sc->sa.agno)
+ return 0;
+
+ if (cur->bc_private.b.whichfork == XFS_ATTR_FORK)
+ flags |= XFS_RMAP_ATTR_FORK;
+ return xrep_rmbt_new_rec(rr, XFS_FSB_TO_AGBNO(cur->bc_mp, fsb), 1,
+ cur->bc_private.b.ip->i_ino, 0, flags);
+}
+
+/* Determine rmap flags from fork and bmbt state. */
+static inline unsigned int
+xrep_rmbt_bmap_flags(
+ int whichfork,
+ xfs_exntst_t state)
+{
+ return (whichfork == XFS_ATTR_FORK ? XFS_RMAP_ATTR_FORK : 0) |
+ (state == XFS_EXT_UNWRITTEN ? XFS_RMAP_UNWRITTEN : 0);
+}
+
+/* Find all the extents from a given AG in an inode fork. */
+STATIC int
+xrep_rmbt_scan_ifork(
+ struct xrep_rmbt *rr,
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_bmbt_irec rec;
+ struct xfs_iext_cursor icur;
+ struct xfs_mount *mp = rr->sc->mp;
+ struct xfs_btree_cur *cur = NULL;
+ struct xfs_ifork *ifp;
+ unsigned int rflags;
+ int fmt;
+ int error = 0;
+
+ /* Do we even have data mapping extents? */
+ fmt = XFS_IFORK_FORMAT(ip, whichfork);
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (!ifp)
+ return 0;
+ switch (fmt) {
+ case XFS_DINODE_FMT_BTREE:
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(rr->sc->tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return 0;
+ }
+
+ /* Find all the BMBT blocks in the AG. */
+ if (fmt == XFS_DINODE_FMT_BTREE) {
+ cur = xfs_bmbt_init_cursor(mp, rr->sc->tp, ip, whichfork);
+ error = xfs_btree_visit_blocks(cur, xrep_rmbt_visit_bmbt, rr);
+ if (error)
+ goto out;
+ xfs_btree_del_cursor(cur, error);
+ cur = NULL;
+ }
+
+ /* We're done if this is an rt inode's data fork. */
+ if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip))
+ return 0;
+
+ /* Find all the extents in the AG. */
+ for_each_xfs_iext(ifp, &icur, &rec) {
+ if (isnullstartblock(rec.br_startblock))
+ continue;
+ /* Stash non-hole extent. */
+ if (XFS_FSB_TO_AGNO(mp, rec.br_startblock) == rr->sc->sa.agno) {
+ rflags = xrep_rmbt_bmap_flags(whichfork, rec.br_state);
+ error = xrep_rmbt_new_rec(rr,
+ XFS_FSB_TO_AGBNO(mp, rec.br_startblock),
+ rec.br_blockcount, ip->i_ino,
+ rec.br_startoff, rflags);
+ if (error)
+ goto out;
+ }
+ }
+out:
+ if (cur)
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/* Iterate all the inodes in an AG. */
+STATIC int
+xrep_rmbt_scan_inobt(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xrep_rmbt *rr = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = NULL;
+ xfs_ino_t ino;
+ xfs_agino_t agino;
+ int chunkidx;
+ int lock_mode = 0;
+ int error = 0;
+
+ xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+ for (chunkidx = 0, agino = irec.ir_startino;
+ chunkidx < XFS_INODES_PER_CHUNK;
+ chunkidx++, agino++) {
+ /* Skip if this inode is free */
+ if (XFS_INOBT_MASK(chunkidx) & irec.ir_free)
+ continue;
+ ino = XFS_AGINO_TO_INO(mp, cur->bc_private.a.agno, agino);
+
+ /*
+ * Grab inode for scanning. We cannot use DONTCACHE here
+ * because we already have a transaction so the iput must not
+ * trigger inode reclaim (which might allocate a transaction
+ * to clean up posteof blocks).
+ */
+ error = xfs_iget(mp, cur->bc_tp, ino, 0, 0, &ip);
+ if (error)
+ return error;
+
+ if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ !(ip->i_df.if_flags & XFS_IFEXTENTS)) ||
+ (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE &&
+ !(ip->i_afp->if_flags & XFS_IFEXTENTS)))
+ lock_mode = XFS_ILOCK_EXCL;
+ else
+ lock_mode = XFS_ILOCK_SHARED;
+ if (!xfs_ilock_nowait(ip, lock_mode)) {
+ ASSERT(0); /* XXX impossible? */
+ error = -EBUSY;
+ goto out_rele;
+ }
+
+ /* Check the data fork. */
+ error = xrep_rmbt_scan_ifork(rr, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_unlock;
+
+ /* Check the attr fork. */
+ error = xrep_rmbt_scan_ifork(rr, ip, XFS_ATTR_FORK);
+ if (error)
+ goto out_unlock;
+
+ xfs_iunlock(ip, lock_mode);
+ xfs_irele(ip);
+ ip = NULL;
+ }
+
+ return error;
+out_unlock:
+ xfs_iunlock(ip, lock_mode);
+out_rele:
+ xfs_irele(ip);
+ return error;
+}
+
+/* Find all the unclaimed space in the new rmap records. */
+STATIC int
+xrep_rmbt_record_rmap_freesp(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rmbt_freesp *rrf = priv;
+ xfs_fsblock_t fsb;
+ int error;
+
+ /* Record the free space we find. */
+ if (rec->rm_startblock > rrf->next_bno) {
+ fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
+ rrf->next_bno);
+ error = xfs_bitmap_set(&rrf->rmap_freelist, fsb,
+ rec->rm_startblock - rrf->next_bno);
+ if (error)
+ return error;
+ }
+ rrf->next_bno = max_t(xfs_agblock_t, rrf->next_bno,
+ rec->rm_startblock + rec->rm_blockcount);
+ return 0;
+}
+
+/* Find all the free space recorded in the AG. */
+STATIC int
+xrep_rmbt_record_bno_freesp(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ struct xrep_rmbt_freesp *rrf = priv;
+ xfs_fsblock_t fsb;
+
+ /* Record the free space we find. */
+ fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
+ rec->ar_startblock);
+ return xfs_bitmap_set(&rrf->bno_freelist, fsb, rec->ar_blockcount);
+}
+
+/* Compare two rmapbt extents. */
+static int
+xrep_rmbt_extent_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xrep_rmbt_extent *ap = a;
+ const struct xrep_rmbt_extent *bp = b;
+ struct xfs_rmap_irec ar = {
+ .rm_startblock = ap->startblock,
+ .rm_blockcount = ap->blockcount,
+ .rm_owner = ap->owner,
+ };
+ struct xfs_rmap_irec br = {
+ .rm_startblock = bp->startblock,
+ .rm_blockcount = bp->blockcount,
+ .rm_owner = bp->owner,
+ };
+ int error;
+
+ error = xfs_rmap_irec_offset_unpack(ap->offset, &ar);
+ ASSERT(error == 0);
+ error = xfs_rmap_irec_offset_unpack(bp->offset, &br);
+ ASSERT(error == 0);
+
+ return xfs_rmap_compare(&ar, &br);
+}
+
+/* Generate rmaps for the AG headers (AGI/AGF/AGFL) */
+STATIC int
+xrep_rmbt_generate_agheader_rmaps(
+ struct xrep_rmbt *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ /* Create a record covering the AG headers (sb through agfl blocks). */
+ error = xrep_rmbt_new_rec(rr, XFS_SB_BLOCK(sc->mp),
+ XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
+ XFS_RMAP_OWN_FS, 0, 0);
+ if (error)
+ return error;
+
+ /* Generate rmaps for the blocks in the AGFL. */
+ return xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
+ sc->sa.agfl_bp, xrep_rmbt_walk_agfl, rr);
+}
+
+/* Generate rmaps for the log, if it's in this AG. */
+STATIC int
+xrep_rmbt_generate_log_rmaps(
+ struct xrep_rmbt *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+
+ if (sc->mp->m_sb.sb_logstart == 0 ||
+ XFS_FSB_TO_AGNO(sc->mp, sc->mp->m_sb.sb_logstart) != sc->sa.agno)
+ return 0;
+
+ return xrep_rmbt_new_rec(rr,
+ XFS_FSB_TO_AGBNO(sc->mp, sc->mp->m_sb.sb_logstart),
+ sc->mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
+}
+
+/* Collect rmaps for the blocks containing the free space btrees. */
+STATIC int
+xrep_rmbt_generate_freesp_rmaps(
+ struct xrep_rmbt *rr,
+ xfs_agblock_t *new_btreeblks)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ rr->owner = XFS_RMAP_OWN_AG;
+ rr->btblocks = 0;
+
+ /* bnobt */
+ cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.agno, XFS_BTNUM_BNO);
+ error = xfs_btree_visit_blocks(cur, xrep_rmbt_visit_btblock, rr);
+ if (error)
+ goto err;
+ xfs_btree_del_cursor(cur, error);
+
+ /* cntbt */
+ cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.agno, XFS_BTNUM_CNT);
+ error = xfs_btree_visit_blocks(cur, xrep_rmbt_visit_btblock, rr);
+ if (error)
+ goto err;
+ xfs_btree_del_cursor(cur, error);
+
+ /* btreeblks doesn't include the bnobt/cntbt btree roots */
+ *new_btreeblks = rr->btblocks - 2;
+ return 0;
+err:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/* Collect rmaps for the blocks containing inode btrees and the inode chunks. */
+STATIC int
+xrep_rmbt_generate_inobt_rmaps(
+ struct xrep_rmbt *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *cur;
+ struct xfs_bitmap_range *br;
+ struct xfs_bitmap_range *n;
+ int error;
+
+ rr->owner = XFS_RMAP_OWN_INOBT;
+
+ /*
+ * Iterate every record in the inobt so we can capture all the inode
+ * chunks and the blocks in the inobt itself.
+ */
+ cur = xfs_inobt_init_cursor(sc->mp, sc->tp, sc->sa.agi_bp,
+ sc->sa.agno, XFS_BTNUM_INO);
+ error = xfs_btree_query_all(cur, xrep_rmbt_walk_inobt, rr);
+ if (error)
+ goto err_cur;
+ xfs_btree_del_cursor(cur, error);
+
+ /*
+ * Note that if there are zero records in the inobt then query_all does
+ * nothing and we have to account the empty inobt root manually.
+ */
+ if (xfs_bitmap_hweight(&rr->inobt_blocks) == 0) {
+ struct xfs_agi *agi;
+ xfs_fsblock_t agi_root;
+
+ agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ agi_root = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno,
+ be32_to_cpu(agi->agi_root));
+ error = xfs_bitmap_set(&rr->inobt_blocks, agi_root, 1);
+ if (error)
+ goto err;
+ }
+
+ /* Add all the inobt blocks to the rmap list. */
+ for_each_xfs_bitmap_extent(br, n, &rr->inobt_blocks) {
+ error = xrep_rmbt_new_rec(rr,
+ XFS_FSB_TO_AGBNO(sc->mp, br->start), br->len,
+ XFS_RMAP_OWN_INOBT, 0, 0);
+ if (error)
+ goto err;
+ }
+
+ /* finobt */
+ if (!xfs_sb_version_hasfinobt(&sc->mp->m_sb))
+ return 0;
+
+ cur = xfs_inobt_init_cursor(sc->mp, sc->tp, sc->sa.agi_bp, sc->sa.agno,
+ XFS_BTNUM_FINO);
+ error = xfs_btree_visit_blocks(cur, xrep_rmbt_visit_btblock, rr);
+ if (error)
+ goto err_cur;
+ xfs_btree_del_cursor(cur, error);
+ return 0;
+err_cur:
+ xfs_btree_del_cursor(cur, error);
+err:
+ return error;
+}
+
+/*
+ * Collect rmaps for the blocks containing the refcount btree, and all CoW
+ * staging extents.
+ */
+STATIC int
+xrep_rmbt_generate_refcountbt_rmaps(
+ struct xrep_rmbt *rr)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ if (!xfs_sb_version_hasreflink(&sc->mp->m_sb))
+ return 0;
+
+ rr->owner = XFS_RMAP_OWN_REFC;
+
+ /* refcountbt */
+ cur = xfs_refcountbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.agno);
+ error = xfs_btree_visit_blocks(cur, xrep_rmbt_visit_btblock, rr);
+ if (error)
+ goto err_cur;
+
+ /* Collect rmaps for CoW staging extents. */
+ memset(&low, 0, sizeof(low));
+ low.rc.rc_startblock = XFS_REFC_COW_START;
+ memset(&high, 0xFF, sizeof(high));
+ error = xfs_btree_query_range(cur, &low, &high,
+ xrep_rmbt_walk_cowblocks, rr);
+err_cur:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/* Collect rmaps for all block mappings for every inode in this AG. */
+STATIC int
+xrep_rmbt_generate_aginode_rmaps(
+ struct xrep_rmbt *rr,
+ xfs_agnumber_t agno)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agi_bp;
+ int error;
+
+ error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp);
+ if (error)
+ return error;
+ cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, agno, XFS_BTNUM_INO);
+ error = xfs_btree_query_all(cur, xrep_rmbt_scan_inobt, rr);
+ xfs_btree_del_cursor(cur, error);
+ xfs_trans_brelse(sc->tp, agi_bp);
+ return error;
+}
+
+/*
+ * Generate all the reverse-mappings for this AG, a list of the old rmapbt
+ * blocks, and the new btreeblks count. Figure out if we have enough free
+ * space to reconstruct the rmap btree. The caller must clean up the lists
+ * if anything goes wrong.
+ */
+STATIC int
+xrep_rmbt_find_rmaps(
+ struct xfs_scrub *sc,
+ struct xfbma *rmap_records,
+ xfs_agblock_t *new_btreeblks)
+{
+ struct xrep_rmbt rr;
+ xfs_agnumber_t agno;
+ int error;
+
+ rr.rmap_records = rmap_records;
+ rr.sc = sc;
+ rr.nr_records = 0;
+ xfs_bitmap_init(&rr.inobt_blocks);
+
+ /* Generate rmaps for AG space metadata */
+ error = xrep_rmbt_generate_agheader_rmaps(&rr);
+ if (error)
+ return error;
+ error = xrep_rmbt_generate_log_rmaps(&rr);
+ if (error)
+ return error;
+ error = xrep_rmbt_generate_freesp_rmaps(&rr, new_btreeblks);
+ if (error)
+ return error;
+ error = xrep_rmbt_generate_inobt_rmaps(&rr);
+ if (error)
+ return error;
+ error = xrep_rmbt_generate_refcountbt_rmaps(&rr);
+ if (error)
+ return error;
+
+ /* Iterate all AGs for inode rmaps. */
+ for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
+ error = xrep_rmbt_generate_aginode_rmaps(&rr, agno);
+ if (error)
+ return error;
+ }
+
+ /* Do we actually have enough space to do this? */
+ if (!xrep_ag_has_space(sc->sa.pag,
+ xfs_rmapbt_calc_size(sc->mp, rr.nr_records),
+ XFS_AG_RESV_RMAPBT))
+ return -ENOSPC;
+
+ return 0;
+}
+
+/* Update the AGF counters. */
+STATIC int
+xrep_rmbt_reset_counters(
+ struct xfs_scrub *sc,
+ xfs_agblock_t new_btreeblks,
+ int *log_flags)
+{
+ struct xfs_agf *agf;
+ struct xfs_perag *pag = sc->sa.pag;
+
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ ASSERT(pag->pagf_init);
+ pag->pagf_init = 0;
+ pag->pagf_btreeblks = new_btreeblks;
+ agf->agf_btreeblks = cpu_to_be32(new_btreeblks);
+ *log_flags |= XFS_AGF_BTREEBLKS;
+
+ return 0;
+}
+
+/* Initialize a new rmapbt root and implant it into the AGF. */
+STATIC int
+xrep_rmbt_reset_btree(
+ struct xfs_scrub *sc,
+ int *log_flags)
+{
+ struct xfs_buf *bp;
+ struct xfs_agf *agf;
+ struct xfs_perag *pag = sc->sa.pag;
+ xfs_fsblock_t btfsb;
+ int error;
+
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+
+ /* Initialize a new rmapbt root. */
+ error = xrep_alloc_ag_block(sc, &XFS_RMAP_OINFO_SKIP_UPDATE, &btfsb,
+ XFS_AG_RESV_RMAPBT);
+ if (error)
+ return error;
+
+ /* The root block is not a btreeblks block. */
+ be32_add_cpu(&agf->agf_btreeblks, -1);
+ pag->pagf_btreeblks--;
+ *log_flags |= XFS_AGF_BTREEBLKS;
+
+ error = xrep_init_btblock(sc, btfsb, &bp, XFS_BTNUM_RMAP,
+ &xfs_rmapbt_buf_ops);
+ if (error)
+ return error;
+
+ agf->agf_roots[XFS_BTNUM_RMAPi] =
+ cpu_to_be32(XFS_FSB_TO_AGBNO(sc->mp, btfsb));
+ agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+ agf->agf_rmap_blocks = cpu_to_be32(1);
+ pag->pagf_levels[XFS_BTNUM_RMAPi] = 1;
+ *log_flags |= XFS_AGF_ROOTS | XFS_AGF_LEVELS | XFS_AGF_RMAP_BLOCKS;
+
+ return 0;
+}
+
+/*
+ * Make our new btree root permanent so that we can start refilling the rmap
+ * records.
+ */
+STATIC int
+xrep_rmbt_commit_new(
+ struct xfs_scrub *sc,
+ int log_flags)
+{
+ int error;
+
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, log_flags);
+ error = xrep_roll_ag_trans(sc);
+ if (error)
+ return error;
+ sc->sa.pag->pagf_init = 1;
+ sc->flags |= XREP_RESET_PERAG_RESV;
+ return 0;
+}
+
+/*
+ * Roll and fix the free list while reloading the rmapbt. Do not shrink the
+ * freelist because the rmapbt is not fully set up yet.
+ */
+STATIC int
+xrep_rmbt_fix_freelist(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ error = xrep_roll_ag_trans(sc);
+ if (error)
+ return error;
+ return xrep_fix_freelist(sc, false);
+}
+
+struct xrep_add_rmap {
+ struct xfs_scrub *sc;
+ struct xfs_btree_cur *cur;
+ uint32_t old_rmbt_size;
+};
+
+static inline unsigned int
+xrep_rmbt_size(
+ struct xfs_scrub *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+
+ return be32_to_cpu(agf->agf_rmap_blocks);
+}
+
+/* Add one rmap record. */
+STATIC int
+xrep_rmbt_insert_rec(
+ const void *item,
+ void *priv)
+{
+ const struct xrep_rmbt_extent *rre = item;
+ struct xfs_rmap_irec rmap = {
+ .rm_startblock = rre->startblock,
+ .rm_blockcount = rre->blockcount,
+ .rm_owner = rre->owner,
+ };
+ struct xrep_add_rmap *x = priv;
+ int error;
+
+ error = xfs_rmap_irec_offset_unpack(rre->offset, &rmap);
+ if (error)
+ return error;
+
+ /* Add the rmap. */
+ error = xfs_rmap_map_raw(x->cur, &rmap);
+ if (error)
+ return error;
+
+ /*
+ * If the flcount changed because the rmap btree changed shape then we
+ * need to fix the freelist to keep it full enough to handle a total
+ * btree split. We'll roll this transaction to get it out of the way
+ * and then fix the freelist in a fresh transaction.
+ *
+ * However, we must be careful about two things: (1) fixing the
+ * freelist changes the rmapbt, so we must drop the rmapbt cursor, and (2) we
+ * can't let the freelist shrink. The rmapbt isn't fully set up yet,
+ * which means that the current AGFL blocks might not be reflected in
+ * the rmapbt, which is a problem if we want to unmap blocks from the
+ * AGFL.
+ */
+ if (xrep_rmbt_size(x->sc) == x->old_rmbt_size)
+ return 0;
+
+ xfs_btree_del_cursor(x->cur, error);
+ x->cur = NULL;
+ error = xrep_rmbt_fix_freelist(x->sc);
+ if (error)
+ return error;
+ x->old_rmbt_size = xrep_rmbt_size(x->sc);
+ x->cur = xfs_rmapbt_init_cursor(x->sc->mp, x->sc->tp, x->sc->sa.agf_bp,
+ x->sc->sa.agno);
+ return 0;
+}
+
+/* Insert all the rmaps we collected. */
+STATIC int
+xrep_rmbt_rebuild_tree(
+ struct xfs_scrub *sc,
+ struct xfbma *rmap_records)
+{
+ struct xrep_add_rmap x = {
+ .sc = sc,
+ };
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ /*
+ * Sort the reverse mappings by startblock to avoid btree splits when
+ * we rebuild the rmap btree.
+ */
+ error = xfbma_sort(rmap_records, xrep_rmbt_extent_cmp);
+ if (error)
+ return error;
+
+ /* Put everything back in the rmapbt. */
+ x.cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+ x.old_rmbt_size = xrep_rmbt_size(sc);
+ error = xfbma_iter_del(rmap_records, xrep_rmbt_insert_rec, &x);
+ if (x.cur)
+ xfs_btree_del_cursor(x.cur, error);
+ if (error)
+ goto err;
+
+ /* Fix the freelist once more, if necessary. */
+ if (xrep_rmbt_size(sc) != x.old_rmbt_size) {
+ error = xrep_rmbt_fix_freelist(sc);
+ if (error)
+ goto err;
+ }
+ return 0;
+err:
+ return error;
+}
+
+/*
+ * Reap the old rmapbt blocks. Now that the rmapbt is fully rebuilt, we make
+ * a list of gaps in the rmap records and a list of the extents mentioned in
+ * the bnobt. Any block that's in the new rmapbt gap list but not mentioned
+ * in the bnobt is a block from the old rmapbt and can be removed.
+ */
+STATIC int
+xrep_rmbt_reap_old_blocks(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rmbt_freesp rrf;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_agf *agf;
+ struct xfs_btree_cur *cur;
+ xfs_fsblock_t btfsb;
+ xfs_agblock_t agend;
+ int error;
+
+ xfs_bitmap_init(&rrf.rmap_freelist);
+ xfs_bitmap_init(&rrf.bno_freelist);
+ rrf.next_bno = 0;
+ rrf.sc = sc;
+
+ /* Compute free space from the new rmapbt. */
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+ error = xfs_rmap_query_all(cur, xrep_rmbt_record_rmap_freesp,
+ &rrf);
+ if (error)
+ goto err_cur;
+ xfs_btree_del_cursor(cur, error);
+
+ /* Insert a record for space between the last rmap and EOAG. */
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ agend = be32_to_cpu(agf->agf_length);
+ if (rrf.next_bno < agend) {
+ btfsb = XFS_AGB_TO_FSB(mp, sc->sa.agno, rrf.next_bno);
+ error = xfs_bitmap_set(&rrf.rmap_freelist, btfsb,
+ agend - rrf.next_bno);
+ if (error)
+ goto err;
+ }
+
+ /* Compute free space from the existing bnobt. */
+ cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.agno, XFS_BTNUM_BNO);
+ error = xfs_alloc_query_all(cur, xrep_rmbt_record_bno_freesp, &rrf);
+ if (error)
+ goto err_lists;
+ xfs_btree_del_cursor(cur, error);
+
+ /*
+ * Free the "free" blocks that the new rmapbt knows about but
+ * the old bnobt doesn't. These are the old rmapbt blocks.
+ */
+ error = xfs_bitmap_disunion(&rrf.rmap_freelist, &rrf.bno_freelist);
+ xfs_bitmap_destroy(&rrf.bno_freelist);
+ if (error)
+ goto err;
+ error = xrep_invalidate_blocks(sc, &rrf.rmap_freelist);
+ if (error)
+ goto err;
+ return xrep_reap_extents(sc, &rrf.rmap_freelist,
+ &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT);
+err_lists:
+ xfs_bitmap_destroy(&rrf.bno_freelist);
+err_cur:
+ xfs_btree_del_cursor(cur, error);
+err:
+ return error;
+}
+
+/* Repair the rmap btree for some AG. */
+int
+xrep_rmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xfbma *rmap_records;
+ xfs_agblock_t new_btreeblks;
+ int log_flags = 0;
+ int error;
+
+ xchk_perag_get(sc->mp, &sc->sa);
+
+ /* Set up some storage */
+ rmap_records = xfbma_init(sizeof(struct xrep_rmbt_extent));
+ if (IS_ERR(rmap_records))
+ return PTR_ERR(rmap_records);
+
+ /* Collect rmaps for all AG metadata and file block mappings. */
+ error = xrep_rmbt_find_rmaps(sc, rmap_records, &new_btreeblks);
+ if (error)
+ goto out;
+
+ /*
+ * Blow out the old rmap btrees. This is the point at which
+ * we are no longer able to bail out gracefully.
+ */
+ error = xrep_rmbt_reset_counters(sc, new_btreeblks, &log_flags);
+ if (error)
+ goto out;
+ error = xrep_rmbt_reset_btree(sc, &log_flags);
+ if (error)
+ goto out;
+ error = xrep_rmbt_commit_new(sc, log_flags);
+ if (error)
+ goto out;
+
+ /* Now rebuild the rmap information. */
+ error = xrep_rmbt_rebuild_tree(sc, rmap_records);
+ if (error)
+ goto out;
+
+ /* Find and destroy the blocks from the old rmapbt. */
+ error = xrep_rmbt_reap_old_blocks(sc);
+out:
+ xfbma_destroy(rmap_records);
+ return error;
+}
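To make the reaping strategy from the design comment at the top of rmap_repair.c concrete: the gaps between the new rmap records form one free list, the bnobt records form another, and their set difference is exactly the old rmapbt blocks. An illustrative userspace sketch with plain sorted arrays standing in for the btrees; this is not the kernel implementation, which uses xfs_bitmap_disunion on extent lists:

/* Illustrative only: find old rmapbt blocks by set difference. */
#include <stdint.h>
#include <stdio.h>

struct extent {
	uint32_t	start;
	uint32_t	len;
};

/*
 * Walk the gaps between sorted rmap records; any gap block that the
 * bnobt does not consider free must belong to the old rmapbt.
 */
static void
find_old_rmapbt_blocks(const struct extent *rmaps, int nr_rmaps,
		       const struct extent *bnobt, int nr_bnobt,
		       uint32_t ag_len)
{
	uint32_t next_bno = 0;
	int i, j;

	for (i = 0; i <= nr_rmaps; i++) {
		/* This gap ends at the next rmap record, or at EOAG. */
		uint32_t gap_end = i < nr_rmaps ? rmaps[i].start : ag_len;
		uint32_t b;

		for (b = next_bno; b < gap_end; b++) {
			int is_free = 0;

			for (j = 0; j < nr_bnobt; j++)
				if (b >= bnobt[j].start &&
				    b - bnobt[j].start < bnobt[j].len)
					is_free = 1;
			if (!is_free)
				printf("old rmapbt block: %u\n", b);
		}
		if (i < nr_rmaps &&
		    rmaps[i].start + rmaps[i].len > next_bno)
			next_bno = rmaps[i].start + rmaps[i].len;
	}
}

int
main(void)
{
	/* One rmap covers blocks 0-9; the bnobt says 12-15 are free. */
	struct extent rmaps[] = { { 0, 10 } };
	struct extent bnobt[] = { { 12, 4 } };

	/* Blocks 10 and 11 print as old rmapbt blocks. */
	find_old_rmapbt_blocks(rmaps, 1, bnobt, 1, 16);
	return 0;
}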
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 58492374d20a..8047b647c9c4 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -273,7 +273,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_ag_rmapbt,
.scrub = xchk_rmapbt,
.has = xfs_sb_version_hasrmapbt,
- .repair = xrep_notsupported,
+ .repair = xrep_rmapbt,
},
[XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
.type = ST_PERAG,
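With the dispatch table entry above in place, userspace can request this repair through the scrub ioctl. A hedged usage sketch: XFS_IOC_SCRUB_METADATA, struct xfs_scrub_metadata, and the type/flag constants are the existing scrub uapi from xfs_fs.h, while the helper below is illustrative only.

/* Usage sketch: ask the kernel to check and repair one AG's rmapbt. */
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* xfsprogs uapi wrapper; pulls in xfs_fs.h */

int
repair_rmapbt(const char *mntpt, uint32_t agno)
{
	struct xfs_scrub_metadata sm;
	int fd, ret;

	fd = open(mntpt, O_RDONLY);
	if (fd < 0)
		return -1;
	memset(&sm, 0, sizeof(sm));
	sm.sm_type = XFS_SCRUB_TYPE_RMAPBT;
	sm.sm_agno = agno;
	sm.sm_flags = XFS_SCRUB_IFLAG_REPAIR;	/* routes to xrep_rmapbt_setup */
	ret = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);
	close(fd);
	return ret;
}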