summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:59:04 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:40:45 -0700
commit: bd1001e99e6dca1a12809431d1cd6eb4896d82d2 (patch)
tree: 006f07f4461166bbd41643c14e0094ff264345f7
parent: 9fc504f372b56a2d5c64ecfaa95c4ce086342113 (diff)
xfs: online repair of realtime summaries (repair-rtsummary_2021-10-22)

Repair the realtime summary data by constructing a new rtsummary file in the scrub temporary file, then atomically swapping the contents.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/Makefile | 4
-rw-r--r-- fs/xfs/scrub/repair.c | 15
-rw-r--r-- fs/xfs/scrub/repair.h | 15
-rw-r--r-- fs/xfs/scrub/rtsummary.c | 11
-rw-r--r-- fs/xfs/scrub/rtsummary_repair.c | 103
-rw-r--r-- fs/xfs/scrub/scrub.c | 2
-rw-r--r-- fs/xfs/scrub/tempfile.c | 166
-rw-r--r-- fs/xfs/scrub/tempfile.h | 11
8 files changed, 324 insertions, 3 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7455c1360548..7573fb05b244 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -187,6 +187,10 @@ xfs-y += $(addprefix scrub/, \
tempfile.o \
)
+xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rtsummary_repair.o \
+ )
+
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
quota_repair.o \
quotacheck_repair.o \
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 8d04afffc21b..f98d2763918b 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -42,6 +42,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -167,9 +168,21 @@ int
xrep_roll_trans(
struct xfs_scrub *sc)
{
+ int error;
+
if (!sc->ip)
return xrep_roll_ag_trans(sc);
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
+
+ /*
+ * Roll the transaction with the inode we're fixing and the temp inode,
+ * so that neither can pin the log.
+ */
+ if (sc->tempip)
+ xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+ error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+ if (sc->tempip)
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ return error;
}
/*
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 6a8b85dd71be..e72a97f50ab2 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -72,6 +72,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
+int xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks);
void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
@@ -107,6 +108,12 @@ int xrep_quotacheck(struct xfs_scrub *sc);
# define xrep_quotacheck xrep_notsupported
#endif /* CONFIG_XFS_QUOTA */
+#ifdef CONFIG_XFS_RT
+int xrep_rtsummary(struct xfs_scrub *sc);
+#else
+# define xrep_rtsummary xrep_notsupported
+#endif /* CONFIG_XFS_RT */
+
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@@ -209,6 +216,13 @@ xrep_setup_ag_rmapbt(
return xchk_setup_ag_btree(sc, false);
}
+/*
+ * Stub variant of the rtsummary repair setup: does no work and requests no
+ * extra block reservation from the caller.
+ */
+static inline int
+xrep_setup_rtsummary(
+	struct xfs_scrub	*sc,
+	unsigned int		*resblks)
+{
+	*resblks = 0;
+	return 0;
+}
+
#define xrep_revalidate_allocbt (NULL)
#define xrep_revalidate_iallocbt (NULL)
@@ -228,6 +242,7 @@ xrep_setup_ag_rmapbt(
#define xrep_quota xrep_notsupported
#define xrep_quotacheck xrep_notsupported
#define xrep_fscounters xrep_notsupported
+#define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index f5c400559a45..6c0ca58e82dc 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -21,6 +21,8 @@
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
/*
* Realtime Summary
@@ -38,8 +40,15 @@ xchk_setup_rtsummary(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
+ unsigned int resblks = 0;
int error;
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ error = xrep_setup_rtsummary(sc, &resblks);
+ if (error)
+ return error;
+ }
+
/*
* Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose.
@@ -48,7 +57,7 @@ xchk_setup_rtsummary(
if (IS_ERR(sc->xfile))
return PTR_ERR(sc->xfile);
- error = xchk_trans_alloc(sc, 0);
+ error = xchk_trans_alloc(sc, resblks);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..1e671d395221
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_swapext.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
+
+/*
+ * Get ready to repair the rtsummary file: create the temporary file that the
+ * replacement summary will be written to, and report through @resblks how
+ * many blocks the repair transaction should reserve.
+ *
+ * Returns 0 on success, the error from creating the temporary file, or
+ * -EOPNOTSUPP if the reservation would not fit in an unsigned int.
+ */
+int
+xrep_setup_rtsummary(
+	struct xfs_scrub	*sc,
+	unsigned int		*resblks)
+{
+	struct xfs_mount	*mp = sc->mp;
+	unsigned long long	rsumblocks;
+	unsigned long long	needed;
+	int			error;
+
+	error = xrep_tempfile_create(sc, S_IFREG);
+	if (error)
+		return error;
+
+	/*
+	 * Reserve enough blocks to write out an entirely new summary file,
+	 * plus twice the number of bmbt blocks that would be needed if every
+	 * data fork mapping covered only a single block.  That should be
+	 * enough to preallocate the temporary file and to swap the extent
+	 * mappings afterwards.
+	 *
+	 * xfs_swapext_estimate is of no use here: the replacement rtsummary
+	 * has not been built yet, so its extent count is unknown.  Once it has
+	 * been built, the transaction is dirty (and cannot be dropped, because
+	 * the rtsummary ILOCK cannot be dropped), so no further reservation
+	 * can be requested at that point.
+	 */
+	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+	needed = rsumblocks + xfs_bmbt_calc_size(mp, rsumblocks) * 2;
+	if (needed > UINT_MAX)
+		return -EOPNOTSUPP;
+
+	*resblks = needed;
+	return 0;
+}
+
+/*
+ * Repair the realtime summary: fix up the rtsummary inode's forks, copy the
+ * summary that was generated in the xfile into the temporary file, and then
+ * swap the temporary file's data fork extents with the rtsummary file's.
+ *
+ * Returns 0 on success, -EAGAIN if the temporary file could not be locked
+ * without blocking, or the first error reported by one of the steps.
+ */
+int
+xrep_rtsummary(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_swapext_req	req = { .req_flags = 0 };
+	struct xfs_mount	*mp = sc->mp;
+	xfs_filblks_t		rsumblocks;
+	int			error;
+
+	/* Deal with any damage to the inode forks before going further. */
+	error = xrep_metadata_inode_forks(sc);
+	if (error)
+		return error;
+
+	/*
+	 * Nobody else ought to be holding the temporary file, so a trylock is
+	 * expected to succeed.  If it doesn't, back out and ask for a retry.
+	 */
+	if (!xrep_tempfile_ilock_nowait(sc, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
+	/* Both inodes must be joined before we touch the temp file. */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+	/* Back the whole summary file range with written blocks. */
+	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+	error = xrep_tempfile_prealloc(sc, 0, rsumblocks);
+	if (error)
+		return error;
+
+	/* Write the summary we generated into the temporary file. */
+	error = xrep_tempfile_copyin(sc, &xfs_rtbuf_ops,
+			XFS_BLFT_RTSUMMARY_BUF, mp->m_rsumsize);
+	if (error)
+		return error;
+
+	/* Exchange the data fork mappings of the two files. */
+	req.ip1 = sc->tempip;
+	req.ip2 = sc->ip;
+	req.whichfork = XFS_DATA_FORK;
+	req.blockcount = rsumblocks;
+	return xfs_swapext(&sc->tp, &req);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 1ba85edbf694..c52bbe25068b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -335,7 +335,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.has = xfs_has_realtime,
- .repair = xrep_notsupported,
+ .repair = xrep_rtsummary,
},
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
.type = ST_FS,
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 5ff376d228db..09dbefa62049 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -14,14 +14,17 @@
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
+#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
+#include "scrub/xfile.h"
/*
* Create a temporary file for reconstructing metadata, with the intention of
@@ -211,3 +214,166 @@ xrep_tempfile_rele(
xfs_irele(sc->tempip);
sc->tempip = NULL;
}
+
+/*
+ * Make sure that the given range of the data fork of the temporary file is
+ * mapped to written blocks.  The caller must ensure that both inodes are
+ * joined to the transaction.
+ *
+ * @off is the first file block of the range and @len is its length in
+ * blocks; the range walked is [off, off + len).  The scrub transaction is
+ * rolled after each allocation.
+ *
+ * Returns 0 if the whole range is backed by written extents, -EFSCORRUPTED
+ * if no mapping could be read or a delalloc reservation is found, -ENOSPC if
+ * the allocator returned no mapping, or the error from a failed mapping
+ * lookup, allocation, or transaction roll.
+ */
+int
+xrep_tempfile_prealloc(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		off,
+	xfs_filblks_t		len)
+{
+	struct xfs_bmbt_irec	map;
+	xfs_fileoff_t		end = off + len;
+	int			nmaps;
+	int			error;
+
+	ASSERT(sc->tempip != NULL);
+	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
+
+	/*
+	 * Walk the range one mapping at a time.  The cursor is a file offset,
+	 * so it must be compared against the end of the range (not its
+	 * length) and advanced past the file range covered by the mapping we
+	 * just handled (not by the mapping's disk address).
+	 */
+	for (; off < end; off = map.br_startoff + map.br_blockcount) {
+		/*
+		 * If we have a real extent mapping this block then we're
+		 * in ok shape.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
+				XFS_DATA_FORK);
+		if (error)
+			return error;
+
+		/* Without a mapping, @map holds nothing we can act on. */
+		if (nmaps == 0)
+			return -EFSCORRUPTED;
+
+		if (xfs_bmap_is_written_extent(&map))
+			continue;
+
+		/*
+		 * If we find a delalloc reservation then something is very
+		 * very wrong.  Bail out.
+		 */
+		if (map.br_startblock == DELAYSTARTBLOCK)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Make sure this rtsum block has a real zeroed extent
+		 * allocated to it.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
+				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+				&nmaps);
+		if (error)
+			return error;
+
+		/*
+		 * If nothing was mapped, @map still describes the hole or
+		 * unwritten extent we read above; stepping past it would
+		 * leave that part of the range without written blocks.
+		 */
+		if (nmaps != 1)
+			return -ENOSPC;
+
+		error = xrep_roll_trans(sc);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Write a number of bytes from the xfile into the temp file.  The caller
+ * must join both inodes to the transaction.
+ *
+ * The data is loaded one filesystem block at a time straight from sc->xfile
+ * into a buffer obtained for the disk block that the temp file's data fork
+ * maps at that offset, so every block in the range must already be mapped
+ * to a written extent.
+ *
+ * @ops and @type are the buffer ops and buffer type set on each buffer.
+ * @isize is the number of bytes to copy (a byte count, despite its type); it
+ * also becomes the size of the temp file if that differs.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if a block in the range is not backed
+ * by a written extent, -EIO if buffers remain queued after the final
+ * submission, or the error from a failed lookup, load, write, or roll.
+ */
+int
+xrep_tempfile_copyin(
+	struct xfs_scrub	*sc,
+	const struct xfs_buf_ops *ops,
+	enum xfs_blft		type,
+	xfs_fileoff_t		isize)
+{
+	LIST_HEAD(buffers_list);
+	struct xfs_bmbt_irec	map;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	xfs_fileoff_t		off = 0;	/* file block being copied */
+	loff_t			pos = 0;	/* byte position in the xfile */
+	unsigned int		nr_buffers = 0;
+	int			nmaps;
+	int			error = 0;
+
+	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
+
+	for (; pos < isize; pos += mp->m_sb.sb_blocksize, off++) {
+		size_t		count;
+
+		/* Read block mapping for this file block. */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
+		if (error)
+			goto out;
+		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Get the metadata buffer for this offset in the file. */
+		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock),
+				mp->m_bsize, 0, &bp);
+		if (error)
+			goto out;
+		bp->b_ops = ops;
+		xfs_trans_buf_set_type(sc->tp, bp, type);
+
+		/*
+		 * Read in a block's worth of data from the xfile; the last
+		 * block may be short if @isize is not block aligned.
+		 */
+		count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
+		error = xfile_obj_load(sc->xfile, bp->b_addr, count, pos);
+		if (error) {
+			xfs_trans_brelse(sc->tp, bp);
+			goto out;
+		}
+
+		/*
+		 * Put this buffer on the delwri list so we can write them all
+		 * out in batches.
+		 */
+		xfs_buf_delwri_queue(bp, &buffers_list);
+		xfs_trans_brelse(sc->tp, bp);
+		nr_buffers++;
+
+		/*
+		 * If we have more than 256K of data to write out, flush it to
+		 * disk so we don't use up too much memory.
+		 */
+		if (XFS_FSB_TO_B(mp, nr_buffers) > 262144) {
+			error = xfs_buf_delwri_submit(&buffers_list);
+			if (error)
+				goto out;
+			nr_buffers = 0;
+		}
+	}
+
+	/*
+	 * Write the new blocks to disk.  If the delwri list isn't empty after
+	 * that, then something went wrong and we have to fail.  This should
+	 * never happen, but we'll check anyway.
+	 */
+	error = xfs_buf_delwri_submit(&buffers_list);
+	if (error)
+		goto out;
+	if (!list_empty(&buffers_list)) {
+		ASSERT(list_empty(&buffers_list));
+		return -EIO;
+	}
+
+	/* Set the new inode size, if needed. */
+	if (sc->tempip->i_disk_size != isize) {
+		sc->tempip->i_disk_size = isize;
+		i_size_write(VFS_I(sc->tempip), isize);
+		xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+		return xrep_roll_trans(sc);
+	}
+
+	return 0;
+out:
+	/* Drop whatever is still queued so no buffers are left on the list. */
+	xfs_buf_delwri_cancel(&buffers_list);
+	return error;
+}
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
index a6a4c8d6a373..4b9d56399cc2 100644
--- a/fs/xfs/scrub/tempfile.h
+++ b/fs/xfs/scrub/tempfile.h
@@ -13,6 +13,17 @@ void xrep_tempfile_rele(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags);
void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);
+
+int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off,
+ xfs_filblks_t len);
+
+enum xfs_blft;
+
+typedef int (*xrep_tempfile_copyin_getbuf_fn)(struct xfs_scrub *sc,
+ xfs_fileoff_t off, struct xfs_buf **bpp);
+int xrep_tempfile_copyin(struct xfs_scrub *sc,
+ const struct xfs_buf_ops *ops, enum xfs_blft type,
+ xfs_fileoff_t isize);
#else
# define xrep_tempfile_rele(sc)
#endif /* CONFIG_XFS_ONLINE_REPAIR */