summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <darrick.wong@oracle.com> 2020-10-25 17:15:48 -0700
committer Darrick J. Wong <darrick.wong@oracle.com> 2020-10-26 18:32:24 -0700
commit e8855fc16f125ed778dea1b0c9c7c3e9c92f8870 (patch)
tree 894190c3df876815dbe4f9e9065d634896f678fa
parent 32c6e10d243d14d2e20bd8471d761f12785e20f0 (diff)
xfs: online repair of realtime summaries [tag: repair-rtsummary_2020-10-26]
Repair the realtime summary data by constructing a new rtsummary file in the scrub temporary file, then atomically swapping the contents.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/Makefile 4
-rw-r--r-- fs/xfs/scrub/repair.c 177
-rw-r--r-- fs/xfs/scrub/repair.h 17
-rw-r--r-- fs/xfs/scrub/rtsummary.c 22
-rw-r--r-- fs/xfs/scrub/rtsummary_repair.c 65
-rw-r--r-- fs/xfs/scrub/scrub.c 2
6 files changed, 283 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index e46c622561ab..f47c9d6a43f4 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -185,6 +185,10 @@ xfs-y += $(addprefix scrub/, \
symlink_repair.o \
)
+# The realtime summary repair object is appended to the xfs-$(CONFIG_XFS_RT)
+# list, so whether it is built follows the CONFIG_XFS_RT setting.
+xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rtsummary_repair.o \
+ )
+
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
quota_repair.o \
quotacheck_repair.o \
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 222ab8746513..8a5d2ea26fdf 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -39,6 +39,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -165,9 +166,21 @@ int
xrep_roll_trans(
struct xfs_scrub *sc)
{
+ int error;
+
if (!sc->ip)
return xrep_roll_ag_trans(sc);
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
+
+ /*
+ * Roll the transaction with the inode we're fixing and the temp inode,
+ * so that neither can pin the log.
+ */
+ if (sc->tempip)
+ xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+ error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+ if (sc->tempip)
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ return error;
}
/*
@@ -1618,3 +1631,165 @@ out_release_inode:
return error;
}
+
+/*
+ * Make sure that the given range of the data fork of the temporary file is
+ * mapped to written blocks.  The caller must ensure that both inodes are
+ * joined to the transaction.
+ *
+ * @sc:  scrub context; sc->tempip is the temporary file being filled in.
+ * @off: first file block of the range.
+ * @len: number of file blocks in the range.
+ *
+ * The transaction is rolled after each allocation.  Returns 0 once every
+ * block in [off, off + len) is backed by a written extent, -EFSCORRUPTED if
+ * a delalloc reservation is found or the allocator hands back no mapping, or
+ * the error returned by the mapping, allocation, or transaction roll calls.
+ */
+int
+xrep_fallocate(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		off,
+	xfs_filblks_t		len)
+{
+	struct xfs_bmbt_irec	map;
+	xfs_fileoff_t		end = off + len;
+	int			nmaps;
+	int			error;
+
+	ASSERT(sc->tempip != NULL);
+	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
+
+	/* Walk the range [off, end) one mapping at a time. */
+	while (off < end) {
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
+				XFS_DATA_FORK);
+		if (error)
+			return error;
+
+		/* Only look at the mapping if one was actually returned. */
+		if (nmaps == 1) {
+			/*
+			 * If we have a real extent mapping this block then
+			 * we're in ok shape.  Step over it by its length in
+			 * file blocks (not by its disk address).
+			 */
+			if (xfs_bmap_is_written_extent(&map)) {
+				off = map.br_startoff + map.br_blockcount;
+				continue;
+			}
+
+			/*
+			 * If we find a delalloc reservation then something is
+			 * very very wrong.  Bail out.
+			 */
+			if (map.br_startblock == DELAYSTARTBLOCK)
+				return -EFSCORRUPTED;
+		}
+
+		/*
+		 * Make sure the rest of the range starting at this block has
+		 * a real zeroed extent allocated to it.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
+				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+				&nmaps);
+		if (error)
+			return error;
+
+		/* Without a mapping we cannot tell how far we got. */
+		if (nmaps != 1)
+			return -EFSCORRUPTED;
+
+		error = xrep_roll_trans(sc);
+		if (error)
+			return error;
+
+		/* Continue after the extent that was just allocated. */
+		off = map.br_startoff + map.br_blockcount;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the first @isize bytes of the xfile (sc->xfile) into the blocks mapped
+ * by the temporary file (sc->tempip), one filesystem block at a time, then
+ * set the temporary file's size to @isize.  The caller must join both inodes
+ * to the transaction.
+ *
+ * @sc:    scrub context providing the transaction, xfile and temp file.
+ * @ops:   buffer ops attached to every buffer that is written.
+ * @type:  buffer log item type stamped on every buffer.
+ * @isize: number of bytes to copy, and the new size of the temp file.
+ *
+ * Every file block in the range must already be mapped to a written extent;
+ * otherwise -EFSCORRUPTED is returned.  Returns 0 on success, -EIO if buffers
+ * remain queued after submission, or the error from the failing helper.
+ */
+int
+xrep_set_file_contents(
+	struct xfs_scrub	*sc,
+	const struct xfs_buf_ops *ops,
+	enum xfs_blft		type,
+	xfs_fileoff_t		isize)
+{
+	LIST_HEAD(buffers_list);
+	struct xfs_bmbt_irec	map;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	xfs_fileoff_t		off = 0;	/* file block being written */
+	loff_t			pos = 0;	/* byte position in the xfile */
+	unsigned int		nr_buffers = 0;
+	int			nmaps;
+	int			error = 0;
+
+	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
+
+	for (; pos < isize; pos += mp->m_sb.sb_blocksize, off++) {
+		size_t		count;
+
+		/* Read block mapping for this file block. */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
+		if (error)
+			goto out;
+		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Get the metadata buffer for this offset in the file. */
+		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock),
+				mp->m_bsize, 0, &bp);
+		if (error)
+			goto out;
+		bp->b_ops = ops;
+		xfs_trans_buf_set_type(sc->tp, bp, type);
+
+		/* Read in a block's worth of data from the xfile. */
+		count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
+		error = xfile_pread(sc->xfile, bp->b_addr, count, pos);
+		if (error) {
+			xfs_trans_brelse(sc->tp, bp);
+			goto out;
+		}
+
+		/*
+		 * A short final block only fills part of the buffer.  Zero the
+		 * remainder so that whatever the buffer held before does not
+		 * get written to disk.
+		 */
+		if (count < mp->m_sb.sb_blocksize)
+			memset((char *)bp->b_addr + count, 0,
+					mp->m_sb.sb_blocksize - count);
+
+		/*
+		 * Put this buffer on the delwri list so we can write them all
+		 * out in batches.
+		 */
+		xfs_buf_delwri_queue(bp, &buffers_list);
+		xfs_trans_brelse(sc->tp, bp);
+		nr_buffers++;
+
+		/*
+		 * If we have more than 256K of data to write out, flush it to
+		 * disk so we don't use up too much memory.
+		 */
+		if (XFS_FSB_TO_B(mp, nr_buffers) > 262144) {
+			error = xfs_buf_delwri_submit(&buffers_list);
+			if (error)
+				goto out;
+			nr_buffers = 0;
+		}
+	}
+
+	/*
+	 * Write the new blocks to disk.  If the ordered list isn't empty after
+	 * that, then something went wrong and we have to fail.  This should
+	 * never happen, but we'll check anyway, and go through the common
+	 * cleanup path so that nothing is left queued on the local list.
+	 */
+	error = xfs_buf_delwri_submit(&buffers_list);
+	if (error)
+		goto out;
+	if (!list_empty(&buffers_list)) {
+		ASSERT(list_empty(&buffers_list));
+		error = -EIO;
+		goto out;
+	}
+
+	/* Set the new inode size, if needed. */
+	if (sc->tempip->i_d.di_size != isize) {
+		sc->tempip->i_d.di_size = isize;
+		xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+		return xrep_roll_trans(sc);
+	}
+
+	return 0;
+out:
+	xfs_buf_delwri_cancel(&buffers_list);
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 98399ebf7f08..ab45b85d3659 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -22,6 +22,9 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
/* Repair helpers */
+enum xfs_blft;
+struct xbitmap;
+
int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
@@ -36,8 +39,13 @@ int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
struct xfs_buf **bpp, xfs_btnum_t btnum,
const struct xfs_buf_ops *ops);
int xrep_setup_tempfile(struct xfs_scrub *sc, uint16_t mode);
+/* Map blocks [off, off + len) of the temp file's data fork to written extents. */
+int xrep_fallocate(struct xfs_scrub *sc, xfs_fileoff_t off, xfs_filblks_t len);
-struct xbitmap;
+/*
+ * NOTE(review): nothing visible in this patch references this callback type;
+ * presumably it was meant to fetch the buffer backing file block @off --
+ * confirm whether it is still needed.
+ */
+typedef int (*xrep_setfile_getbuf_fn)(struct xfs_scrub *sc,
+ xfs_fileoff_t off, struct xfs_buf **bpp);
+/* Copy @isize bytes from the xfile into the temp file and set its size. */
+int xrep_set_file_contents(struct xfs_scrub *sc,
+ const struct xfs_buf_ops *ops, enum xfs_blft type,
+ xfs_fileoff_t isize);
int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags);
int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
@@ -93,6 +101,12 @@ int xrep_quotacheck(struct xfs_scrub *sc);
# define xrep_quotacheck xrep_notsupported
#endif /* CONFIG_XFS_QUOTA */
+/* Without CONFIG_XFS_RT, xrep_rtsummary resolves to xrep_notsupported. */
+#ifdef CONFIG_XFS_RT
+int xrep_rtsummary(struct xfs_scrub *sc);
+#else
+# define xrep_rtsummary xrep_notsupported
+#endif /* CONFIG_XFS_RT */
+
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@@ -209,6 +223,7 @@ xrep_rmapbt_setup(
#define xrep_quota xrep_notsupported
#define xrep_quotacheck xrep_notsupported
#define xrep_fscounters xrep_notsupported
+#define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index e6c7b52c2edf..149616f2d392 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -20,6 +20,7 @@
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"
+#include "scrub/repair.h"
/*
* Realtime Summary
@@ -57,8 +58,13 @@ xchk_setup_rtsummary(
struct xfs_inode *ip)
{
struct xfs_mount *mp = sc->mp;
+ unsigned long long resblks = 0;
int error;
+ error = xrep_setup_tempfile(sc, S_IFREG);
+ if (error)
+ return error;
+
/*
* Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose.
@@ -67,7 +73,21 @@ xchk_setup_rtsummary(
if (IS_ERR(sc->xfile))
return PTR_ERR(sc->xfile);
- error = xchk_trans_alloc(sc, 0);
+ /*
+ * If we're doing a repair, we reserve 2x the summary blocks: once for
+ * the new summary contents and again for the bmbt blocks and the
+ * remapping operation. We cannot use xfs_swapext_estimate because we
+ * have not yet constructed the replacement rtsummary and therefore do
+ * not know how many extents it will use. By the time we do, we will
+ * have a dirty transaction (which we cannot drop because we cannot
+ * drop the rtsummary ILOCK) and cannot ask for more reservation.
+ */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ resblks = XFS_B_TO_FSB(sc->mp, sc->mp->m_rsumsize) * 2;
+ if (resblks > UINT_MAX)
+ return -EOPNOTSUPP;
+ }
+ error = xchk_trans_alloc(sc, resblks);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..5f9ec44a0577
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_swapext.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Repair the realtime summary: write the contents of the xfile into the
+ * temporary file, then swap the data fork extents of the temporary file and
+ * the inode being repaired.
+ *
+ * Returns 0 on success, -EAGAIN if the temporary file's ILOCK could not be
+ * taken without blocking, or the error from whichever step failed.
+ */
+int
+xrep_rtsummary(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_swapext_req	req = { .flags = 0 };
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
+
+	/* Fix any problems with the inode's forks before touching them. */
+	error = xrep_metadata_inode_forks(sc);
+	if (error)
+		return error;
+
+	/*
+	 * Nobody else should hold the temporary file, so a trylock is enough;
+	 * record the lock flags in the scrub context once we own it.
+	 */
+	if (!xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+	sc->temp_ilock_flags = XFS_ILOCK_EXCL;
+
+	/* Both inodes take part in the transaction from here on. */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+	/*
+	 * Allocate space for the whole summary file, then copy in the
+	 * rtsummary contents that were generated in the xfile.
+	 */
+	error = xrep_fallocate(sc, 0, XFS_B_TO_FSB(mp, mp->m_rsumsize));
+	if (!error)
+		error = xrep_set_file_contents(sc, &xfs_rtbuf_ops,
+				XFS_BLFT_RTSUMMARY_BUF, mp->m_rsumsize);
+	if (error)
+		return error;
+
+	/* Swap the data fork extents of the two files. */
+	req.ip1 = sc->tempip;
+	req.ip2 = sc->ip;
+	req.whichfork = XFS_DATA_FORK;
+	req.blockcount = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+
+	return xfs_swapext(&sc->tp, &req);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index fa46a9958c9f..3a54c6730c1b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -342,7 +342,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.has = xfs_sb_version_hasrealtime,
- .repair = xrep_notsupported,
+ .repair = xrep_rtsummary,
},
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
.type = ST_FS,