summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:59:04 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-12-15 17:29:02 -0800
commita72e974d1b31e1d23f088e3f910d33094da2e174 (patch)
tree4530bc3b81887b7e1723d2c098fdd5982268c211
parent4624f8c76088688180c5a5a455a89d47935fa570 (diff)
xfs: online repair of realtime summaries [ref: repair-rtsummary_2021-12-15]
Repair the realtime summary data by constructing a new rtsummary file in the scrub temporary file, then atomically swapping the contents. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/scrub/repair.c17
-rw-r--r--fs/xfs/scrub/repair.h14
-rw-r--r--fs/xfs/scrub/rtsummary.c11
-rw-r--r--fs/xfs/scrub/rtsummary_repair.c116
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/tempfile.c247
-rw-r--r--fs/xfs/scrub/tempfile.h14
-rw-r--r--fs/xfs/scrub/trace.h39
9 files changed, 461 insertions, 3 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7ddbcfc2dd46..14b098bdd7fc 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -187,6 +187,10 @@ xfs-y += $(addprefix scrub/, \
tempfile.o \
)
+xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rtsummary_repair.o \
+ )
+
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
quota_repair.o \
quotacheck_repair.o \
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 59a3022ff267..247e3a971f46 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -42,6 +42,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -167,9 +168,23 @@ int
xrep_roll_trans(
struct xfs_scrub *sc)
{
+ int error;
+
if (!sc->ip)
return xrep_roll_ag_trans(sc);
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
+
+ /*
+ * Roll the transaction with the inode we're fixing and the temp inode,
+ * so that neither can pin the log. If a temp inode is attached to the
+ * scrub context, its core is logged in the old transaction before the
+ * roll and it is joined to sc->tp again afterwards with lock flags of
+ * zero. The value returned is whatever xfs_trans_roll_inode()
+ * returned for sc->ip.
+ *
+ * NOTE(review): the temp inode is rejoined even when the roll reports
+ * an error; confirm that callers do nothing but cancel sc->tp in that
+ * case.
+ *
+ * XXX: does this really need to be in the rtsummary repair patch?
+ */
+ if (sc->tempip)
+ xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+ error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+ if (sc->tempip)
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ return error;
}
/*
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index a83c6ad50153..198641f50505 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -76,6 +76,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
+int xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
@@ -133,6 +134,12 @@ int xrep_quotacheck(struct xfs_scrub *sc);
# define xrep_quotacheck xrep_notsupported
#endif /* CONFIG_XFS_QUOTA */
+#ifdef CONFIG_XFS_RT
+int xrep_rtsummary(struct xfs_scrub *sc);
+#else
+# define xrep_rtsummary xrep_notsupported
+#endif /* CONFIG_XFS_RT */
+
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@@ -235,6 +242,12 @@ xrep_setup_nothing(
#define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_setup_ag_rmapbt xrep_setup_nothing
+/*
+ * Do-nothing variant of xrep_setup_rtsummary for builds in which the real
+ * setup function is not available: no temporary file is created, the
+ * caller's block reservation is left untouched, and success is reported.
+ */
+static inline int
+xrep_setup_rtsummary(
+	struct xfs_scrub	*sc,
+	unsigned int		*whatever)
+{
+	return 0;
+}
+
#define xrep_revalidate_allocbt (NULL)
#define xrep_revalidate_iallocbt (NULL)
@@ -254,6 +267,7 @@ xrep_setup_nothing(
#define xrep_quota xrep_notsupported
#define xrep_quotacheck xrep_notsupported
#define xrep_fscounters xrep_notsupported
+#define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index e401abba6b23..b8fb3923f7f7 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -20,6 +20,8 @@
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
/*
* Realtime Summary
@@ -37,8 +39,15 @@ xchk_setup_rtsummary(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
+ unsigned int resblks = 0;
int error;
+ if (xchk_could_repair(sc)) {
+ error = xrep_setup_rtsummary(sc, &resblks);
+ if (error)
+ return error;
+ }
+
/*
* Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose.
@@ -48,7 +57,7 @@ xchk_setup_rtsummary(
if (error)
return error;
- error = xchk_trans_alloc(sc, 0);
+ error = xchk_trans_alloc(sc, resblks);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..97807a9dcf1d
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_swapext.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
+
+/*
+ * Prepare the scrub context for an rtsummary repair.
+ *
+ * Creates the temporary regular file that will receive the rebuilt summary
+ * and adds the number of blocks the repair wants reserved to @resblks.
+ *
+ * Returns 0 on success, the error from xrep_tempfile_create(), or
+ * -EOPNOTSUPP if the block estimate does not fit in an unsigned int.
+ */
+int
+xrep_setup_rtsummary(
+	struct xfs_scrub	*sc,
+	unsigned int		*resblks)
+{
+	struct xfs_mount	*mp = sc->mp;
+	unsigned long long	rsumblocks;
+	unsigned long long	needed;
+	int			error;
+
+	error = xrep_tempfile_create(sc, S_IFREG);
+	if (error)
+		return error;
+
+	/*
+	 * The reservation has two parts: enough blocks to write out an
+	 * entirely new summary file, plus twice the bmbt size we would need
+	 * if every data fork mapping covered only a single block.  Together
+	 * these should pay for preallocating the temporary file and for
+	 * swapping the extent mappings.
+	 *
+	 * xfs_swapext_estimate is of no use here: the replacement rtsummary
+	 * has not been built yet, so its extent count is unknown.  Once it
+	 * has been built the transaction is dirty (and cannot be dropped,
+	 * because the rtsummary ILOCK cannot be dropped), so no further
+	 * reservation can be requested at that point.
+	 */
+	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+	needed = rsumblocks + xfs_bmbt_calc_size(mp, rsumblocks) * 2;
+	if (needed > UINT_MAX)
+		return -EOPNOTSUPP;
+
+	*resblks += needed;
+	return 0;
+}
+
+/*
+ * Repair the realtime summary file.
+ *
+ * The replacement contents are taken from the scrub xfile, written into the
+ * preallocated temporary file, and then swapped into the rtsummary inode's
+ * data fork.  Whatever ends up in the temporary file's data fork afterwards
+ * is reaped.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the filesystem has no rmapbt, -EAGAIN
+ * if the temporary file's ILOCK cannot be taken without blocking, or the
+ * error of whichever step failed.
+ */
+int
+xrep_rtsummary(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_swapext_req	swapreq;
+	struct xfs_mount	*mp = sc->mp;
+	int			ret;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_has_rmapbt(mp))
+		return -EOPNOTSUPP;
+
+	/* Fix any problems with the inode forks before going further. */
+	ret = xrep_metadata_inode_forks(sc);
+	if (ret)
+		return ret;
+
+	/*
+	 * Nobody else should be holding the temporary file, so a trylock is
+	 * expected to succeed; back off if it does not.
+	 */
+	if (!xrep_tempfile_ilock_nowait(sc, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
+	/*
+	 * Join both inodes to the transaction, then make sure the temporary
+	 * file has space allocated for the whole summary file.
+	 */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+	ret = xrep_tempfile_prealloc(sc, 0, XFS_B_TO_FSB(mp, mp->m_rsumsize));
+	if (ret)
+		return ret;
+
+	/* Last chance to abort before we start committing fixes. */
+	if (xchk_should_terminate(sc, &ret))
+		return ret;
+
+	/* Write the rtsummary contents that we generated into the temp file. */
+	ret = xrep_tempfile_copyin_xfile(sc, &xfs_rtbuf_ops,
+			XFS_BLFT_RTSUMMARY_BUF, mp->m_rsumsize);
+	if (ret)
+		return ret;
+
+	/* Set up and perform the swap of the data fork extents. */
+	ret = xrep_tempfile_swapext_prep_request(sc, XFS_DATA_FORK, &swapreq);
+	if (ret)
+		return ret;
+
+	ret = xrep_tempfile_swapext(sc, &swapreq);
+	if (ret)
+		return ret;
+
+	/* Stale old buffers and truncate the file. */
+	return xrep_reap_fork(sc, sc->tempip, XFS_DATA_FORK);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4a2f755aa7d0..666f91f55052 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -335,7 +335,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.has = xfs_has_realtime,
- .repair = xrep_notsupported,
+ .repair = xrep_rtsummary,
},
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
.type = ST_FS,
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index b0d0350833b9..bf5d1bd61f07 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -14,14 +14,19 @@
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
+#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_xchgrange.h"
+#include "xfs_swapext.h"
+#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
+#include "scrub/xfile.h"
/*
* Create a temporary file for reconstructing metadata, with the intention of
@@ -212,3 +217,245 @@ xrep_tempfile_rele(
xfs_irele(sc->tempip);
sc->tempip = NULL;
}
+
+/*
+ * Make sure that the given range of the data fork of the temporary file is
+ * mapped to written blocks.  The caller must ensure that both inodes are
+ * joined to the transaction.
+ *
+ * @off is the first file block of the range and @len is the number of file
+ * blocks in it; the range walked is [off, off + len).
+ *
+ * Returns 0 on success (including an empty range), -EFSCORRUPTED if the
+ * mapping lookup yields nothing or finds a delalloc reservation, -ENOSPC if
+ * the allocator hands back no mapping, or the error from the bmap / defer
+ * calls.
+ */
+int
+xrep_tempfile_prealloc(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		off,
+	xfs_filblks_t		len)
+{
+	struct xfs_bmbt_irec	map;
+	xfs_fileoff_t		end = off + len;
+	int			error;
+
+	ASSERT(sc->tempip != NULL);
+	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
+
+	/*
+	 * Walk the range one mapping at a time.  The loop runs until the end
+	 * of the range (not until @len, which is only the same thing when
+	 * @off is zero), and each pass advances the file offset by the length
+	 * of the mapping just handled -- never by its physical start block.
+	 */
+	for (; off < end; off = map.br_startoff + map.br_blockcount) {
+		int		nmaps = 1;
+
+		/*
+		 * If we have a real extent mapping this block then we're
+		 * in ok shape.
+		 */
+		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
+				XFS_DATA_FORK);
+		if (error)
+			return error;
+
+		/* Without a mapping, @map must not be looked at. */
+		if (nmaps != 1) {
+			ASSERT(nmaps == 1);
+			return -EFSCORRUPTED;
+		}
+
+		if (xfs_bmap_is_written_extent(&map))
+			continue;
+
+		/*
+		 * If we find a delalloc reservation then something is very
+		 * very wrong.  Bail out.
+		 */
+		if (map.br_startblock == DELAYSTARTBLOCK)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Make sure this block has a real zeroed extent allocated to
+		 * it.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
+				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+				&nmaps);
+		if (error)
+			return error;
+
+		/*
+		 * No mapping came back, so nothing was allocated and @map
+		 * still describes the old state.  Fail instead of stepping
+		 * over blocks that were never mapped.
+		 */
+		if (nmaps != 1)
+			return -ENOSPC;
+
+		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
+
+		/* Commit new extent and all deferred work. */
+		error = xfs_defer_finish(&sc->tp);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy @isize bytes from the scrub xfile into the data fork of the temp file,
+ * one filesystem block at a time.  The caller must join both inodes to the
+ * transaction.
+ *
+ * Every block is written through a buffer that is given @ops and tagged with
+ * buffer log format type @type, then queued for delayed write.  Once all
+ * blocks are on disk the temp file's size is set to @isize if it differs and
+ * the transaction is rolled.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if a file block is not backed by a
+ * written extent, -EIO if buffers remain queued after the final submit, or
+ * the error of whichever call failed.  Buffers still queued are cancelled on
+ * failure.
+ */
+int
+xrep_tempfile_copyin_xfile(
+	struct xfs_scrub		*sc,
+	const struct xfs_buf_ops	*ops,
+	enum xfs_blft			type,
+	xfs_fileoff_t			isize)
+{
+	LIST_HEAD(delwri_list);
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp;
+	xfs_fileoff_t			flush_mask;
+	xfs_rtblock_t			fileoff;
+	loff_t				pos;
+	int				error = 0;
+
+	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
+
+	/* Flush buffers to disk every 512K */
+	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
+
+	/* @pos is the byte position in the xfile, @fileoff the file block. */
+	for (fileoff = 0, pos = 0;
+	     pos < isize;
+	     fileoff++, pos += mp->m_sb.sb_blocksize) {
+		struct xfs_bmbt_irec	map;
+		size_t			nbytes;
+		int			nmaps = 1;
+
+		/* Look up where this file block lives on disk. */
+		error = xfs_bmapi_read(sc->tempip, fileoff, 1, &map, &nmaps, 0);
+		if (error)
+			goto out_err;
+		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+			error = -EFSCORRUPTED;
+			goto out_err;
+		}
+
+		/* Grab the metadata buffer backing that disk block. */
+		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock),
+				mp->m_bsize, 0, &bp);
+		if (error)
+			goto out_err;
+		bp->b_ops = ops;
+		xfs_trans_buf_set_type(sc->tp, bp, type);
+
+		/* Fill the buffer with at most one block of xfile data. */
+		nbytes = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
+		error = xfile_obj_load(sc->xfile, bp->b_addr, nbytes, pos);
+		if (error) {
+			xfs_trans_brelse(sc->tp, bp);
+			goto out_err;
+		}
+
+		trace_xrep_tempfile_copyin_xfile(sc, XFS_DATA_FORK, &map);
+
+		/* Queue the buffer for writeback and let go of it. */
+		xfs_buf_delwri_queue_here(bp, &delwri_list);
+		xfs_trans_brelse(sc->tp, bp);
+
+		/* Push queued buffers out so dirty data does not pile up. */
+		if ((fileoff & flush_mask) == 0) {
+			error = xfs_buf_delwri_submit(&delwri_list);
+			if (error)
+				goto out_err;
+		}
+	}
+
+	/*
+	 * Write out whatever is still queued.  The list ought to be empty
+	 * once the submit returns; if it is not, something went wrong and we
+	 * must fail.  This should never happen, but check regardless.
+	 */
+	error = xfs_buf_delwri_submit(&delwri_list);
+	if (error)
+		goto out_err;
+
+	if (!list_empty(&delwri_list)) {
+		ASSERT(list_empty(&delwri_list));
+		error = -EIO;
+		goto out_err;
+	}
+
+	/* Nothing further to do if the inode size is already right. */
+	if (sc->tempip->i_disk_size == isize)
+		return 0;
+
+	/* Otherwise set the new size, log the inode core, and roll. */
+	sc->tempip->i_disk_size = isize;
+	i_size_write(VFS_I(sc->tempip), isize);
+	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+	return xrep_roll_trans(sc);
+
+out_err:
+	xfs_buf_delwri_cancel(&delwri_list);
+	return error;
+}
+
+/*
+ * Fill out @req so that it describes swapping the contents of one fork
+ * between the temp file (ip1) and the metadata file being repaired (ip2).
+ *
+ * Returns 0 on success, or -EINVAL (after tripping an assertion) if
+ * @whichfork is the COW fork or either inode lacks the requested fork.
+ */
+int
+xrep_tempfile_swapext_prep_request(
+	struct xfs_scrub	*sc,
+	int			whichfork,
+	struct xfs_swapext_req	*req)
+{
+	/*
+	 * COW forks don't exist on disk, and both files must actually have
+	 * the fork that is being swapped.
+	 */
+	if (whichfork == XFS_COW_FORK ||
+	    !XFS_IFORK_PTR(sc->ip, whichfork) ||
+	    !XFS_IFORK_PTR(sc->tempip, whichfork)) {
+		ASSERT(0);
+		return -EINVAL;
+	}
+
+	/* Cover every mapping of the selected fork, starting at offset zero. */
+	req->whichfork = whichfork;
+	req->ip1 = sc->tempip;
+	req->startoff1 = 0;
+	req->ip2 = sc->ip;
+	req->startoff2 = 0;
+	req->blockcount = XFS_MAX_FILEOFF;
+	req->req_flags = 0;
+
+	/* Data fork swaps always exchange the file sizes as well. */
+	if (whichfork == XFS_DATA_FORK)
+		req->req_flags |= XFS_SWAP_REQ_SET_SIZES;
+
+	/*
+	 * When xattrs or directories are being repaired, always try to
+	 * convert ip2 to short format once the swap is done.
+	 */
+	if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode))
+		req->req_flags |= XFS_SWAP_REQ_FILE2_CVT_SF;
+
+	return 0;
+}
+
+/*
+ * Swap forks between the file being repaired and the temporary file, as
+ * described by @req.
+ *
+ * Returns 0 on success or the error from xfs_swapext().
+ */
+int
+xrep_tempfile_swapext(
+	struct xfs_scrub	*sc,
+	struct xfs_swapext_req	*req)
+{
+	loff_t			ip_size;
+	loff_t			temp_size;
+	int			error;
+
+	error = xfs_swapext(&sc->tp, req);
+	if (error)
+		return error;
+
+	/* The incore sizes only need attention if the ondisk ones moved. */
+	if (!(req->req_flags & XFS_SWAP_REQ_SET_SIZES))
+		return 0;
+
+	/*
+	 * The ondisk sizes of two metadata files were swapped, so the incore
+	 * sizes must be swapped too.  Online fsck never uses swapext on the
+	 * data forks of user-accessible files, so the two sizes are always
+	 * the same and the inodes do not need to be logged.
+	 */
+	ip_size = i_size_read(VFS_I(sc->ip));
+	temp_size = i_size_read(VFS_I(sc->tempip));
+	i_size_write(VFS_I(sc->ip), temp_size);
+	i_size_write(VFS_I(sc->tempip), ip_size);
+
+	return 0;
+}
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
index a6a4c8d6a373..2b9a9e6fa9d9 100644
--- a/fs/xfs/scrub/tempfile.h
+++ b/fs/xfs/scrub/tempfile.h
@@ -13,6 +13,20 @@ void xrep_tempfile_rele(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags);
void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);
+
+int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off,
+ xfs_filblks_t len);
+
+enum xfs_blft;
+
+int xrep_tempfile_copyin_xfile(struct xfs_scrub *sc,
+ const struct xfs_buf_ops *ops, enum xfs_blft type,
+ xfs_fileoff_t isize);
+
+struct xfs_swapext_req;
+int xrep_tempfile_swapext_prep_request(struct xfs_scrub *sc, int whichfork,
+ struct xfs_swapext_req *req);
+int xrep_tempfile_swapext(struct xfs_scrub *sc, struct xfs_swapext_req *req);
#else
# define xrep_tempfile_rele(sc)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 932e5e900410..b94c209e4bfc 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1527,6 +1527,45 @@ TRACE_EVENT(xrep_tempfile_create,
__entry->temp_inum)
);
+DECLARE_EVENT_CLASS(xrep_tempfile_class, /* events that record one extent mapping of the repair temp file */
+ TP_PROTO(struct xfs_scrub *sc, int whichfork,
+ struct xfs_bmbt_irec *irec),
+ TP_ARGS(sc, whichfork, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino) /* inode number of sc->tempip */
+ __field(int, whichfork)
+ __field(xfs_fileoff_t, lblk) /* irec->br_startoff */
+ __field(xfs_filblks_t, len) /* irec->br_blockcount */
+ __field(xfs_fsblock_t, pblk) /* irec->br_startblock */
+ __field(int, state) /* irec->br_state */
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->ino = sc->tempip->i_ino; /* always the temp file, never sc->ip */
+ __entry->whichfork = whichfork;
+ __entry->lblk = irec->br_startoff;
+ __entry->len = irec->br_blockcount;
+ __entry->pblk = irec->br_startblock;
+ __entry->state = irec->br_state;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), /* fork printed by name */
+ __entry->lblk,
+ __entry->len,
+ __entry->pblk,
+ __entry->state)
+);
+#define DEFINE_XREP_TEMPFILE_EVENT(name) /* define event 'name' in xrep_tempfile_class */ \
+DEFINE_EVENT(xrep_tempfile_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, int whichfork, \
+ struct xfs_bmbt_irec *irec), \
+ TP_ARGS(sc, whichfork, irec))
+DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_prealloc); /* emitted by xrep_tempfile_prealloc() after xfs_bmapi_write() */
+DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_copyin_xfile); /* emitted by xrep_tempfile_copyin_xfile() for each block loaded */
+
TRACE_EVENT(xrep_bmapi_reap_extent,
TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork,
const struct xfs_bmbt_irec *irec),