summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2020-03-12 14:49:02 -0700
committer: Darrick J. Wong <darrick.wong@oracle.com> 2020-06-24 18:12:20 -0700
commit: b0f907567458aeb3f3776ebced291c0bacbf9750 (patch)
tree: 21fa216b2108c1a23bf85e0687a57c7e653090fc
parent: b4a8e309923630d5b42c0bee70e6dfd066ba862b (diff)
xfs: online repair of realtime summaries [repair-fsfile-metadata_2020-06-24]
Repair the online summary data.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/Makefile 4
-rw-r--r-- fs/xfs/scrub/attr_repair.c 2
-rw-r--r-- fs/xfs/scrub/common.h 8
-rw-r--r-- fs/xfs/scrub/repair.c 256
-rw-r--r-- fs/xfs/scrub/repair.h 19
-rw-r--r-- fs/xfs/scrub/rtsummary.c 13
-rw-r--r-- fs/xfs/scrub/rtsummary_repair.c 45
-rw-r--r-- fs/xfs/scrub/scrub.c 7
8 files changed, 347 insertions, 7 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 52c9cf0eaecd..c457366d5060 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -179,5 +179,9 @@ xfs-y += $(addprefix scrub/, \
repair.o \
symlink_repair.o \
)
+
+xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rtsummary_repair.o \
+ )
endif
endif
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
index 58e376397168..8967e8d960ff 100644
--- a/fs/xfs/scrub/attr_repair.c
+++ b/fs/xfs/scrub/attr_repair.c
@@ -582,7 +582,7 @@ xrep_xattr_stale_block(
* join the inode to the transaction. This function returns with the inode
* joined to a clean scrub transaction.
*/
-STATIC int
+int
xrep_xattr_reset_fork(
struct xfs_scrub *sc)
{
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 3324c88f6a08..0a79ea44315c 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -155,4 +155,12 @@ int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
void xchk_stop_reaping(struct xfs_scrub *sc);
void xchk_start_reaping(struct xfs_scrub *sc);
+/*
+ * Do the scrub flags call for a repair?  True if any of the CORRUPT,
+ * XCORRUPT, or PREEN output flags is set in sm->sm_flags.
+ */
+static inline bool xfs_scrub_needs_repair(struct xfs_scrub_metadata *sm)
+{
+	const unsigned int	repair_mask = XFS_SCRUB_OFLAG_CORRUPT |
+					      XFS_SCRUB_OFLAG_XCORRUPT |
+					      XFS_SCRUB_OFLAG_PREEN;
+
+	return (sm->sm_flags & repair_mask) != 0;
+}
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1f3238bb4eea..a04ee90504e9 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -28,11 +28,14 @@
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_extfree_item.h"
+#include "xfs_attr.h"
+#include "xfs_reflink.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -1415,3 +1418,256 @@ xrep_buf_verify_struct(
return fa == NULL;
}
+
+/*
+ * Repair the attr/data forks of a metadata inode. The caller must ensure that
+ * sc->ip points to the metadata inode and the ILOCK is held on that inode.
+ * The inode must not be joined to the transaction before the call, and will
+ * not be afterwards.
+ *
+ * Returns 0 if no repair was needed or the data fork checks out clean after
+ * the repair, -EFSCORRUPTED if the fork still needs repair after the repair
+ * attempt, or the error returned by whichever helper failed. In all cases
+ * sc->sm->sm_type and sc->sm->sm_flags hold their original values on return;
+ * anything the internal fork checks set in sm_flags is discarded.
+ */
+int
+xrep_metadata_inode_forks(
+ struct xfs_scrub *sc)
+{
+ __u32 smtype;
+ __u32 smflags;
+ bool dirty = false;
+ int error;
+
+ /* Clear the reflink flag since metadata never shares. */
+ if (xfs_is_reflink_inode(sc->ip)) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+ if (error)
+ return error;
+ }
+
+ /* Clear the attr forks since metadata shouldn't have that. */
+ if (xfs_inode_hasattr(sc->ip)) {
+ /* Only join the inode if the reflink step above did not already. */
+ if (!dirty) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ }
+ error = xrep_xattr_reset_fork(sc);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If we modified the inode, roll the transaction but don't rejoin the
+ * inode to the new transaction because xrep_bmap_data can do that.
+ */
+ if (dirty) {
+ error = xfs_trans_roll(&sc->tp);
+ if (error)
+ return error;
+ /* NOTE(review): dirty is not read again after this assignment. */
+ dirty = false;
+ }
+
+ /*
+ * Let's see if the forks need repair. We're going to open-code calls
+ * to the bmapbtd scrub and repair functions so that we can hang on to
+ * the resources that we already acquired instead of using the standard
+ * setup/teardown routines.
+ *
+ * Save the caller's scrub type and flags first; they are put back at
+ * the out label. Masking off XFS_SCRUB_FLAGS_OUT is presumably what
+ * lets the needs-repair test below see only what this check reports.
+ */
+ smtype = sc->sm->sm_type;
+ smflags = sc->sm->sm_flags;
+ sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
+ sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+
+ /* A scrub error or a clean result both mean there is nothing to fix. */
+ error = xchk_metadata_inode_forks(sc);
+ if (error || !xfs_scrub_needs_repair(sc->sm))
+ goto out;
+
+ /*
+ * Repair the data fork. This will potentially join the inode to the
+ * transaction.
+ */
+ error = xrep_bmap_data(sc);
+ if (error)
+ goto out;
+
+ /*
+ * Roll the transaction but don't rejoin the inode to the new
+ * transaction because we're done making changes to the inode.
+ */
+ error = xfs_trans_roll(&sc->tp);
+ if (error)
+ goto out;
+
+ /* Bail out if we still need repairs. */
+ sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = xchk_metadata_inode_forks(sc);
+ if (error)
+ goto out;
+ if (xfs_scrub_needs_repair(sc->sm))
+ error = -EFSCORRUPTED;
+out:
+ /* Put back the scrub type and flags that the caller passed in. */
+ sc->sm->sm_type = smtype;
+ sc->sm->sm_flags = smflags;
+ return error;
+}
+
+/*
+ * Make sure that the given range of the data fork of the metadata file being
+ * checked is mapped to written blocks. The caller must ensure that the inode
+ * is joined to the transaction.
+ *
+ * @off and @len give the start and length of the range in filesystem blocks.
+ * Any part of the range that is not already a written extent is passed to
+ * xfs_bmapi_write() with XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, and
+ * xrep_roll_trans() is called after each such allocation.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if the range contains a delalloc
+ * reservation or cannot be mapped, -ENOSPC if no blocks could be allocated,
+ * or the error returned by one of the helpers.
+ */
+int
+xrep_fallocate(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		off,
+	xfs_filblks_t		len)
+{
+	struct xfs_bmbt_irec	map;
+	xfs_fileoff_t		end = off + len;
+	int			nmaps;
+	int			error;
+
+	error = xrep_ino_dqattach(sc);
+	if (error)
+		return error;
+
+	/*
+	 * Walk [off, end) one mapping at a time.  The loop bound is the end
+	 * offset, not the length, so ranges that do not start at zero are
+	 * covered in full.
+	 */
+	while (off < end) {
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->ip, off, end - off, &map, &nmaps,
+				XFS_DATA_FORK);
+		if (error)
+			return error;
+
+		/* Without a mapping, @map holds nothing we can look at. */
+		if (nmaps == 0)
+			return -EFSCORRUPTED;
+
+		/*
+		 * If we have a real extent mapping this block then we're
+		 * in ok shape.  Skip past the whole mapping.
+		 */
+		if (xfs_bmap_is_written_extent(&map)) {
+			off = map.br_startoff + map.br_blockcount;
+			continue;
+		}
+
+		/*
+		 * If we find a delalloc reservation then something is very
+		 * very wrong.  Bail out.
+		 */
+		if (map.br_startblock == DELAYSTARTBLOCK)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Make sure this part of the file has a real zeroed extent
+		 * allocated to it.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_write(sc->tp, sc->ip, off, end - off,
+				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+				&nmaps);
+		if (error)
+			return error;
+
+		/*
+		 * If nothing was mapped we cannot make progress; fail rather
+		 * than advance by the contents of a stale @map.
+		 */
+		if (nmaps != 1)
+			return -ENOSPC;
+
+		error = xrep_roll_trans(sc);
+		if (error)
+			return error;
+
+		/* Advance by the mapping length, not its disk address. */
+		off = map.br_startoff + map.br_blockcount;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the first @isize bytes of sc->xfile into the data fork of the metadata
+ * file being examined (sc->ip), one filesystem block at a time, and then set
+ * the inode size to @isize if it differs.  Each buffer is assigned @ops and
+ * the buffer log item type @type before being queued for delayed write.
+ *
+ * Every file block in the range must already be mapped to a written extent;
+ * otherwise -EFSCORRUPTED is returned.  The caller must join the inode to the
+ * transaction.
+ *
+ * Returns 0 on success or the error from the failing step.  On failure any
+ * buffers still on the delayed-write list are cancelled.
+ */
+int
+xrep_set_file_contents(
+	struct xfs_scrub	*sc,
+	const struct xfs_buf_ops *ops,
+	enum xfs_blft		type,
+	xfs_fileoff_t		isize)
+{
+	LIST_HEAD(buffers_list);
+	struct xfs_bmbt_irec	map;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	xfs_fileoff_t		off = 0;	/* file block being written */
+	loff_t			pos = 0;	/* byte position in the xfile */
+	unsigned int		nr_buffers = 0;
+	int			nmaps;
+	int			error = 0;
+
+	ASSERT(S_ISREG(VFS_I(sc->ip)->i_mode));
+
+	for (; pos < isize; pos += mp->m_sb.sb_blocksize, off++) {
+		size_t		count;
+
+		/* Read block mapping for this file block. */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->ip, off, 1, &map, &nmaps, 0);
+		if (error)
+			goto out;
+		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Get the metadata buffer for this offset in the file. */
+		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock),
+				mp->m_bsize, 0, &bp);
+		if (error)
+			goto out;
+		bp->b_ops = ops;
+		xfs_trans_buf_set_type(sc->tp, bp, type);
+
+		/* Read in a block's worth of data from the xfile. */
+		count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
+		error = xfile_pread(sc->xfile, bp->b_addr, count, pos);
+		if (error) {
+			xfs_trans_brelse(sc->tp, bp);
+			goto out;
+		}
+
+		/*
+		 * Put this buffer on the delwri list so we can write them all
+		 * out in batches.
+		 */
+		xfs_buf_delwri_queue(bp, &buffers_list);
+		xfs_trans_brelse(sc->tp, bp);
+		nr_buffers++;
+
+		/*
+		 * If we have more than 256K of data to write out, flush it to
+		 * disk so we don't use up too much memory.
+		 */
+		if (XFS_FSB_TO_B(mp, nr_buffers) > 262144) {
+			error = xfs_buf_delwri_submit(&buffers_list);
+			if (error)
+				goto out;
+			nr_buffers = 0;
+		}
+	}
+
+	/*
+	 * Write the new blocks to disk.  If the delwri list isn't empty after
+	 * that, then something went wrong and we have to fail.  This should
+	 * never happen, but we'll check anyway.  Leave through the cleanup
+	 * path so that nothing stays queued on the on-stack list head.
+	 */
+	error = xfs_buf_delwri_submit(&buffers_list);
+	if (error)
+		goto out;
+	if (!list_empty(&buffers_list)) {
+		ASSERT(list_empty(&buffers_list));
+		error = -EIO;
+		goto out;
+	}
+
+	/* Set the new inode size, if needed. */
+	if (sc->ip->i_d.di_size != isize) {
+		sc->ip->i_d.di_size = isize;
+		xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+		return xrep_roll_trans(sc);
+	}
+
+	return 0;
+out:
+	xfs_buf_delwri_cancel(&buffers_list);
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 58caaa07843b..207b337d6c29 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -20,6 +20,9 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
/* Repair helpers */
+enum xfs_blft;
+struct xbitmap;
+
int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
@@ -33,8 +36,13 @@ int xrep_alloc_ag_block(struct xfs_scrub *sc,
int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
struct xfs_buf **bpp, xfs_btnum_t btnum,
const struct xfs_buf_ops *ops);
+/* Make sure blocks [off, off + len) of sc->ip's data fork are written. */
+int xrep_fallocate(struct xfs_scrub *sc, xfs_fileoff_t off, xfs_filblks_t len);
-struct xbitmap;
+/* NOTE(review): no user of this callback type is visible in this patch. */
+typedef int (*xrep_setfile_getbuf_fn)(struct xfs_scrub *sc,
+ xfs_fileoff_t off, struct xfs_buf **bpp);
+/* Copy isize bytes from sc->xfile into the data fork of sc->ip. */
+int xrep_set_file_contents(struct xfs_scrub *sc,
+ const struct xfs_buf_ops *ops, enum xfs_blft type,
+ xfs_fileoff_t isize);
int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
@@ -57,6 +65,8 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
void xrep_force_quotacheck(struct xfs_scrub *sc, uint dqtype);
int xrep_ino_dqattach(struct xfs_scrub *sc);
int xrep_reset_perag_resv(struct xfs_scrub *sc);
+int xrep_xattr_reset_fork(struct xfs_scrub *sc);
+int xrep_metadata_inode_forks(struct xfs_scrub *sc);
/* Metadata revalidators */
@@ -80,6 +90,12 @@ int xrep_symlink(struct xfs_scrub *sc);
int xrep_dir(struct xfs_scrub *sc);
int xrep_xattr(struct xfs_scrub *sc);
+#ifdef CONFIG_XFS_RT
+int xrep_rtsummary(struct xfs_scrub *sc);
+#else
+# define xrep_rtsummary xrep_notsupported
+#endif /* CONFIG_XFS_RT */
+
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@@ -185,6 +201,7 @@ xrep_reset_perag_resv(
#define xrep_symlink xrep_notsupported
#define xrep_dir xrep_notsupported
#define xrep_xattr xrep_notsupported
+#define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 67a4bd9a1e89..b9a44cb89523 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -57,6 +57,7 @@ xchk_setup_rtsummary(
struct xfs_inode *ip)
{
struct xfs_mount *mp = sc->mp;
+ unsigned long long resblks = 0;
int error;
/*
@@ -67,7 +68,17 @@ xchk_setup_rtsummary(
if (IS_ERR(sc->xfile))
return PTR_ERR(sc->xfile);
- error = xchk_trans_alloc(sc, 0);
+ /*
+ * If we're doing a repair, we reserve 2x the summary blocks: once for
+ * the new summary contents and again for the bmbt blocks and the
+ * remapping operation.
+ */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ resblks = XFS_B_TO_FSB(sc->mp, sc->mp->m_rsumsize) * 2;
+ if (resblks > UINT_MAX)
+ return -EOPNOTSUPP;
+ }
+ error = xchk_trans_alloc(sc, resblks);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..33cdb5a0bfd8
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Repair the realtime summary file: fix up the inode forks, make sure the
+ * whole file is backed by written blocks, then copy in the summary contents
+ * held in the xfile.
+ */
+int
+xrep_rtsummary(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_mount	*mp = sc->mp;
+	xfs_filblks_t		rsumblocks;
+	int			error;
+
+	/* Fix any fork problems before touching the file contents. */
+	error = xrep_metadata_inode_forks(sc);
+	if (error)
+		return error;
+
+	/* Back the entire summary file with allocated space. */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+	error = xrep_fallocate(sc, 0, rsumblocks);
+	if (error)
+		return error;
+
+	/* Write out the rtsummary data that we generated. */
+	return xrep_set_file_contents(sc, &xfs_rtbuf_ops,
+			XFS_BLFT_RTSUMMARY_BUF, mp->m_rsumsize);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index d82589798550..6aac6d410623 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -325,7 +325,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.has = xfs_sb_version_hasrealtime,
- .repair = xrep_notsupported,
+ .repair = xrep_rtsummary,
},
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
.type = ST_FS,
@@ -544,9 +544,8 @@ retry_op:
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
- needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
- XFS_SCRUB_OFLAG_XCORRUPT |
- XFS_SCRUB_OFLAG_PREEN));
+ needs_fix = xfs_scrub_needs_repair(sc.sm);
+
/*
* If userspace asked for a repair but it wasn't necessary,
* report that back to userspace.