summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2020-03-16 16:33:22 -0700
committer: Darrick J. Wong <darrick.wong@oracle.com> 2020-06-01 21:16:30 -0700
commit: e235a1f0be11cbfeec1e4b52210ad7d5a56eab96 (patch)
tree: 0f069e2545154df8fcd53c5a3393f30261967fc9
parent: 2a2c839a292ffebe99ee45ffa028e3b9a80be9b1 (diff)
xfs: implement online scrubbing of rtsummary info (tag: scrub-rtsummary_2020-06-01)
Finish the realtime summary scrubber by adding the functions we need to compute a fresh copy of the rtsummary info and to compare it to the copy on disk.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/scrub/common.h10
-rw-r--r--fs/xfs/scrub/rtbitmap.c4
-rw-r--r--fs/xfs/scrub/rtsummary.c315
-rw-r--r--fs/xfs/scrub/scrub.c8
-rw-r--r--fs/xfs/scrub/scrub.h1
-rw-r--r--fs/xfs/scrub/trace.h32
6 files changed, 342 insertions, 28 deletions
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 2e50d146105d..3324c88f6a08 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -96,10 +96,16 @@ int xchk_setup_symlink(struct xfs_scrub *sc,
int xchk_setup_parent(struct xfs_scrub *sc,
struct xfs_inode *ip);
#ifdef CONFIG_XFS_RT
-int xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip);
+int xchk_setup_rtbitmap(struct xfs_scrub *sc, struct xfs_inode *ip);
+int xchk_setup_rtsummary(struct xfs_scrub *sc, struct xfs_inode *ip);
#else
static inline int
-xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip)
+xchk_setup_rtbitmap(struct xfs_scrub *sc, struct xfs_inode *ip)
+{
+ return -ENOENT;
+}
+static inline int
+xchk_setup_rtsummary(struct xfs_scrub *sc, struct xfs_inode *ip)
{
return -ENOENT;
}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 12c9c8fc6c70..fb58c25f35ac 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -19,13 +19,13 @@
/* Set us up with the realtime metadata locked. */
int
-xchk_setup_rt(
+xchk_setup_rtbitmap(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
int error;
- error = xchk_setup_fs(sc, ip);
+ error = xchk_trans_alloc(sc, 0);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 41c3634b8e2f..a70f6f0267bc 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -14,41 +14,312 @@
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/xfile.h"
-/* Scrub the realtime summary. */
+/*
+ * Realtime Summary
+ * ================
+ *
+ * We check the realtime summary by scanning the realtime bitmap file to create
+ * a new summary file incore, and then we compare the computed version against
+ * the ondisk version. We use the 'xfile' functionality to store this
+ * (potentially large) amount of data in pageable memory.
+ */
+
+/*
+ * Scan state carried from one rtbitmap block to the next while the summary is
+ * being recomputed. Positions are kept as a (bitmap block, bit within that
+ * block) pair; xchk_rtsum_process_bmblock advances them and
+ * xchk_rtsum_record_free consumes them.
+ */
+struct xchk_rtsum_compute {
+ /* How far have we iterated through the rt extents? */
+ xfs_rtblock_t rt_extent_nr;
+
+ /*
+ * How many free rt extents have we seen? Incremented once per set
+ * bitmap bit; nothing in this patch reads it back.
+ */
+ xfs_rtblock_t rt_free_nr;
+
+ /* block and bit offset of our current position in the rtbitmap. */
+ xfs_fileoff_t off;
+ unsigned int bit;
+
+ /* block and bit offset of the start of the most recent free rtext. */
+ xfs_fileoff_t start_off;
+ unsigned int start_bit;
+
+ /* Are we accumulating a free rtext? */
+ bool in_extent;
+};
+
+/*
+ * Set us up to check the rtsummary file.
+ *
+ * Creates the xfile that will hold the recomputed summary, allocates a scrub
+ * transaction and a one-block comparison buffer, then locks the rtbitmap
+ * inode (shared) followed by the rtsummary inode (exclusive).
+ *
+ * @ip is not used here.
+ *
+ * Returns 0 on success or a negative errno. On success the rtbitmap ILOCK is
+ * still held; xchk_rtsummary drops it when it is finished.
+ */
int
-xchk_rtsummary(
- struct xfs_scrub *sc)
+xchk_setup_rtsummary(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
{
- struct xfs_inode *rsumip = sc->mp->m_rsumip;
- struct xfs_inode *old_ip = sc->ip;
- uint old_ilock_flags = sc->ilock_flags;
- int error = 0;
+ struct xfs_mount *mp = sc->mp;
+ int error;
/*
- * We ILOCK'd the rt bitmap ip in the setup routine, now lock the
- * rt summary ip in compliance with the rt inode locking rules.
- *
- * Since we switch sc->ip to rsumip we have to save the old ilock
- * flags so that we don't mix up the inode state that @sc tracks.
+ * Create an xfile to construct a new rtsummary file. The xfile allows
+ * us to avoid pinning kernel memory for this purpose.
+ *
+ * If creation fails, reset sc->xfile to NULL before returning.
+ * xchk_teardown calls fput() on any non-NULL sc->xfile, so leaving the
+ * ERR_PTR value in place would hand an error pointer to fput().
 */
- sc->ip = rsumip;
+ sc->xfile = xfile_create("rtsummary", mp->m_rsumsize);
+ if (IS_ERR(sc->xfile)) {
+ error = PTR_ERR(sc->xfile);
+ sc->xfile = NULL;
+ return error;
+ }
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ return error;
+
+ /* Allocate a memory buffer for the summary comparison. */
+ sc->buf = kmem_alloc_large(sc->mp->m_sb.sb_blocksize, KM_MAYFAIL);
+ if (!sc->buf)
+ return -ENOMEM;
+
+ /*
+ * Locking order requires us to take the rtbitmap first. We must be
+ * careful to unlock it ourselves when we are done with the rtbitmap
+ * file since the scrub infrastructure won't do that for us.
+ */
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+ /* ...and then we can lock the rtsummary inode. */
sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM;
+ sc->ip = sc->mp->m_rsumip;
xfs_ilock(sc->ip, sc->ilock_flags);
+ return 0;
+}
+
+/*
+ * Update the summary file to reflect the free extent that we've accumulated.
+ *
+ * The extent runs from (start_off, start_bit) up to, but not including, the
+ * current scan position (off, bit). Its counter in the xfile copy of the
+ * summary is read, incremented and written back, and in_extent is cleared.
+ *
+ * Returns 0 on success or the error reported by xfile_io.
+ */
+STATIC int
+xchk_rtsum_record_free(
+ struct xfs_scrub *sc,
+ struct xchk_rtsum_compute *state)
+{
+ struct xfs_mount *mp = sc->mp;
+ loff_t pos;
+ uint64_t start;
+ uint64_t end;
+ uint64_t len;
+ unsigned int offs;
+ unsigned int log;
+ unsigned int bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+ xfs_suminfo_t v = 0;
+ int error;
+
+ /*
+ * Convert both endpoints to absolute 64-bit bit numbers before
+ * subtracting. Subtracting the unsigned int bit offsets on their own
+ * wraps when the extent crosses a bitmap block boundary and ends at a
+ * lower bit than it started, which would inflate the length by 2^32.
+ */
+ start = (uint64_t)state->start_off * bitsperblock + state->start_bit;
+ end = (uint64_t)state->off * bitsperblock + state->bit;
+ len = end - start;
+
+ /* Compute the relevant location in the rtsum file. */
+ log = XFS_RTBLOCKLOG(len);
+ offs = XFS_SUMOFFS(mp, log, state->start_off);
+
+ /* Read current rtsummary contents. */
+ pos = sizeof(xfs_suminfo_t) * offs;
+ error = xfile_io(sc->xfile, XFILE_IO_READ, &pos, &v,
+ sizeof(xfs_suminfo_t));
+ if (error)
+ return error;
+
+ /*
+ * Bump the summary count. pos was handed to xfile_io by reference, so
+ * recompute it before the write (presumably the read advanced it;
+ * confirm against xfile_io).
+ */
+ v++;
+ pos = sizeof(xfs_suminfo_t) * offs;
+ trace_xchk_rtsum_record_free(mp, start, end - 1, len, log, offs, v);
+
+ /* ...and write it back. */
+ error = xfile_io(sc->xfile, XFILE_IO_WRITE, &pos, &v,
+ sizeof(xfs_suminfo_t));
+ if (error)
+ return error;
+
+ state->in_extent = false;
+ return 0;
+}
+
+/* Report whether bit number @bit is set in the array of rtbitmap @words. */
+static inline bool
+xchk_rtsum_isset(
+ xfs_rtword_t *words,
+ unsigned int bit)
+{
+ const unsigned int wordbits = sizeof(*words) * NBBY;
+ xfs_rtword_t word = words[bit / wordbits];
+
+ return (word & (1ULL << (bit % wordbits))) != 0;
+}
+
+/*
+ * Walk a single rtbitmap block looking for changes in the free status.
+ *
+ * Reads bitmap block @block_off and scans its bits, advancing state->bit and
+ * state->rt_extent_nr together, until the block is exhausted or sb_rextents
+ * extents have been visited. A set bit bumps rt_free_nr and, if no free
+ * extent is currently open, opens one at the current position. A clear bit
+ * closes an open extent through xchk_rtsum_record_free. An extent that is
+ * still open when the scan stops remains open in @state for the caller.
+ *
+ * Returns the value left in error: 0, or a negative errno from the
+ * termination check, the buffer read, or xchk_rtsum_record_free.
+ */
+STATIC int
+xchk_rtsum_process_bmblock(
+ struct xfs_scrub *sc,
+ xfs_fileoff_t block_off,
+ struct xchk_rtsum_compute *state)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+ xfs_rtword_t *words;
+ unsigned int bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+ int error = 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ /*
+ * NOTE(review): on failure this returns whatever the helper left in
+ * error. Confirm that it cannot be zero here, since the caller would
+ * then carry on with this block unscanned.
+ */
+ error = xfs_rtbuf_get(mp, sc->tp, block_off, 0, &bp);
+ if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, block_off,
+ &error))
+ return error;
+
+ /* Record the block being scanned so record_free can measure lengths. */
+ state->off = block_off;
+ words = (xfs_rtword_t *)bp->b_addr;
+ for (state->bit = 0;
+ state->bit < bitsperblock &&
+ state->rt_extent_nr < mp->m_sb.sb_rextents;
+ state->bit++, state->rt_extent_nr++) {
+ if (xchk_rtsum_isset(words, state->bit)) {
+ /* Set bit: this rt extent is counted as free. */
+ state->rt_free_nr++;
+ if (!state->in_extent) {
+ state->start_off = block_off;
+ state->start_bit = state->bit;
+ state->in_extent = true;
+ }
+ } else if (state->in_extent) {
+ /* Clear bit ends the run; state->bit is one past it. */
+ error = xchk_rtsum_record_free(sc, state);
+ if (error)
+ goto out_relse;
+ }
+ }
+
+ /* Also reached on normal loop exit: release the bitmap buffer. */
+out_relse:
+ xfs_trans_brelse(sc->tp, bp);
+ return error;
+}
+
+/*
+ * Build the in-memory realtime summary by scanning every rtbitmap block.
+ * This is a kernel port of the defunct process_rtbitmap function in
+ * xfs_repair.
+ *
+ * Returns -EFSCORRUPTED if the rtbitmap inode size does not match the size
+ * implied by sb_rextents, any error from the per-block scan, or 0.
+ */
+STATIC int
+xchk_rtsum_compute(
+ struct xfs_scrub *sc)
+{
+ struct xchk_rtsum_compute state = { 0 };
+ struct xfs_mount *mp = sc->mp;
+ unsigned long long bitmap_bytes;
+ xfs_fileoff_t nr_blocks;
+ xfs_fileoff_t blk;
+ int error;
+
+ bitmap_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY);
+
+ /* The bitmap file must be exactly the block-rounded computed size. */
+ if (mp->m_rbmip->i_d.di_size !=
+ roundup_64(bitmap_bytes, mp->m_sb.sb_blocksize))
+ return -EFSCORRUPTED;
+
+ nr_blocks = howmany_64(bitmap_bytes, mp->m_sb.sb_blocksize);
+ blk = 0;
+ while (blk < nr_blocks) {
+ error = xchk_rtsum_process_bmblock(sc, blk, &state);
+ if (error)
+ return error;
+ /* Stop as soon as every rt extent has been visited. */
+ if (state.rt_extent_nr == mp->m_sb.sb_rextents)
+ break;
+ blk++;
+ }
+
+ /* A free extent that runs to the end of the scan still needs recording. */
+ if (!state.in_extent)
+ return 0;
+ return xchk_rtsum_record_free(sc, &state);
+}
+
+/*
+ * Compare the rtsummary file against the one we computed.
+ *
+ * Walks the ondisk summary one filesystem block at a time, for m_rsumsize
+ * bytes in total. For each block: check that the summary inode has a real
+ * extent mapped there, read the ondisk block, read the same byte range of
+ * the computed copy from the xfile into sc->buf, and memcmp the two. A
+ * missing mapping or a mismatch marks that data fork block corrupt.
+ *
+ * Returns the last value left in error (0 if nothing failed).
+ */
+STATIC int
+xchk_rtsum_compare(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+ struct xfs_bmbt_irec map;
+ xfs_rtblock_t off;
+ loff_t pos;
+ int nmap;
+ int error = 0;
+
+ /* off counts summary file blocks; pos is the matching byte offset. */
+ for (off = 0, pos = 0;
+ pos < mp->m_rsumsize;
+ pos += mp->m_sb.sb_blocksize, off++) {
+ /* Scratch copy of pos, passed to xfile_io by reference. */
+ loff_t ppos = pos;
+ size_t count;
+
+ /*
+ * Stop on a termination request, or once an earlier block has
+ * already flagged corruption.
+ */
+ if (xchk_should_terminate(sc, &error) ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ break;
+
+ /* Make sure we have a written extent. */
+ nmap = 1;
+ error = xfs_bmapi_read(mp->m_rsumip, off, 1, &map, &nmap,
+ XFS_DATA_FORK);
+ if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
+ break;
+
+ if (nmap != 1 || !xfs_bmap_is_real_extent(&map)) {
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
+ break;
+ }
+
+ /* Read a block's worth of ondisk rtsummary file. */
+ error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp);
+ if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
+ break;
+
+ /*
+ * Read a block's worth of computed rtsummary file. The final
+ * block may be partial, so clamp to the bytes that remain.
+ */
+ count = min_t(loff_t, mp->m_rsumsize - pos,
+ mp->m_sb.sb_blocksize);
+ error = xfile_io(sc->xfile, XFILE_IO_READ, &ppos, sc->buf,
+ count);
+ if (error) {
+ /* bp is held from here on; release it on every exit. */
+ xfs_trans_brelse(sc->tp, bp);
+ break;
+ }
+
+ if (memcmp(bp->b_addr, sc->buf, count) != 0)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
+
+ xfs_trans_brelse(sc->tp, bp);
+ }
+
+ return error;
+}
+
+/*
+ * Scrub the realtime summary.
+ *
+ * Runs the metadata inode fork scrubber, recomputes the summary from the
+ * rtbitmap into the xfile, and compares it with the ondisk summary. The
+ * rtbitmap ILOCK taken in xchk_setup_rtsummary is dropped on every exit path.
+ *
+ * Returns 0 or a negative errno. -EFSCORRUPTED from the recomputation is
+ * reported as a cross-reference corruption of the rtbitmap inode, not as an
+ * error return.
+ */
+int
+xchk_rtsummary(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ int error = 0;
+
/* Invoke the fork scrubber. */
error = xchk_metadata_inode_forks(sc);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
- goto out;
-
- /* XXX: implement this some day */
- xchk_set_incomplete(sc);
-out:
- /* Switch back to the rtbitmap inode and lock flags. */
- xfs_iunlock(sc->ip, sc->ilock_flags);
- sc->ilock_flags = old_ilock_flags;
- sc->ip = old_ip;
+ goto out_rbm;
+
+ /* Construct the new summary file from the rtbitmap. */
+ error = xchk_rtsum_compute(sc);
+ if (error == -EFSCORRUPTED) {
+ /*
+ * EFSCORRUPTED means the rtbitmap is corrupt, which is an xref
+ * error since we're checking the summary file.
+ */
+ xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
+ error = 0;
+ goto out_rbm;
+ }
+ /*
+ * Any other failure leaves the computed summary incomplete. Bail out
+ * so the comparison below cannot overwrite the error.
+ */
+ if (error)
+ goto out_rbm;
+
+ /* Does the computed summary file match the actual rtsummary file? */
+ error = xchk_rtsum_compare(sc);
+
+out_rbm:
+ /* Unlock the rtbitmap since we're done with it. */
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
return error;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index bcc3bf8ea813..6aaae5d39072 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -177,6 +177,10 @@ xchk_teardown(
mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
}
+ if (sc->xfile) {
+ fput(sc->xfile);
+ sc->xfile = NULL;
+ }
if (sc->buf) {
kmem_free(sc->buf);
sc->buf = NULL;
@@ -306,14 +310,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
},
[XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
.type = ST_FS,
- .setup = xchk_setup_rt,
+ .setup = xchk_setup_rtbitmap,
.scrub = xchk_rtbitmap,
.has = xfs_sb_version_hasrealtime,
.repair = xrep_notsupported,
},
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
.type = ST_FS,
- .setup = xchk_setup_rt,
+ .setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.has = xfs_sb_version_hasrealtime,
.repair = xrep_notsupported,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index ad1ceb44a628..6577ad22151d 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -61,6 +61,7 @@ struct xfs_scrub {
struct xfs_trans *tp;
struct xfs_inode *ip;
void *buf;
+ struct file *xfile;
uint ilock_flags;
/* See the XCHK/XREP state flags below. */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 3449d01e459f..2965c30ff319 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -651,6 +651,38 @@ TRACE_EVENT(xchk_fscounters_within_range,
__entry->old_value)
)
+/*
+ * Emitted by xchk_rtsum_record_free each time a free rt extent is added to
+ * the computed summary. Records the first and last bit number of the extent,
+ * its length, the XFS_RTBLOCKLOG() value of that length, and the new counter
+ * value. Note that the caller passes the summary counter index as @pos, not
+ * a byte offset.
+ */
+TRACE_EVENT(xchk_rtsum_record_free,
+ TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, xfs_rtblock_t end,
+ uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v),
+ TP_ARGS(mp, start, end, len, log, pos, v),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rtblock_t, start)
+ __field(xfs_rtblock_t, end)
+ __field(unsigned long long, len)
+ __field(unsigned int, log)
+ __field(loff_t, pos)
+ __field(xfs_suminfo_t, v)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->start = start;
+ __entry->end = end;
+ __entry->len = len;
+ __entry->log = log;
+ __entry->pos = pos;
+ __entry->v = v;
+ ),
+ TP_printk("dev %d:%d start %llu end %llu len %llu log %u pos %lld v %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->start,
+ __entry->end,
+ __entry->len,
+ __entry->log,
+ __entry->pos,
+ __entry->v)
+)
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)