summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2022-07-14 11:06:11 -0700
committerDarrick J. Wong <djwong@kernel.org>2022-11-09 19:07:28 -0800
commit7169d3aa8365340048b58e36ccf78f01734c718e (patch)
treeaed7441934d169beeca9a22e959b07b3e8438437
parenta50595b3cb9cd7a16444fb207a8033efc0d68e7e (diff)
xfs: repair obviously broken inode modes (tag: repair-inodes_2022-11-09)
Building off the rmap scanner that we added in the previous patch, we can now find block 0 of a file and try to use the information contained inside of it to guess the mode of an inode if the recorded mode is totally improper.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/scrub/inode_repair.c170
-rw-r--r--fs/xfs/scrub/trace.h11
2 files changed, 172 insertions, 9 deletions
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index b19c57edd1bf..d5ac550fedaf 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -74,6 +74,9 @@ struct xrep_inode {
/* Blocks in use by the attr fork. */
xfs_rfsblock_t attr_blocks;
+ /* Physical block containing data block 0. */
+ xfs_fsblock_t block0;
+
/* Number of data device extents for the data fork. */
xfs_extnum_t data_extents;
@@ -113,6 +116,7 @@ xrep_setup_inode(
ri = sc->buf;
memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
ri->sc = sc;
+ ri->block0 = NULLFSBLOCK;
return 0;
}
@@ -188,12 +192,159 @@ xrep_dinode_header(
dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
}
+/* Guess if @bp holds a dir3 block of @ino at @daddr by parsing its header. */
+static inline bool
+xrep_dinode_is_dir(
+ xfs_ino_t ino,
+ xfs_daddr_t daddr,
+ struct xfs_buf *bp)
+{
+ struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ struct xfs_dir2_data_free *bf;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_lsn_t lsn = be64_to_cpu(hdr3->lsn);
+
+ /* Do magic, owner, uuid, and blkno match this file and fs? */
+ if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC) &&
+ hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
+ return false;
+
+ if (be64_to_cpu(hdr3->owner) != ino)
+ return false;
+
+ if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+
+ if (be64_to_cpu(hdr3->blkno) != daddr)
+ return false;
+
+ /* Directory blocks are always logged and must have a valid LSN. */
+ if (lsn == NULLCOMMITLSN)
+ return false;
+ if (!xlog_valid_lsn(mp->m_log, lsn))
+ return false;
+
+ /*
+ * bestfree information lives immediately after the end of the header,
+ * so we won't run off the end of the buffer. Zero-length slots must
+ * have a zero offset, and the three lengths must never increase.
+ */
+ bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
+ if (!bf[0].length && bf[0].offset)
+ return false;
+ if (!bf[1].length && bf[1].offset)
+ return false;
+ if (!bf[2].length && bf[2].offset)
+ return false;
+
+ if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
+ return false;
+ if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
+ return false;
+
+ return true;
+}
+
+/* Guess the file type bits (S_IF*) from the data fork; default S_IFREG. */
+STATIC uint16_t
+xrep_dinode_guess_mode(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_buf *bp;
+ struct xfs_mount *mp = ri->sc->mp;
+ xfs_daddr_t daddr;
+ uint64_t fsize = be64_to_cpu(dip->di_size);
+ unsigned int dfork_sz = XFS_DFORK_DSIZE(dip, mp);
+ uint16_t mode = S_IFREG;
+ int error;
+
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * If the data fork is local format, the size of the data area
+ * is reasonable and is big enough to contain the entire file,
+ * we can guess the file type from the first fsize bytes of it.
+ * No NUL bytes: guess symbolic link; otherwise, guess shortform
+ * directory. NOTE(review): fsize == 0 searches nothing and so
+ * also yields S_IFLNK -- confirm that is intended.
+ */
+ if (dfork_sz <= XFS_LITINO(mp) && dfork_sz >= fsize) {
+ if (!memchr(XFS_DFORK_DPTR(dip), 0, fsize))
+ return S_IFLNK;
+ return S_IFDIR;
+ }
+
+ /* By default, we guess regular file. */
+ return S_IFREG;
+ case XFS_DINODE_FMT_DEV:
+ /*
+ * If the data fork is dev format, the size of the data area is
+ * reasonable and large enough to store a 32-bit device number,
+ * and the file size is zero, this could be a blockdev, a
+ * chardev, a fifo, or a socket. There is no solid way to tell
+ * those choices apart, so we guess blockdev if the device
+ * number is nonzero and chardev if it's zero (aka whiteout).
+ */
+ if (dfork_sz <= XFS_LITINO(mp) &&
+ dfork_sz >= sizeof(__be32) && fsize == 0) {
+ xfs_dev_t dev = xfs_dinode_get_rdev(dip);
+
+ return dev != 0 ? S_IFBLK : S_IFCHR;
+ }
+
+ /* By default, we guess regular file. */
+ return S_IFREG;
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ /* There are data blocks to examine below. */
+ break;
+ default:
+ /* Everything else is considered a regular file. */
+ return S_IFREG;
+ }
+
+ /* There are no zero-length directories. */
+ if (fsize == 0)
+ return S_IFREG;
+
+ /*
+ * If we didn't find a written mapping for file block zero, we'll guess
+ * that it's a sparse regular file.
+ */
+ if (ri->block0 == NULLFSBLOCK)
+ return S_IFREG;
+
+ /* Directories can't have rt extents. */
+ if (ri->rt_extents > 0)
+ return S_IFREG;
+
+ /*
+ * Read the start of the file's first block (XFS_FSS_TO_BB(mp, 1)
+ * basic blocks). Since we have no idea what kind of file geometry
+ * (e.g. dirblock size) we might be reading into, use an uncached
+ * buffer so that we don't pollute the buffer cache. We can't do
+ * uncached mapped buffers, so we only guess from the dir header.
+ */
+ daddr = XFS_FSB_TO_DADDR(mp, ri->block0);
+ error = xfs_buf_read_uncached(mp->m_ddev_targp, daddr,
+ XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+ if (error)
+ return S_IFREG;
+
+ if (xrep_dinode_is_dir(ri->sc->sm->sm_ino, daddr, bp))
+ mode = S_IFDIR;
+
+ xfs_buf_relse(bp);
+ return mode;
+}
+
/* Turn di_mode into /something/ recognizable. */
STATIC void
xrep_dinode_mode(
- struct xfs_scrub *sc,
+ struct xrep_inode *ri,
struct xfs_dinode *dip)
{
+ struct xfs_scrub *sc = ri->sc;
uint16_t mode;
trace_xrep_dinode_mode(sc, dip);
@@ -203,7 +354,7 @@ xrep_dinode_mode(
return;
/* bad mode, so we set it to a file that only root can read */
- mode = S_IFREG;
+ mode = xrep_dinode_guess_mode(ri, dip);
dip->di_mode = cpu_to_be16(mode);
dip->di_uid = 0;
dip->di_gid = 0;
@@ -412,9 +563,17 @@ xrep_dinode_walk_rmap(
}
ri->data_blocks += rec->rm_blockcount;
- if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) {
ri->data_extents++;
+ if (rec->rm_offset == 0 &&
+ !(rec->rm_flags & XFS_RMAP_UNWRITTEN)) {
+ if (ri->block0 != NULLFSBLOCK)
+ return -EFSCORRUPTED;
+ ri->block0 = rec->rm_startblock;
+ }
+ }
+
return 0;
}
@@ -465,7 +624,8 @@ xrep_dinode_count_rmaps(
trace_xrep_dinode_count_rmaps(ri->sc,
ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
- ri->data_extents, ri->rt_extents, ri->attr_extents);
+ ri->data_extents, ri->rt_extents, ri->attr_extents,
+ ri->block0);
return 0;
}
@@ -1019,7 +1179,7 @@ xrep_dinode_core(
/* Fix everything the verifier will complain about. */
dip = xfs_buf_offset(bp, ri->imap.im_boffset);
xrep_dinode_header(sc, dip);
- xrep_dinode_mode(sc, dip);
+ xrep_dinode_mode(ri, dip);
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
xrep_dinode_size(sc, dip);
xrep_dinode_extsize_hints(sc, dip);
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 63ceab3aa416..a3673ee2d684 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1484,9 +1484,9 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
TP_PROTO(struct xfs_scrub *sc, xfs_rfsblock_t data_blocks,
xfs_rfsblock_t rt_blocks, xfs_rfsblock_t attr_blocks,
xfs_extnum_t data_extents, xfs_extnum_t rt_extents,
- xfs_aextnum_t attr_extents),
+ xfs_aextnum_t attr_extents, xfs_fsblock_t block0),
TP_ARGS(sc, data_blocks, rt_blocks, attr_blocks, data_extents,
- rt_extents, attr_extents),
+ rt_extents, attr_extents, block0),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -1496,6 +1496,7 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__field(xfs_extnum_t, data_extents)
__field(xfs_extnum_t, rt_extents)
__field(xfs_aextnum_t, attr_extents)
+ __field(xfs_fsblock_t, block0)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
@@ -1506,8 +1507,9 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__entry->data_extents = data_extents;
__entry->rt_extents = rt_extents;
__entry->attr_extents = attr_extents;
+ __entry->block0 = block0;
),
- TP_printk("dev %d:%d ino 0x%llx dblocks 0x%llx rtblocks 0x%llx ablocks 0x%llx dextents %llu rtextents %llu aextents %u",
+ TP_printk("dev %d:%d ino 0x%llx dblocks 0x%llx rtblocks 0x%llx ablocks 0x%llx dextents %llu rtextents %llu aextents %u startblock0 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->data_blocks,
@@ -1515,7 +1517,8 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__entry->attr_blocks,
__entry->data_extents,
__entry->rt_extents,
- __entry->attr_extents)
+ __entry->attr_extents,
+ __entry->block0)
);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */