summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2020-02-25 15:13:48 -0800
committerDarrick J. Wong <darrick.wong@oracle.com>2020-03-03 18:47:38 -0800
commitfaa8f3b259634ed6f67d4464d1da7e2fb7df1d99 (patch)
tree559b10d4390e35ceabad4b67217421fcf2bf8d99
parent90b7d9278f82ebaa5a0e357dbdc3bcc15cd127cc (diff)
xfs: online repair of directoriesrepair-inode-data_2020-03-03
If a directory looks like it's in bad shape, try to sift through the rubble to find whatever directory entries we can, zap the old tree, and re-add the entries. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/dir.c17
-rw-r--r--fs/xfs/scrub/dir_repair.c699
-rw-r--r--fs/xfs/scrub/repair.h2
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/trace.h94
6 files changed, 813 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 257d8ffef721..83982250e491 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \
bitmap.o \
blob.o \
bmap_repair.o \
+ dir_repair.o \
ialloc_repair.o \
inode_repair.o \
refcount_repair.o \
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index c186c83544ac..46d8e1923af1 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -25,7 +25,22 @@ xchk_setup_directory(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- return xchk_setup_inode_contents(sc, ip, 0);
+ unsigned int sz;
+ int error;
+
+ error = xchk_setup_inode_contents(sc, ip, 0);
+ if (error)
+ return error;
+
+ if (!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ return 0;
+
+ sz = max_t(unsigned int, MAXNAMELEN + 1, sizeof(struct xfs_da_args));
+ sc->buf = kmem_alloc_large(sz, 0);
+ if (!sc->buf)
+ return -ENOMEM;
+
+ return 0;
}
/* Directories */
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
new file mode 100644
index 000000000000..5236a30bbad8
--- /dev/null
+++ b/fs/xfs/scrub/dir_repair.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_bmap.h"
+#include "xfs_quota.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/array.h"
+#include "scrub/blob.h"
+
+/*
+ * Directory Repair
+ * ================
+ *
+ * We repair directories by reading the directory data blocks looking for
+ * entries, truncate the entire directory fork, and reinsert all the entries.
+ * Unfortunately, there's not yet a secondary copy of directory entry data,
+ * which means that if we blow up midway through there's little we can do.
+ */
+
+struct xrep_dir_key { /* One salvaged directory entry, staged in memory until it is re-added. */
+ xblob_cookie name_cookie; /* cookie from xblob_put() used to fetch the name later */
+ xfs_ino_t ino; /* inode number that the entry points to */
+ unsigned int hash; /* xfs_da_hashname() of the (truncated) name; sort key */
+ uint8_t namelen; /* length of the name after truncation */
+ uint8_t ftype; /* file type derived from the target inode's mode */
+} __packed;
+
+struct xrep_dir { /* State carried through one directory repair. */
+ struct xfs_scrub *sc; /* scrub context for the directory being repaired */
+ struct xfbma *dir_entries; /* array of struct xrep_dir_key records */
+ struct xblob *dir_names; /* storage for the salvaged entry names */
+ xfs_ino_t parent_ino; /* inode of "..", or NULLFSINO if not found yet */
+};
+
+/*
+ * Decide whether a directory entry is worth salvaging.  Entries that point
+ * back at the directory itself, carry an inode number that fails
+ * verification, have an empty or oversized name, or are the dot entry are
+ * all skipped.  Returns nonzero to keep the entry, zero to skip it.
+ */
+STATIC int
+xrep_dir_want_salvage(
+	struct xrep_dir		*rd,
+	const char		*name,
+	int			namelen,
+	xfs_ino_t		ino)
+{
+	/* No pointers to ourselves or to garbage. */
+	if (ino == rd->sc->ip->i_ino || !xfs_verify_dir_ino(rd->sc->mp, ino))
+		return false;
+
+	/* No weird looking names. */
+	if (namelen <= 0 || namelen > MAXNAMELEN)
+		return false;
+
+	/* Everything except the dot entry is fair game. */
+	return !(namelen == 1 && name[0] == '.');
+}
+
+/*
+ * Stash one salvaged directory entry so that it can be re-added once the
+ * directory has been reset.
+ *
+ * The name is cut off at the first NUL or slash.  If that leaves an empty
+ * name or a dot entry, the entry is dropped because neither can be re-added.
+ * A ".." entry is recorded as the parent pointer instead of being stashed;
+ * finding a second parent pointer is treated as corruption.  Entries whose
+ * target inode cannot be loaded are dropped as well.
+ *
+ * Returns 0 if the entry was stashed or dropped, or a negative errno if the
+ * salvage operation should stop.
+ */
+STATIC int
+xrep_dir_salvage_entry(
+	struct xrep_dir		*rd,
+	unsigned char		*name,
+	unsigned int		namelen,
+	xfs_ino_t		ino)
+{
+	struct xrep_dir_key	key = {
+		.ino		= ino,
+	};
+	struct xfs_inode	*ip;
+	unsigned int		i;
+	int			error = 0;
+
+	if (xchk_should_terminate(rd->sc, &error))
+		return error;
+
+	/* Truncate the name to the first illegal character. */
+	for (i = 0; i < namelen; i++) {
+		if (name[i] == 0 || name[i] == '/')
+			break;
+	}
+	key.namelen = i;
+
+	/*
+	 * The caller filtered on the untruncated name, so truncation can
+	 * still leave us with nothing at all or with a dot entry.
+	 */
+	if (key.namelen == 0 || (key.namelen == 1 && name[0] == '.'))
+		return 0;
+
+	key.hash = xfs_da_hashname(name, key.namelen);
+
+	trace_xrep_dir_salvage_entry(rd->sc->ip, name, key.namelen, ino);
+
+	/* Save the parent pointer. */
+	if (key.namelen == 2 && name[0] == '.' && name[1] == '.') {
+		if (rd->parent_ino != NULLFSINO)
+			return -EFSCORRUPTED;
+		rd->parent_ino = ino;
+		return 0;
+	}
+
+	/*
+	 * Compute the ftype or dump the entry if we can't.  We don't lock the
+	 * inode because inodes can't change type while we have a reference.
+	 */
+	error = xfs_iget(rd->sc->mp, rd->sc->tp, ino,
+			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
+	if (error)
+		return 0;
+	key.ftype = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
+	xfs_irele(ip);
+
+	/* Remember this for later. */
+	error = xblob_put(rd->dir_names, &key.name_cookie, name, key.namelen);
+	if (error)
+		return error;
+
+	return xfbma_append(rd->dir_entries, &key);
+}
+
+/*
+ * Stash a shortform directory entry for later reinsertion, provided that it
+ * passes the salvage filter.  Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_dir_salvage_sf_entry(
+	struct xrep_dir			*rd,
+	struct xfs_dir2_sf_hdr		*sfp,
+	struct xfs_dir2_sf_entry	*sfep)
+{
+	xfs_ino_t			ino = xfs_dir2_sf_get_ino(rd->sc->mp,
+								  sfp, sfep);
+
+	if (xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
+		return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen,
+				ino);
+
+	return 0;
+}
+
+/*
+ * Stash a block/data format directory entry for later reinsertion, provided
+ * that it passes the salvage filter.  Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_dir_salvage_data_entry(
+	struct xrep_dir			*rd,
+	struct xfs_dir2_data_entry	*dep)
+{
+	xfs_ino_t			ino = be64_to_cpu(dep->inumber);
+
+	if (xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
+		return xrep_dir_salvage_entry(rd, dep->name, dep->namelen,
+				ino);
+
+	return 0;
+}
+
+/*
+ * Try to recover block/data format directory entries from one buffer.
+ *
+ * Walk the data portion of the block, skipping unused records and handing
+ * every in-use record that fits inside the block to the salvage code.  The
+ * walk stops quietly at the first record that cannot be stepped over.
+ * Returns 0, or a negative errno if salvaging failed or the scrub was told
+ * to terminate.
+ */
+STATIC int
+xrep_dir_recover_data(
+	struct xrep_dir		*rd,
+	struct xfs_buf		*bp)
+{
+	struct xfs_da_geometry	*geo = rd->sc->mp->m_dir_geo;
+	unsigned int		offset;
+	unsigned int		end;
+	int			error = 0;
+
+	/*
+	 * Loop over the data portion of the block.
+	 * Each object is a real entry (dep) or an unused one (dup).
+	 */
+	offset = geo->data_entry_offset;
+	end = min_t(unsigned int, BBTOB(bp->b_length),
+			xfs_dir3_data_end_offset(geo, bp->b_addr));
+
+	while (offset < end) {
+		struct xfs_dir2_data_unused	*dup = bp->b_addr + offset;
+		struct xfs_dir2_data_entry	*dep = bp->b_addr + offset;
+		unsigned int			len;
+
+		/* Report the termination request instead of hiding it. */
+		if (xchk_should_terminate(rd->sc, &error))
+			return error;
+
+		/* Skip unused entries. */
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			len = be16_to_cpu(dup->length);
+
+			/*
+			 * A zero-length unused record would leave us stuck
+			 * at the same offset forever, so give up on the rest
+			 * of this block.
+			 */
+			if (len == 0)
+				break;
+			offset += len;
+			continue;
+		}
+
+		/* Don't walk off the end of the block. */
+		offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
+		if (offset > end)
+			break;
+
+		/* Ok, let's save this entry. */
+		error = xrep_dir_salvage_data_entry(rd, dep);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Try to recover shortform directory entries.
+ *
+ * The parent pointer is taken from the shortform header, then every entry
+ * that fits entirely inside the inline data fork is handed to the salvage
+ * code.  The walk stops quietly at the first entry that would run past the
+ * end of the fork.  Returns 0, or a negative errno if salvaging failed or
+ * the scrub was told to terminate.
+ */
+STATIC int
+xrep_dir_recover_sf(
+	struct xrep_dir			*rd)
+{
+	struct xfs_dir2_sf_hdr		*sfp;
+	struct xfs_dir2_sf_entry	*sfep;
+	struct xfs_dir2_sf_entry	*next;
+	struct xfs_ifork		*ifp;
+	unsigned char			*end;
+	int				error = 0;
+
+	ifp = XFS_IFORK_PTR(rd->sc->ip, XFS_DATA_FORK);
+	sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
+	end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
+
+	rd->parent_ino = xfs_dir2_sf_get_parent_ino(sfp);
+
+	sfep = xfs_dir2_sf_firstentry(sfp);
+	while ((unsigned char *)sfep < end) {
+		/* Report the termination request instead of hiding it. */
+		if (xchk_should_terminate(rd->sc, &error))
+			return error;
+
+		/* Don't salvage an entry that spills past the fork. */
+		next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
+		if ((unsigned char *)next > end)
+			break;
+
+		/* Ok, let's save this entry. */
+		error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
+		if (error)
+			return error;
+
+		sfep = next;
+	}
+
+	return 0;
+}
+
+/*
+ * Guess the on-disk format of this directory from the data fork mappings and
+ * the directory size, so that salvaging can be more aggressive when we are
+ * reasonably sure.  @magic_guess is set to DIR3_BLOCK_MAGIC for a "block
+ * format" directory, DIR3_DATA_MAGIC for a "data format" directory, and 0 if
+ * we can't tell.
+ */
+STATIC void
+xrep_dir_guess_format(
+	struct xrep_dir		*rd,
+	__be32			*magic_guess)
+{
+	struct xfs_inode	*ip = rd->sc->ip;
+	struct xfs_da_geometry	*geo = rd->sc->mp->m_dir_geo;
+	xfs_fileoff_t		last;
+	int			error;
+
+	ASSERT(xfs_sb_version_hascrc(&ip->i_mount->m_sb));
+
+	/* Until proven otherwise, we don't know. */
+	*magic_guess = 0;
+
+	/*
+	 * One directory block mapped and a directory size of exactly one
+	 * block: this has to be a single block format directory.
+	 */
+	error = xfs_bmap_last_offset(ip, &last, XFS_DATA_FORK);
+	if (!error &&
+	    XFS_FSB_TO_B(ip->i_mount, last) == geo->blksize &&
+	    ip->i_d.di_size == geo->blksize) {
+		*magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
+		return;
+	}
+
+	/*
+	 * The last mapping before the leaf offset ends where the directory
+	 * size says it should, and the directory is bigger than one block:
+	 * this is a data format directory.
+	 */
+	last = geo->leafblk;
+	error = xfs_bmap_last_before(rd->sc->tp, ip, &last, XFS_DATA_FORK);
+	if (error)
+		return;
+
+	if (XFS_FSB_TO_B(ip->i_mount, last) > geo->blksize &&
+	    XFS_FSB_TO_B(ip->i_mount, last) == ip->i_d.di_size)
+		*magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
+}
+
+/*
+ * Recover directory entries from a specific directory block.
+ *
+ * If the caller has a format guess, the block header magic is temporarily
+ * overridden with it and the block is salvaged unconditionally.  Otherwise
+ * the block is salvaged only if its own magic is a known block/data magic
+ * and the matching structure check passes.  The original magic is always
+ * put back before the buffer is released.
+ */
+STATIC int
+xrep_dir_recover_dirblock(
+	struct xrep_dir			*rd,
+	__be32				magic_guess,
+	xfs_dablk_t			dabno)
+{
+	struct xfs_dir2_data_hdr	*hdr;
+	struct xfs_buf			*bp;
+	__be32				oldmagic;
+	bool				salvage;
+	int				error;
+
+	/*
+	 * Try to read buffer.  We invalidate them in the next step so we
+	 * don't bother to set a buffer type or ops.
+	 */
+	error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno, -1, &bp,
+			XFS_DATA_FORK, NULL);
+	if (error || !bp)
+		return error;
+
+	hdr = bp->b_addr;
+	oldmagic = hdr->magic;
+
+	trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
+			be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
+
+	if (magic_guess) {
+		/* We're sure of the format, so salvage using that magic. */
+		hdr->magic = magic_guess;
+		salvage = true;
+	} else {
+		/*
+		 * No guess available, so only trust blocks whose magic we
+		 * recognize and whose contents pass the structure check.
+		 */
+		switch (hdr->magic) {
+		case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
+		case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
+			salvage = xrep_buf_verify_struct(bp,
+					&xfs_dir3_block_buf_ops);
+			break;
+		case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
+		case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
+			salvage = xrep_buf_verify_struct(bp,
+					&xfs_dir3_data_buf_ops);
+			break;
+		default:
+			salvage = false;
+			break;
+		}
+	}
+
+	if (salvage)
+		error = xrep_dir_recover_data(rd, bp);
+
+	/* Undo any magic override before letting go of the buffer. */
+	hdr->magic = oldmagic;
+	xfs_trans_brelse(rd->sc->tp, bp);
+	return error;
+}
+
+/*
+ * Extract as many directory entries as we can.
+ *
+ * Local format directories are parsed as shortform data.  For everything
+ * else, walk every directory block mapped below the leaf offset and salvage
+ * entries from each.  The first failure stops the walk and is returned to
+ * the caller; returns 0 if every block was processed.
+ */
+STATIC int
+xrep_dir_recover(
+	struct xrep_dir		*rd)
+{
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	got;
+	struct xfs_scrub	*sc = rd->sc;
+	struct xfs_ifork	*ifp;
+	struct xfs_da_geometry	*geo = sc->mp->m_dir_geo;
+	xfs_dablk_t		dabno;
+	__be32			magic_guess;
+	int			error = 0;
+
+	if (sc->ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return xrep_dir_recover_sf(rd);
+
+	xrep_dir_guess_format(rd, &magic_guess);
+
+	/* Iterate each directory data block in the data fork. */
+	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+	for_each_xfs_iext(ifp, &icur, &got) {
+		/* Leaf blocks come after all data blocks, so cut off there. */
+		xfs_trim_extent(&got, 0, geo->leafblk);
+		if (got.br_blockcount == 0)
+			continue;
+
+		for (dabno = round_up(got.br_startoff, geo->fsbcount);
+		     dabno < got.br_startoff + got.br_blockcount;
+		     dabno += geo->fsbcount) {
+			if (xchk_should_terminate(sc, &error))
+				return error;
+
+			/*
+			 * Bail out of both loops on failure; merely leaving
+			 * the inner loop would let the next extent overwrite
+			 * the error.
+			 */
+			error = xrep_dir_recover_dirblock(rd, magic_guess,
+					dabno);
+			if (error)
+				return error;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Reset a non-local directory: invalidate the buffer of every directory
+ * block that the data fork maps, rolling the transaction after each one,
+ * and then unmap the entire fork.  Blocks whose buffer cannot be obtained
+ * are skipped.
+ */
+STATIC int
+xrep_dir_reset_nonlocal(
+	struct xfs_scrub	*sc,
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_da_geometry	*geo = sc->mp->m_dir_geo;
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	irec;
+	struct xfs_buf		*bp;
+	xfs_dablk_t		dabno;
+	int			error;
+
+	/* Invalidate each directory block. */
+	for_each_xfs_iext(ifp, &icur, &irec) {
+		dabno = round_up(irec.br_startoff, geo->fsbcount);
+		while (dabno < irec.br_startoff + irec.br_blockcount) {
+			error = xfs_da_get_buf(sc->tp, sc->ip, dabno, &bp,
+					XFS_DATA_FORK);
+			if (!error && bp) {
+				xfs_trans_binval(sc->tp, bp);
+				error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+				if (error)
+					return error;
+			}
+			dabno += geo->fsbcount;
+		}
+	}
+
+	/* Now free all the blocks. */
+	return xfs_bunmapi_range(&sc->tp, sc->ip, XFS_DATA_FORK, 0,
+			XFS_MAX_FILEOFF, XFS_BMAPI_NODISCARD);
+}
+
+/*
+ * Free all the directory blocks and reset the data fork to an empty
+ * shortform directory that points at the salvaged parent.
+ *
+ * The inode is joined to the scrub transaction, any mapped directory blocks
+ * are invalidated and unmapped, the fork is reinitialized as inline data,
+ * the link count is set back to 2, and a fresh shortform directory is
+ * created.  Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_dir_reset_fork(
+	struct xrep_dir		*rd)
+{
+	struct xfs_ifork	*ifp;
+	struct xfs_da_args	*args = rd->sc->buf;
+	int			error;
+
+	xfs_trans_ijoin(rd->sc->tp, rd->sc->ip, 0);
+	ifp = XFS_IFORK_PTR(rd->sc->ip, XFS_DATA_FORK);
+
+	/* Unmap all the directory buffers. */
+	if (xfs_ifork_has_extents(rd->sc->ip, XFS_DATA_FORK)) {
+		error = xrep_dir_reset_nonlocal(rd->sc, ifp);
+		if (error)
+			return error;
+	}
+
+	/* Reset the data fork to an empty data fork. */
+	xfs_ifork_reset(ifp);
+	ifp->if_flags = XFS_IFINLINE;
+	ifp->if_bytes = 0;
+	rd->sc->ip->i_d.di_size = 0;
+
+	/*
+	 * Reinitialize the short form directory.  The args structure lives
+	 * in the scrub scratch buffer, which is not cleared when it is
+	 * allocated, so zero it first to keep stale bytes out of the fields
+	 * that we don't set explicitly.
+	 */
+	set_nlink(VFS_I(rd->sc->ip), 2);
+	memset(args, 0, sizeof(*args));
+	args->geo = rd->sc->mp->m_dir_geo;
+	args->dp = rd->sc->ip;
+	args->trans = rd->sc->tp;
+	error = xfs_dir2_sf_create(args, rd->parent_ino);
+	if (error)
+		return error;
+
+	return xfs_trans_roll_inode(&rd->sc->tp, rd->sc->ip);
+}
+
+/*
+ * Sort comparator for salvaged dir keys: orders records by ascending name
+ * hash.  Returns -1, 0, or 1.
+ */
+static int
+xrep_dir_key_cmp(
+	const void			*a,
+	const void			*b)
+{
+	const struct xrep_dir_key	*ka = a;
+	const struct xrep_dir_key	*kb = b;
+
+	return (ka->hash > kb->hash) - (ka->hash < kb->hash);
+}
+
+/*
+ * Find all the directory entries for this inode by scraping them out of the
+ * directory by hand.  Quota is attached first, and the data fork extent map
+ * is loaded if the fork is not in local format and has not been read in
+ * yet.  The caller must clean up the lists if anything goes wrong.
+ */
+STATIC int
+xrep_dir_find_entries(
+	struct xrep_dir		*rd)
+{
+	struct xfs_inode	*ip = rd->sc->ip;
+	struct xfs_ifork	*ifp;
+	bool			need_extents;
+	int			error;
+
+	error = xrep_ino_dqattach(rd->sc);
+	if (error)
+		return error;
+
+	/* Extent map should be loaded. */
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	need_extents =
+		XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_LOCAL &&
+		!(ifp->if_flags & XFS_IFEXTENTS);
+	if (need_extents) {
+		error = xfs_iread_extents(rd->sc->tp, ip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
+
+	/* Read every directory entry and record them in memory. */
+	return xrep_dir_recover(rd);
+}
+
+/*
+ * Insert one salvaged dir entry back into the directory.
+ *
+ * @item is the struct xrep_dir_key being replayed and @priv is the repair
+ * context.  The name is read back out of the name store into the scrub
+ * scratch buffer and released from the store, then the entry is created in
+ * a transaction of its own.  Subdirectory entries bump the link count of
+ * the directory being rebuilt.  Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_dir_insert_rec(
+	const void			*item,
+	void				*priv)
+{
+	const struct xrep_dir_key	*key = item;
+	struct xrep_dir			*rd = priv;
+	struct xfs_inode		*dp = rd->sc->ip;
+	struct xfs_mount		*mp = rd->sc->mp;
+	char				*namebuf = rd->sc->buf;
+	struct xfs_name			name = {
+		.name			= namebuf,
+		.len			= key->namelen,
+		.type			= key->ftype,
+	};
+	struct xfs_trans		*tp;
+	uint				resblks;
+	int				error;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	/* Pull the entry name into the in-core buffer, then let go of it. */
+	error = xblob_get(rd->dir_names, key->name_cookie, namebuf,
+			key->namelen);
+	if (error)
+		return error;
+
+	error = xblob_free(rd->dir_names, key->name_cookie);
+	if (error)
+		return error;
+
+	trace_xrep_dir_insert_rec(dp, namebuf, key->namelen, key->ino,
+			key->ftype);
+
+	error = xfs_qm_dqattach(dp);
+	if (error)
+		return error;
+
+	/* Try for a block reservation, but fall back to none on ENOSPC. */
+	resblks = XFS_LINK_SPACE_RES(mp, key->namelen);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
+	if (error == -ENOSPC) {
+		resblks = 0;
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
+	}
+	if (error)
+		return error;
+
+	xfs_ilock(dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+
+	error = xfs_dir_createname(tp, dp, &name, key->ino, resblks);
+	if (error) {
+		xfs_trans_cancel(tp);
+		return error;
+	}
+
+	if (name.type == XFS_DIR3_FT_DIR)
+		inc_nlink(VFS_I(dp));
+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	return xfs_trans_commit(tp);
+}
+
+/*
+ * Insert all the directory entries that we collected.
+ *
+ * Commit the repair transaction and drop the ILOCK because the per-entry
+ * insertion code allocates its own transaction and takes the ILOCK itself
+ * for every entry that it re-adds. This means the directory is rebuilt
+ * one entry at a time rather than atomically.
+ *
+ * Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_dir_rebuild_tree(
+ struct xrep_dir *rd)
+{
+ int error;
+
+ /*
+ * Commit the existing transaction and drop the ILOCK so that we can
+ * use a series of small transactions to rebuild the directory.
+ */
+ error = xfs_trans_commit(rd->sc->tp);
+ rd->sc->tp = NULL; /* cleared whether or not the commit succeeded */
+ if (error)
+ return error;
+
+ xfs_iunlock(rd->sc->ip, XFS_ILOCK_EXCL);
+ rd->sc->ilock_flags &= ~XFS_ILOCK_EXCL; /* record that we no longer hold the ILOCK */
+
+ /*
+ * Sort the entries hash to minimize dabtree splits when we rebuild the
+ * directory tree information.
+ */
+ error = xfbma_sort(rd->dir_entries, xrep_dir_key_cmp);
+ if (error)
+ return error;
+
+ /* Re-add every entry to the directory. */
+ return xfbma_iter_del(rd->dir_entries, xrep_dir_insert_rec, rd);
+}
+
+/*
+ * Repair the directory metadata.
+ *
+ * Salvage whatever entries can be found in the existing directory, reset
+ * the data fork to an empty shortform directory, and re-add the salvaged
+ * entries.  The in-memory entry and name stores are torn down on every exit
+ * path.  Returns 0 or a negative errno; -EFSCORRUPTED if no parent pointer
+ * could be found.
+ *
+ * XXX: Directory entry buffers can be multiple fsblocks in size.  The buffer
+ * cache in XFS can't handle aliased multiblock buffers, so this might
+ * misbehave if the directory blocks are crosslinked with other filesystem
+ * metadata.
+ *
+ * XXX: Is it necessary to check the dcache for this directory to make sure
+ * that we always recreate every cached entry?
+ */
+int
+xrep_dir(
+	struct xfs_scrub	*sc)
+{
+	struct xrep_dir		rd = {
+		.sc		= sc,
+		.parent_ino	= NULLFSINO,
+	};
+	int			error;
+
+	/* Set up some storage */
+	rd.dir_entries = xfbma_init(sizeof(struct xrep_dir_key));
+	if (IS_ERR(rd.dir_entries))
+		return PTR_ERR(rd.dir_entries);
+	rd.dir_names = xblob_init();
+	if (IS_ERR(rd.dir_names)) {
+		error = PTR_ERR(rd.dir_names);
+		goto out_arr;
+	}
+
+	/*
+	 * The directory scrubber might have dropped the ILOCK, so pick it up
+	 * again.
+	 */
+	if (!(sc->ilock_flags & XFS_ILOCK_EXCL)) {
+		xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+		sc->ilock_flags |= XFS_ILOCK_EXCL;
+	}
+
+	/* Collect directory entries by parsing the raw directory blocks. */
+	error = xrep_dir_find_entries(&rd);
+	if (error)
+		goto out;
+
+	/*
+	 * If we can't find the parent pointer, we're sunk.  Leave through
+	 * the cleanup path so that the entry and name stores are freed.
+	 */
+	if (rd.parent_ino == NULLFSINO) {
+		error = -EFSCORRUPTED;
+		goto out;
+	}
+
+	/*
+	 * Invalidate and truncate all data fork extents.  This is the point at
+	 * which we are no longer able to bail out gracefully.  We commit the
+	 * transaction here because the rebuilding step allocates its own
+	 * transactions.
+	 */
+	error = xrep_dir_reset_fork(&rd);
+	if (error)
+		goto out;
+
+	/* Now rebuild the directory information. */
+	error = xrep_dir_rebuild_tree(&rd);
+out:
+	xblob_destroy(rd.dir_names);
+out_arr:
+	xfbma_destroy(rd.dir_entries);
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 3da529e6cc4b..39a9b30fdbaa 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -75,6 +75,7 @@ int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_symlink(struct xfs_scrub *sc);
+int xrep_dir(struct xfs_scrub *sc);
int xrep_xattr(struct xfs_scrub *sc);
struct xrep_newbt_resv {
@@ -179,6 +180,7 @@ xrep_reset_perag_resv(
#define xrep_bmap_data xrep_notsupported
#define xrep_bmap_attr xrep_notsupported
#define xrep_symlink xrep_notsupported
+#define xrep_dir xrep_notsupported
#define xrep_xattr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 86493e6cc712..022393e9a753 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -285,7 +285,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_directory,
.scrub = xchk_directory,
- .repair = xrep_notsupported,
+ .repair = xrep_dir,
},
[XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
.type = ST_INODE,
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 88fc199493b0..2572adbd3921 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1104,6 +1104,100 @@ TRACE_EVENT(xrep_xattr_insert_rec,
__entry->valuelen)
);
+TRACE_EVENT(xrep_dir_recover_dirblock, /* Directory repair is about to examine one directory block. */
+ TP_PROTO(struct xfs_inode *dp, xfs_dablk_t dabno, uint32_t magic,
+ uint32_t magic_guess),
+ TP_ARGS(dp, dabno, magic, magic_guess),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dir_ino)
+ __field(xfs_dablk_t, dabno)
+ __field(uint32_t, magic) /* magic number found in the block header */
+ __field(uint32_t, magic_guess) /* magic the caller guessed for the directory */
+ ),
+ TP_fast_assign(
+ __entry->dev = dp->i_mount->m_super->s_dev;
+ __entry->dir_ino = dp->i_ino;
+ __entry->dabno = dabno;
+ __entry->magic = magic;
+ __entry->magic_guess = magic_guess;
+ ),
+ TP_printk("dev %d:%d dir 0x%llx dablk %u magic 0x%x magic_guess 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dir_ino,
+ __entry->dabno,
+ __entry->magic,
+ __entry->magic_guess)
+);
+
+TRACE_EVENT(xrep_dir_salvage_entry, /* Directory repair found an entry that it is considering for salvage. */
+ TP_PROTO(struct xfs_inode *dp, char *name, unsigned int namelen,
+ xfs_ino_t ino),
+ TP_ARGS(dp, name, namelen, ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dir_ino)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen + 1) /* one extra byte for the terminator */
+ __field(xfs_ino_t, ino)
+ ),
+ TP_fast_assign(
+ __entry->dev = dp->i_mount->m_super->s_dev;
+ __entry->dir_ino = dp->i_ino;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __get_str(name)[namelen] = 0; /* the source name is not NUL-terminated at namelen */
+ __entry->ino = ino;
+ ),
+ TP_printk("dev %d:%d dir 0x%llx name '%.*s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dir_ino,
+ __entry->namelen,
+ __get_str(name),
+ __entry->ino)
+);
+
+#define XFS_DIR3_FTYPE_STR /* value/label pairs for __print_symbolic() on a directory ftype */ \
+ { XFS_DIR3_FT_UNKNOWN, "unknown" }, \
+ { XFS_DIR3_FT_REG_FILE, "file" }, \
+ { XFS_DIR3_FT_DIR, "directory" }, \
+ { XFS_DIR3_FT_CHRDEV, "char" }, \
+ { XFS_DIR3_FT_BLKDEV, "block" }, \
+ { XFS_DIR3_FT_FIFO, "fifo" }, \
+ { XFS_DIR3_FT_SOCK, "sock" }, \
+ { XFS_DIR3_FT_SYMLINK, "symlink" }, \
+ { XFS_DIR3_FT_WHT, "whiteout" }
+
+TRACE_EVENT(xrep_dir_insert_rec, /* Directory repair is about to re-add one salvaged entry. */
+ TP_PROTO(struct xfs_inode *dp, char *name, unsigned int namelen,
+ xfs_ino_t ino, uint8_t ftype),
+ TP_ARGS(dp, name, namelen, ino, ftype),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dir_ino)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen + 1) /* one extra byte for the terminator */
+ __field(xfs_ino_t, ino)
+ __field(uint8_t, ftype)
+ ),
+ TP_fast_assign(
+ __entry->dev = dp->i_mount->m_super->s_dev;
+ __entry->dir_ino = dp->i_ino;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __get_str(name)[namelen] = 0; /* terminate the copied name */
+ __entry->ino = ino;
+ __entry->ftype = ftype;
+ ),
+ TP_printk("dev %d:%d dir 0x%llx name '%.*s' ino 0x%llx ftype %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dir_ino,
+ __entry->namelen,
+ __get_str(name),
+ __entry->ino,
+ __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR)) /* ftype printed by name */
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */