author     Darrick J. Wong <djwong@kernel.org>  2022-07-14 11:16:22 -0700
committer  Darrick J. Wong <djwong@kernel.org>  2022-11-09 19:08:15 -0800
commit     37fb90fd6eb0a30eb7133a2ca9ead780b574ed69 (patch)
tree       62a54bef49ba557433c6d26af314617153e4b0af
parent     f2e252458976c0c789710b53792491dd3e731014 (diff)

xfs: introduce vectored scrub mode  (vectorized-scrub_2022-11-09)

Introduce a variant on XFS_SCRUB_METADATA that allows for vectored mode.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h    |  37
-rw-r--r--  fs/xfs/scrub/scrub.c      | 148
-rw-r--r--  fs/xfs/scrub/trace.h      |  78
-rw-r--r--  fs/xfs/scrub/xfs_scrub.h  |   2
-rw-r--r--  fs/xfs/xfs_ioctl.c        |  47
5 files changed, 311 insertions(+), 1 deletion(-)
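
To illustrate the interface from the caller's side, here is a minimal userspace sketch (not part of this patch) that submits a small scrub vector containing a barrier. It assumes the structures and the XFS_IOC_SCRUBV_METADATA number added below are visible to userspace (for example through xfsprogs' xfs/xfs_fs.h once the header is synced); the function name and the particular scrub types chosen are illustrative.

/*
 * Illustrative only -- not part of this patch.  Build a three-element
 * scrub vector for one file: check the inode core, stop at a barrier if
 * that reported corruption, otherwise scan the data fork block map.
 * Requires CAP_SYS_ADMIN, like the single-shot scrub ioctl.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>
#include <xfs/xfs_fs.h>

static int scrubv_one_file(int fd, uint64_t ino, uint32_t gen)
{
	struct xfs_scrub_vec_head	*vhead;
	struct xfs_scrub_vec		*v;
	const unsigned int		nr = 3;
	int				ret;

	/* Header plus a flex array of nr vectors, all zeroed. */
	vhead = calloc(1, sizeof_xfs_scrub_vec(nr));
	if (!vhead)
		return -1;

	vhead->svh_ino = ino;	/* typically from bulkstat */
	vhead->svh_gen = gen;
	vhead->svh_nr  = nr;

	/* 0: scrub the inode record itself. */
	vhead->svh_vecs[0].sv_type = XFS_SCRUB_TYPE_INODE;

	/* 1: barrier -- stop the run if vec 0 came back corrupt. */
	vhead->svh_vecs[1].sv_type  = XFS_SCRUB_TYPE_BARRIER;
	vhead->svh_vecs[1].sv_flags = XFS_SCRUB_OFLAG_CORRUPT;

	/* 2: scrub the data fork block mapping btree. */
	vhead->svh_vecs[2].sv_type = XFS_SCRUB_TYPE_BMBTD;

	ret = ioctl(fd, XFS_IOC_SCRUBV_METADATA, vhead);
	if (ret < 0) {
		perror("XFS_IOC_SCRUBV_METADATA");
		free(vhead);
		return -1;
	}

	/* Each vector carries its own result. */
	for (v = vhead->svh_vecs; v < vhead->svh_vecs + nr; v++)
		printf("vec %u: type %u flags 0x%x ret %d\n",
				(unsigned int)(v - vhead->svh_vecs),
				v->sv_type, v->sv_flags, v->sv_ret);

	free(vhead);
	return 0;
}

The file descriptor only has to refer to a file on the target filesystem; svh_ino and svh_gen would normally come from bulkstat, just as with the single-shot XFS_IOC_SCRUB_METADATA call.
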
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 5768fca9a07d..48b1da71f70e 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -756,6 +756,15 @@ struct xfs_scrub_metadata {
/* Number of scrub subcommands. */
#define XFS_SCRUB_TYPE_NR 32
+/*
+ * This special type code only applies to the vectored scrub implementation.
+ *
+ * If any of the previous scrub vectors recorded runtime errors or have
+ * sv_flags bits set that match the OFLAG bits in the barrier vector's
+ * sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace.
+ */
+#define XFS_SCRUB_TYPE_BARRIER (-1U)
+
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
@@ -800,6 +809,33 @@ struct xfs_scrub_metadata {
XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED)
#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+struct xfs_scrub_vec {
+ __u32 sv_type; /* XFS_SCRUB_TYPE_* */
+ __u32 sv_flags; /* XFS_SCRUB_FLAGS_* */
+ __s32 sv_ret; /* 0 or a negative error code */
+ __u32 sv_reserved; /* must be zero */
+};
+
+/* Vectored metadata scrub control structure. */
+struct xfs_scrub_vec_head {
+ __u64 svh_ino; /* inode number. */
+ __u32 svh_gen; /* inode generation. */
+ __u32 svh_agno; /* ag number. */
+ __u32 svh_flags; /* XFS_SCRUB_VEC_FLAGS_* */
+ __u16 svh_rest_us; /* wait this much time between vector items */
+ __u16 svh_nr; /* number of svh_vecs */
+
+ struct xfs_scrub_vec svh_vecs[0];
+};
+
+#define XFS_SCRUB_VEC_FLAGS_ALL (0)
+
+static inline size_t sizeof_xfs_scrub_vec(unsigned int nr)
+{
+ return sizeof(struct xfs_scrub_vec_head) +
+ nr * sizeof(struct xfs_scrub_vec);
+}
+
/*
* ioctl limits
*/
@@ -844,6 +880,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
/* XFS_IOC_GETFSMAP ------ hoisted 59 */
#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
+#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 60, struct xfs_scrub_vec_head)
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 62, struct xfs_rtgroup_geometry)
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 342a50248650..fc2cfef68366 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -20,6 +20,7 @@
#include "xfs_rmap.h"
#include "xfs_xchgrange.h"
#include "xfs_swapext.h"
+#include "xfs_icache.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -726,3 +727,150 @@ try_harder:
sc->flags |= XCHK_TRY_HARDER;
goto retry_op;
}
+
+/* Decide if there have been any scrub failures up to this point. */
+static inline bool
+xfs_scrubv_previous_failures(
+ struct xfs_mount *mp,
+ struct xfs_scrub_vec_head *vhead,
+ struct xfs_scrub_vec *barrier_vec)
+{
+ struct xfs_scrub_vec *v;
+ __u32 failmask;
+
+ failmask = barrier_vec->sv_flags & XFS_SCRUB_FLAGS_OUT;
+
+ for (v = vhead->svh_vecs; v < barrier_vec; v++) {
+ if (v->sv_type == XFS_SCRUB_TYPE_BARRIER)
+ continue;
+
+ /*
+ * Runtime errors count as a previous failure, except the ones
+ * used to ask userspace to retry.
+ */
+ if (v->sv_ret && v->sv_ret != -EBUSY && v->sv_ret != -ENOENT &&
+ v->sv_ret != -EUSERS)
+ return true;
+
+ /*
+ * If any of the out-flags on the scrub vector match the mask
+ * that was set on the barrier vector, that's a previous fail.
+ */
+ if (v->sv_flags & failmask)
+ return true;
+ }
+
+ return false;
+}
+
+/* Vectored scrub implementation to reduce ioctl calls. */
+int
+xfs_scrubv_metadata(
+ struct file *file,
+ struct xfs_scrub_vec_head *vhead)
+{
+ struct xfs_inode *ip_in = XFS_I(file_inode(file));
+ struct xfs_mount *mp = ip_in->i_mount;
+ struct xfs_inode *ip = NULL;
+ struct xfs_scrub_vec *v;
+ bool set_dontcache = false;
+ unsigned int i;
+ int error = 0;
+
+ BUILD_BUG_ON(sizeof(struct xfs_scrub_vec_head) ==
+ sizeof(struct xfs_scrub_metadata));
+ BUILD_BUG_ON(XFS_IOC_SCRUB_METADATA == XFS_IOC_SCRUBV_METADATA);
+
+ trace_xchk_scrubv_start(ip_in, vhead);
+
+ if (vhead->svh_flags & ~XFS_SCRUB_VEC_FLAGS_ALL)
+ return -EINVAL;
+ for (i = 0, v = vhead->svh_vecs; i < vhead->svh_nr; i++, v++) {
+ if (v->sv_reserved)
+ return -EINVAL;
+ if (v->sv_type == XFS_SCRUB_TYPE_BARRIER &&
+ (v->sv_flags & ~XFS_SCRUB_FLAGS_OUT))
+ return -EINVAL;
+
+ /*
+ * If we detect at least one inode-type scrub, we might
+ * consider setting dontcache at the end.
+ */
+ if (v->sv_type < XFS_SCRUB_TYPE_NR &&
+ meta_scrub_ops[v->sv_type].type == ST_INODE)
+ set_dontcache = true;
+
+ trace_xchk_scrubv_item(mp, vhead, v);
+ }
+
+ /*
+ * If the caller provided us with a nonzero inode number that isn't the
+ * ioctl file, try to grab a reference to it to eliminate all further
+ * untrusted inode lookups. If we can't get the inode, let each scrub
+ * function try again.
+ */
+ if (vhead->svh_ino != ip_in->i_ino) {
+ xfs_iget(mp, NULL, vhead->svh_ino, XFS_IGET_UNTRUSTED, 0, &ip);
+ if (ip && (VFS_I(ip)->i_generation != vhead->svh_gen ||
+ (xfs_is_metadata_inode(ip) &&
+ !S_ISDIR(VFS_I(ip)->i_mode)))) {
+ xfs_irele(ip);
+ ip = NULL;
+ }
+ }
+ if (!ip) {
+ if (!igrab(VFS_I(ip_in)))
+ return -EFSCORRUPTED;
+ ip = ip_in;
+ }
+
+ /* Run all the scrubbers. */
+ for (i = 0, v = vhead->svh_vecs; i < vhead->svh_nr; i++, v++) {
+ struct xfs_scrub_metadata sm = {
+ .sm_type = v->sv_type,
+ .sm_flags = v->sv_flags,
+ .sm_ino = vhead->svh_ino,
+ .sm_gen = vhead->svh_gen,
+ .sm_agno = vhead->svh_agno,
+ };
+
+ if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) {
+ if (xfs_scrubv_previous_failures(mp, vhead, v)) {
+ v->sv_ret = -ECANCELED;
+ trace_xchk_scrubv_barrier_fail(mp, vhead, v);
+ break;
+ }
+
+ continue;
+ }
+
+ v->sv_ret = xfs_scrub_metadata(file, &sm);
+ v->sv_flags = sm.sm_flags;
+
+ /* Leave the inode in memory if something's wrong with it. */
+ if (xchk_needs_repair(&sm))
+ set_dontcache = false;
+
+ if (vhead->svh_rest_us) {
+ ktime_t expires;
+
+ expires = ktime_add_ns(ktime_get(),
+ vhead->svh_rest_us * 1000);
+ set_current_state(TASK_KILLABLE);
+ schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+ }
+ if (fatal_signal_pending(current)) {
+ error = -EINTR;
+ break;
+ }
+ }
+
+ /*
+ * If we're holding the only reference to this inode and the scan was
+ * clean, mark it dontcache so that we don't pollute the cache.
+ */
+ if (set_dontcache && atomic_read(&VFS_I(ip)->i_count) == 1)
+ d_mark_dontcache(VFS_I(ip));
+ xfs_irele(ip);
+ return error;
+}
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 79e833df3dc4..a9088cf3a93c 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -79,6 +79,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGBITMAP);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTRMAPBT);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTREFCBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -112,7 +113,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTREFCBT);
{ XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }, \
{ XFS_SCRUB_TYPE_RGBITMAP, "rgbitmap" }, \
{ XFS_SCRUB_TYPE_RTRMAPBT, "rtrmapbt" }, \
- { XFS_SCRUB_TYPE_RTREFCBT, "rtrefcountbt" }
+ { XFS_SCRUB_TYPE_RTREFCBT, "rtrefcountbt" }, \
+ { XFS_SCRUB_TYPE_BARRIER, "barrier" }
const char *xchk_type_string(unsigned int type);
@@ -212,6 +214,80 @@ DEFINE_EVENT(xchk_fshook_class, name, \
DEFINE_SCRUB_FSHOOK_EVENT(xchk_fshooks_enable);
DEFINE_SCRUB_FSHOOK_EVENT(xchk_fshooks_disable);
+DECLARE_EVENT_CLASS(xchk_vector_head_class,
+ TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead),
+ TP_ARGS(ip, vhead),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, inum)
+ __field(unsigned int, gen)
+ __field(unsigned int, flags)
+ __field(unsigned short, rest_us)
+ __field(unsigned short, nr_vecs)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->agno = vhead->svh_agno;
+ __entry->inum = vhead->svh_ino;
+ __entry->gen = vhead->svh_gen;
+ __entry->flags = vhead->svh_flags;
+ __entry->rest_us = vhead->svh_rest_us;
+ __entry->nr_vecs = vhead->svh_nr;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx agno 0x%x inum 0x%llx gen 0x%x flags 0x%x rest_us %u nr_vecs %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->agno,
+ __entry->inum,
+ __entry->gen,
+ __entry->flags,
+ __entry->rest_us,
+ __entry->nr_vecs)
+)
+#define DEFINE_SCRUBV_HEAD_EVENT(name) \
+DEFINE_EVENT(xchk_vector_head_class, name, \
+ TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead), \
+ TP_ARGS(ip, vhead))
+
+DEFINE_SCRUBV_HEAD_EVENT(xchk_scrubv_start);
+
+DECLARE_EVENT_CLASS(xchk_vector_class,
+ TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead,
+ struct xfs_scrub_vec *v),
+ TP_ARGS(mp, vhead, v),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, vec_nr)
+ __field(unsigned int, vec_type)
+ __field(unsigned int, vec_flags)
+ __field(int, vec_ret)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->vec_nr = v - vhead->svh_vecs;
+ __entry->vec_type = v->sv_type;
+ __entry->vec_flags = v->sv_flags;
+ __entry->vec_ret = v->sv_ret;
+ ),
+ TP_printk("dev %d:%d vec[%u] type %s flags 0x%x ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->vec_nr,
+ __print_symbolic(__entry->vec_type, XFS_SCRUB_TYPE_STRINGS),
+ __entry->vec_flags,
+ __entry->vec_ret)
+)
+#define DEFINE_SCRUBV_EVENT(name) \
+DEFINE_EVENT(xchk_vector_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead, \
+ struct xfs_scrub_vec *v), \
+ TP_ARGS(mp, vhead, v))
+
+DEFINE_SCRUBV_EVENT(xchk_scrubv_barrier_fail);
+DEFINE_SCRUBV_EVENT(xchk_scrubv_item);
+
TRACE_EVENT(xchk_op_error,
TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno,
xfs_agblock_t bno, int error, void *ret_ip),
diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
index 2ceae614ade8..bdf89242e6cd 100644
--- a/fs/xfs/scrub/xfs_scrub.h
+++ b/fs/xfs/scrub/xfs_scrub.h
@@ -8,8 +8,10 @@
#ifndef CONFIG_XFS_ONLINE_SCRUB
# define xfs_scrub_metadata(file, sm) (-ENOTTY)
+# define xfs_scrubv_metadata(file, vhead) (-ENOTTY)
#else
int xfs_scrub_metadata(struct file *file, struct xfs_scrub_metadata *sm);
+int xfs_scrubv_metadata(struct file *file, struct xfs_scrub_vec_head *vhead);
#endif /* CONFIG_XFS_ONLINE_SCRUB */
#endif /* __XFS_SCRUB_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 47151b29a7ae..d5597d7b98c0 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1649,6 +1649,51 @@ xfs_ioc_scrub_metadata(
return 0;
}
+STATIC int
+xfs_ioc_scrubv_metadata(
+ struct file *filp,
+ void __user *arg)
+{
+ struct xfs_scrub_vec_head __user *uhead = arg;
+ struct xfs_scrub_vec_head head;
+ struct xfs_scrub_vec_head *vhead;
+ size_t bytes;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&head, uhead, sizeof(head)))
+ return -EFAULT;
+
+ bytes = sizeof_xfs_scrub_vec(head.svh_nr);
+ if (bytes > PAGE_SIZE)
+ return -ENOMEM;
+ vhead = kvmalloc(bytes, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!vhead)
+ return -ENOMEM;
+ memcpy(vhead, &head, sizeof(struct xfs_scrub_vec_head));
+
+ if (copy_from_user(&vhead->svh_vecs, &uhead->svh_vecs,
+ head.svh_nr * sizeof(struct xfs_scrub_vec))) {
+ error = -EFAULT;
+ goto err_free;
+ }
+
+ error = xfs_scrubv_metadata(filp, vhead);
+ if (error)
+ goto err_free;
+
+ if (copy_to_user(uhead, vhead, bytes)) {
+ error = -EFAULT;
+ goto err_free;
+ }
+
+err_free:
+ kvfree(vhead);
+ return error;
+}
+
int
xfs_ioc_swapext(
struct xfs_swapext *sxp)
@@ -1914,6 +1959,8 @@ xfs_file_ioctl(
case FS_IOC_GETFSMAP:
return xfs_ioc_getfsmap(ip, arg);
+ case XFS_IOC_SCRUBV_METADATA:
+ return xfs_ioc_scrubv_metadata(filp, arg);
case XFS_IOC_SCRUB_METADATA:
return xfs_ioc_scrub_metadata(filp, arg);