-rw-r--r--  fs/xfs/libxfs/xfs_ag.c      |   2
-rw-r--r--  fs/xfs/libxfs/xfs_ag.h      |   9
-rw-r--r--  fs/xfs/libxfs/xfs_defer.c   |   9
-rw-r--r--  fs/xfs/libxfs/xfs_defer.h   |   3
-rw-r--r--  fs/xfs/scrub/bmap_repair.c  |   4
-rw-r--r--  fs/xfs/scrub/common.c       | 143
-rw-r--r--  fs/xfs/scrub/common.h       |   2
-rw-r--r--  fs/xfs/scrub/inode_repair.c |   4
-rw-r--r--  fs/xfs/scrub/repair.c       |   3
-rw-r--r--  fs/xfs/scrub/trace.h        |  31
-rw-r--r--  fs/xfs/xfs_bmap_item.c      |  49
-rw-r--r--  fs/xfs/xfs_extfree_item.c   |  29
-rw-r--r--  fs/xfs/xfs_mount.c          |  96
-rw-r--r--  fs/xfs/xfs_mount.h          |  33
-rw-r--r--  fs/xfs/xfs_refcount_item.c  |  26
-rw-r--r--  fs/xfs/xfs_rmap_item.c      |  25
-rw-r--r--  fs/xfs/xfs_super.c          |   7
-rw-r--r--  fs/xfs/xfs_swapext_item.c   |  10
-rw-r--r--  fs/xfs/xfs_trace.h          | 108
19 files changed, 574 insertions(+), 19 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index dc9d78fb7bac..5fab36a40cea 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -193,6 +193,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
+ ASSERT(atomic_read(&pag->pag_intents) == 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_iunlink_destroy(pag);
@@ -254,6 +255,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_state_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+ init_waitqueue_head(&pag->pag_intents_wq);
init_waitqueue_head(&pag->pagb_wait);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index c9e198e62b74..a58b0db9ef58 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -103,6 +103,15 @@ struct xfs_perag {
* or have some other means to control concurrency.
*/
struct rhashtable pagi_unlinked_hash;
+
+ /*
+ * Counter of live intents. We track the number of log intent items
+ * that have been queued (but not yet processed) so that scrub can
+ * detect the presence of other threads that are in the middle of
+ * processing a chain of deferred items.
+ */
+ atomic_t pag_intents;
+ wait_queue_head_t pag_intents_wq;
};
int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 3045ad184972..108d950b6f41 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -363,7 +363,8 @@ xfs_defer_cancel_list(
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
- ops->cancel_item(pwi);
+ trace_xfs_defer_cancel_item(mp, dfp, pwi);
+ ops->cancel_item(mp, pwi);
}
ASSERT(dfp->dfp_count == 0);
kmem_free(dfp);
@@ -442,6 +443,7 @@ xfs_defer_finish_one(
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
+ trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
error = ops->finish_item(tp, dfp->dfp_done, li, &state);
if (error == -EAGAIN) {
/*
@@ -585,7 +587,7 @@ xfs_defer_add(
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
- const struct xfs_defer_op_type *ops;
+ const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
@@ -598,7 +600,6 @@ xfs_defer_add(
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- ops = defer_op_types[dfp->dfp_type];
if (dfp->dfp_type != type ||
(ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
@@ -616,6 +617,8 @@ xfs_defer_add(
}
list_add_tail(li, &dfp->dfp_work);
+ trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
+ ops->add_item(tp->t_mountp, li);
dfp->dfp_count++;
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 6b25faf1bd2a..3a5411af937f 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -55,7 +55,8 @@ struct xfs_defer_op_type {
struct list_head *item, struct xfs_btree_cur **state);
void (*finish_cleanup)(struct xfs_trans *tp,
struct xfs_btree_cur *state, int error);
- void (*cancel_item)(struct list_head *item);
+ void (*cancel_item)(struct xfs_mount *mp, struct list_head *item);
+ void (*add_item)(struct xfs_mount *mp, const struct list_head *item);
unsigned int max_items;
};
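
The signature changes above mean every defer op type now supplies both hooks. A minimal sketch of the pair for a hypothetical intent type follows; the xfs_foo_intent type and its fi_realtime/fi_startblock fields are invented for illustration, while xfs_fs_bump_intents() and xfs_fs_drop_intents() are the real helpers this patch adds to xfs_mount.c:

/* Sketch only: the "foo" names are hypothetical, the helpers are real. */
STATIC void
xfs_foo_update_add_item(
	struct xfs_mount	*mp,
	const struct list_head	*item)
{
	const struct xfs_foo_intent	*fi;

	fi = container_of(item, struct xfs_foo_intent, fi_list);

	/* Take one counter reference when the work item is queued. */
	xfs_fs_bump_intents(mp, fi->fi_realtime, fi->fi_startblock);
}

STATIC void
xfs_foo_update_cancel_item(
	struct xfs_mount	*mp,
	struct list_head	*item)
{
	struct xfs_foo_intent	*fi;

	fi = container_of(item, struct xfs_foo_intent, fi_list);

	/* Drop the reference if the work item dies without being finished. */
	xfs_fs_drop_intents(mp, fi->fi_realtime, fi->fi_startblock);
	kmem_free(fi);
}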
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 5b75804ad0e6..542799830530 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -333,7 +333,9 @@ xrep_bmap_scan_rt(
if (xrep_is_rtmeta_ino(sc, sc->ip->i_ino))
return 0;
- xchk_rt_lock(sc, &sc->sr);
+ error = xchk_rt_lock(sc, &sc->sr);
+ if (error)
+ return error;
xrep_rt_btcur_init(sc, &sc->sr);
error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
xchk_rt_btcur_free(&sc->sr);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 443fe6b88dd2..5b33166e633f 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -486,8 +486,8 @@ want_ag_read_header_failure(
* all the buffers we grab to the scrub transaction so they'll all be freed
* when we cancel it. Returns ENOENT if we can't grab the perag structure.
*/
-int
-xchk_ag_read_headers(
+static inline int
+__xchk_ag_read_headers(
struct xfs_scrub *sc,
xfs_agnumber_t agno,
struct xchk_ag *sa)
@@ -495,11 +495,6 @@ xchk_ag_read_headers(
struct xfs_mount *mp = sc->mp;
int error;
- ASSERT(!sa->pag);
- sa->pag = xfs_perag_get(mp, agno);
- if (!sa->pag)
- return -ENOENT;
-
error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp);
if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
return error;
@@ -515,6 +510,87 @@ xchk_ag_read_headers(
return 0;
}
+static inline bool
+xchk_ag_intents_pending(
+ struct xfs_perag *pag)
+{
+ int intents = atomic_read(&pag->pag_intents);
+
+ trace_xchk_ag_read_headers(pag->pag_mount, pag->pag_agno, intents,
+ _RET_IP_);
+
+ return intents > 0;
+}
+
+/*
+ * Grab all the headers for an AG, and wait until there aren't any pending
+ * intents.
+ */
+int
+xchk_ag_read_headers(
+ struct xfs_scrub *sc,
+ xfs_agnumber_t agno,
+ struct xchk_ag *sa)
+{
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ ASSERT(!sa->pag);
+ sa->pag = xfs_perag_get(mp, agno);
+ if (!sa->pag)
+ return -ENOENT;
+
+ do {
+ error = __xchk_ag_read_headers(sc, agno, sa);
+ if (error)
+ break;
+
+ /*
+ * Decide if this AG is quiet enough for all metadata to be
+ * consistent with each other. XFS allows the AG header buffer
+ * locks to cycle across transaction rolls while processing
+ * chains of deferred ops, which means that there could be
+ * other threads in the middle of processing a chain of
+ * deferred ops. For regular operations we are careful about
+ * ordering operations to prevent collisions between threads
+ * (which is why we don't need a per-AG lock), but scrub and
+ * repair have to serialize against chained operations.
+ *
+	 * We just locked all the AG header buffers; now take a look
+ * to see if there are any intents in progress. If there are,
+ * drop the AG headers and wait for the intents to drain.
+ * Since we hold all the AG header locks for the duration of
+ * the scrub, this is the only time we have to sample the
+ * intents counter; any threads increasing it after this point
+ * can't possibly be in the middle of a chain of AG metadata
+ * updates.
+ */
+ if (!xchk_ag_intents_pending(sa->pag)) {
+ error = 0;
+ break;
+ }
+
+ if (sa->agfl_bp) {
+ xfs_trans_brelse(sc->tp, sa->agfl_bp);
+ sa->agfl_bp = NULL;
+ }
+
+ if (sa->agf_bp) {
+ xfs_trans_brelse(sc->tp, sa->agf_bp);
+ sa->agf_bp = NULL;
+ }
+
+ if (sa->agi_bp) {
+ xfs_trans_brelse(sc->tp, sa->agi_bp);
+ sa->agi_bp = NULL;
+ }
+
+ error = xfs_perag_wait_intents(sa->pag);
+ } while (!error);
+
+ return error;
+}
+
/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
@@ -641,14 +717,59 @@ xchk_ag_init(
return 0;
}
-/* Lock everything we need to work on realtime metadata. */
-void
+#if IS_ENABLED(CONFIG_XFS_RT)
+static inline bool
+xchk_rt_intents_pending(
+ struct xfs_mount *mp)
+{
+ int intents = atomic_read(&mp->m_rt_intents);
+
+ trace_xchk_rt_lock(mp, -1U, intents, _RET_IP_);
+
+ return intents > 0;
+}
+#else
+# define xchk_rt_intents_pending(mp) (false)
+#endif
+
+/* Lock everything we need to work on realtime metadata and wait for intents. */
+int
xchk_rt_lock(
struct xfs_scrub *sc,
struct xchk_rt *sr)
{
- xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL);
- sr->locked = true;
+ int error;
+
+ do {
+ xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL);
+
+ /*
+ * Decide if the RT volume is quiet enough for all metadata to
+ * be consistent with each other. Regular file IO doesn't get
+ * to lock all the rt inodes at the same time, which means that
+ * there could be other threads in the middle of processing a
+ * chain of deferred ops.
+ *
+ * We just locked all the rt inodes; now take a look to see if
+ * there are any rt intents in progress. If there are, drop
+ * the rt inode locks and wait for the intents to drain. Since
+ * we hold the rt inode locks for the duration of the scrub,
+ * this is the only time we have to sample the intents counter;
+ * any threads increasing it after this point can't possibly be
+ * in the middle of a chain of rt metadata updates.
+ */
+ if (!xchk_rt_intents_pending(sc->mp)) {
+ sr->locked = true;
+ error = 0;
+ break;
+ }
+
+ xfs_rtunlock(sc->mp, XFS_RTLOCK_ALL);
+
+ error = xfs_rt_wait_intents(sc->mp);
+ } while (!error);
+
+ return error;
}
/*
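
Both xchk_ag_read_headers() and xchk_rt_lock() now share the same drain loop: take every relevant lock, sample the counter, and either keep the locks (counter is zero) or release everything and sleep until the chains finish. Distilled into a sketch, where lock_all()/unlock_all() are stand-ins for the AG header buffer locks or XFS_RTLOCK_ALL and "intents" is the matching pag_intents or m_rt_intents counter:

/* Sketch of the shared drain loop; lock_all()/unlock_all() are stand-ins. */
static int xchk_drain_sketch(atomic_t *intents, wait_queue_head_t *wq)
{
	int error = 0;

	do {
		error = lock_all();	/* e.g. __xchk_ag_read_headers() */
		if (error)
			break;

		/*
		 * Sample the counter only while every lock is held; a
		 * thread that bumps it after this read cannot already be
		 * in the middle of a chain against this AG or rt volume.
		 */
		if (atomic_read(intents) == 0)
			break;		/* quiesced: keep the locks */

		unlock_all();		/* let in-flight chains finish */
		error = wait_event_killable(*wq,
				atomic_read(intents) == 0);
	} while (!error);

	return error;
}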
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index dbd4145690b2..70a42897cd22 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -153,7 +153,7 @@ xchk_ag_init_existing(
void xchk_rt_init(struct xfs_scrub *sc, struct xchk_rt *sr);
void xchk_rt_btcur_free(struct xchk_rt *sr);
-void xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr);
+int xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr);
void xchk_rt_unlock(struct xfs_scrub *sc, struct xchk_rt *sr);
int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
struct xchk_ag *sa);
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 144481f94ce8..c99fe223e3d6 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -597,7 +597,9 @@ xrep_dinode_count_rt_rmaps(
xrep_is_rtmeta_ino(sc, sc->sm->sm_ino))
return 0;
- xchk_rt_lock(sc, &sc->sr);
+ error = xchk_rt_lock(sc, &sc->sr);
+ if (error)
+ return error;
xrep_rt_btcur_init(sc, &sc->sr);
error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
dis);
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index b50304ee1305..4c4680a8b41b 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -482,6 +482,7 @@ xrep_newbt_schedule_reap(
INIT_LIST_HEAD(&efi_item.xefi_list);
list_add(&efi_item.xefi_list, &items);
+ xfs_fs_bump_intents(xnr->sc->mp, false, resv->fsbno);
resv->efi = xfs_extent_free_defer_type.create_intent(xnr->sc->tp,
&items, 1, false);
}
@@ -712,6 +713,7 @@ xrep_newbt_destroy(
goto junkit;
list_del(&resv->list);
+ xfs_fs_drop_intents(sc->mp, false, resv->fsbno);
kmem_free(resv);
}
@@ -724,6 +726,7 @@ junkit:
list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
xfs_extent_free_defer_type.abort_intent(resv->efi);
list_del(&resv->list);
+ xfs_fs_drop_intents(sc->mp, false, resv->fsbno);
kmem_free(resv);
}
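
Repair needs the manual bump/drop calls above because xrep_newbt_schedule_reap() builds its EFI directly through ->create_intent() instead of queueing work through xfs_defer_add(), so the new ->add_item() hook never sees it. A sketch of the pairing, using the names from the hunks above:

/* Repair takes the counter reference itself; ->add_item() never runs. */
xfs_fs_bump_intents(mp, false, resv->fsbno);		/* schedule_reap */
resv->efi = xfs_extent_free_defer_type.create_intent(tp, &items, 1, false);
/* ... the new btree is built; the EFI is logged and later reaped ... */
xfs_fs_drop_intents(mp, false, resv->fsbno);		/* destroy/junkit */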
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 732a7681d691..174381167893 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -710,6 +710,37 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
__entry->cluster_ino)
)
+DECLARE_EVENT_CLASS(xchk_ag_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+ unsigned long caller_ip),
+ TP_ARGS(mp, agno, refcount, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, refcount)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->refcount = refcount;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno %u refcount %d caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->refcount,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_XCHK_AG_EVENT(name) \
+DEFINE_EVENT(xchk_ag_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_XCHK_AG_EVENT(xchk_ag_read_headers);
+DEFINE_XCHK_AG_EVENT(xchk_rt_lock);
+
TRACE_EVENT(xchk_fscounters_calc,
TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
uint64_t fdblocks, uint64_t delalloc),
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 2b2d09a95ed9..4eead6f75509 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -351,14 +351,30 @@ xfs_bmap_update_finish_item(
struct xfs_btree_cur **state)
{
struct xfs_bmap_intent *bi;
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_fsblock_t orig_startblock;
int error;
bi = container_of(item, struct xfs_bmap_intent, bi_list);
+ orig_startblock = bi->bi_bmap.br_startblock;
error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi);
if (!error && bi->bi_bmap.br_blockcount > 0) {
ASSERT(bi->bi_type == XFS_BMAP_UNMAP);
return -EAGAIN;
}
+
+ /*
+ * Drop our intent counter reference now that we've either queued a
+ * deferred rmap intent or failed. Be careful to use the original
+ * startblock since the finishing functions can update the intent
+ * state.
+ */
+ if (xfs_has_rmapbt(mp)) {
+ bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork);
+
+ xfs_fs_drop_intents(mp, rt, orig_startblock);
+ }
+
kmem_free(bi);
return error;
}
@@ -371,17 +387,47 @@ xfs_bmap_update_abort_intent(
xfs_bui_release(BUI_ITEM(intent));
}
-/* Cancel a deferred rmap update. */
+/* Cancel a deferred bmap update. */
STATIC void
xfs_bmap_update_cancel_item(
+ struct xfs_mount *mp,
struct list_head *item)
{
struct xfs_bmap_intent *bi;
bi = container_of(item, struct xfs_bmap_intent, bi_list);
+
+ /* Drop our intent counter reference since we're going away. */
+ if (xfs_has_rmapbt(mp)) {
+ bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork);
+
+ xfs_fs_drop_intents(mp, rt, bi->bi_bmap.br_startblock);
+ }
+
kmem_free(bi);
}
+/* Add a deferred bmap update. */
+STATIC void
+xfs_bmap_update_add_item(
+ struct xfs_mount *mp,
+ const struct list_head *item)
+{
+ const struct xfs_bmap_intent *bi;
+
+ bi = container_of(item, struct xfs_bmap_intent, bi_list);
+
+ /*
+ * Grab an intent counter reference on behalf of the deferred rmap
+ * intent item that we will queue when we finish this bmap work.
+ */
+ if (xfs_has_rmapbt(mp)) {
+ bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork);
+
+ xfs_fs_bump_intents(mp, rt, bi->bi_bmap.br_startblock);
+ }
+}
+
const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
.max_items = XFS_BUI_MAX_FAST_EXTENTS,
.create_intent = xfs_bmap_update_create_intent,
@@ -389,6 +435,7 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
.create_done = xfs_bmap_update_create_done,
.finish_item = xfs_bmap_update_finish_item,
.cancel_item = xfs_bmap_update_cancel_item,
+ .add_item = xfs_bmap_update_add_item,
};
/* Is this recovered BUI ok? */
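
Of the converted item types, bmap is the subtle one: the counter reference it takes stands in for the rmap intent that finishing the mapping will queue, so it exists only on rmapbt filesystems, survives an -EAGAIN requeue, and must be dropped against the saved orig_startblock because the finishing function can rewrite bi_bmap. In outline (a restatement of the code above, not new API):

/*
 * Counter reference lifetime for one xfs_bmap_intent (rmapbt only):
 *
 * xfs_defer_add()
 *   -> xfs_bmap_update_add_item()     bump(bi_bmap.br_startblock)
 * xfs_defer_finish_one()
 *   -> xfs_bmap_update_finish_item()  -EAGAIN: keep the reference,
 *                                     the item is requeued;
 *                                     done/error: drop(orig_startblock)
 * xfs_defer_cancel_list()
 *   -> xfs_bmap_update_cancel_item()  drop if never finished
 */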
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 81e7a7e1533d..9f7f25a17e47 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -487,6 +487,7 @@ xfs_extent_free_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
+ struct xfs_mount *mp = tp->t_mountp;
struct xfs_extent_free_item *xefi;
int error;
@@ -502,6 +503,14 @@ xfs_extent_free_finish_item(
}
error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
+
+ /*
+ * Drop our intent counter reference now that we've finished all the
+ * work or failed. The finishing function doesn't update the intent
+ * state, so we need not preserve the original startblock.
+ */
+ xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock);
+
kmem_free(xefi);
return error;
}
@@ -517,14 +526,30 @@ xfs_extent_free_abort_intent(
/* Cancel a free extent. */
STATIC void
xfs_extent_free_cancel_item(
+ struct xfs_mount *mp,
struct list_head *item)
{
struct xfs_extent_free_item *xefi;
xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+ xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock);
kmem_free(xefi);
}
+/* Add a deferred free extent. */
+STATIC void
+xfs_extent_free_add_item(
+ struct xfs_mount *mp,
+ const struct list_head *item)
+{
+ const struct xfs_extent_free_item *xefi;
+
+ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+ /* Grab an intent counter reference for this intent item. */
+ xfs_fs_bump_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock);
+}
+
const struct xfs_defer_op_type xfs_extent_free_defer_type = {
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
.create_intent = xfs_extent_free_create_intent,
@@ -532,6 +557,7 @@ const struct xfs_defer_op_type xfs_extent_free_defer_type = {
.create_done = xfs_extent_free_create_done,
.finish_item = xfs_extent_free_finish_item,
.cancel_item = xfs_extent_free_cancel_item,
+ .add_item = xfs_extent_free_add_item,
};
/*
@@ -585,6 +611,8 @@ xfs_agfl_free_finish_item(
extp->ext_len = xefi->xefi_blockcount;
efdp->efd_next_extent++;
+ xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock);
+
kmem_free(xefi);
return error;
}
@@ -597,6 +625,7 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.create_done = xfs_extent_free_create_done,
.finish_item = xfs_agfl_free_finish_item,
.cancel_item = xfs_extent_free_cancel_item,
+ .add_item = xfs_extent_free_add_item,
};
/* Is this recovered EFI ok? */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 92cd5087958e..fdda9fbd638e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1465,3 +1465,99 @@ xfs_hook_call(
{
return srcu_notifier_call_chain(&chain->head, val, priv);
}
+
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+
+#if IS_ENABLED(CONFIG_XFS_RT)
+static inline void
+xfs_rt_bump_intents(
+ struct xfs_mount *mp)
+{
+ trace_xfs_rt_bump_intents(mp, __return_address);
+
+ atomic_inc(&mp->m_rt_intents);
+}
+
+static inline void
+xfs_rt_drop_intents(
+ struct xfs_mount *mp)
+{
+ trace_xfs_rt_drop_intents(mp, __return_address);
+
+ ASSERT(atomic_read(&mp->m_rt_intents) > 0);
+
+ if (atomic_dec_and_test(&mp->m_rt_intents))
+ wake_up(&mp->m_rt_intents_wq);
+}
+
+int
+xfs_rt_wait_intents(
+ struct xfs_mount *mp)
+{
+ trace_xfs_rt_wait_intents(mp, __return_address);
+
+ return wait_event_killable(mp->m_rt_intents_wq,
+ atomic_read(&mp->m_rt_intents) == 0);
+}
+#else
+static inline void xfs_rt_bump_intents(struct xfs_mount *mp) { }
+static inline void xfs_rt_drop_intents(struct xfs_mount *mp) { }
+#endif /* CONFIG_XFS_RT */
+
+static inline void
+xfs_ag_bump_intents(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag = xfs_perag_get(mp, agno);
+
+ trace_xfs_perag_bump_intents(pag, __return_address);
+
+ atomic_inc(&pag->pag_intents);
+ xfs_perag_put(pag);
+}
+
+static inline void
+xfs_ag_drop_intents(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag = xfs_perag_get(mp, agno);
+
+ trace_xfs_perag_drop_intents(pag, __return_address);
+
+ ASSERT(atomic_read(&pag->pag_intents) > 0);
+
+ if (atomic_dec_and_test(&pag->pag_intents))
+ wake_up(&pag->pag_intents_wq);
+ xfs_perag_put(pag);
+}
+
+void
+xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb)
+{
+ if (isrt)
+ xfs_rt_bump_intents(mp);
+ else
+ xfs_ag_bump_intents(mp, XFS_FSB_TO_AGNO(mp, fsb));
+}
+
+void
+xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb)
+{
+ if (isrt)
+ xfs_rt_drop_intents(mp);
+ else
+ xfs_ag_drop_intents(mp, XFS_FSB_TO_AGNO(mp, fsb));
+}
+
+int
+xfs_perag_wait_intents(
+ struct xfs_perag *pag)
+{
+ trace_xfs_perag_wait_intents(pag, __return_address);
+
+ return wait_event_killable(pag->pag_intents_wq,
+ atomic_read(&pag->pag_intents) == 0);
+}
+#endif /* CONFIG_XFS_ONLINE_SCRUB */
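
The per-AG and per-rt implementations above are line-for-line twins. As a sketch of how a hypothetical follow-up cleanup could fold them into one reusable primitive (nothing in this patch defines such a type):

/* Hypothetical: one drain object usable for both pag and rt counters. */
struct xfs_intent_drain {
	atomic_t		dr_count;	/* live intent items */
	wait_queue_head_t	dr_waiters;	/* scrub sleeps here */
};

static inline void xfs_drain_init(struct xfs_intent_drain *dr)
{
	atomic_set(&dr->dr_count, 0);
	init_waitqueue_head(&dr->dr_waiters);
}

static inline void xfs_drain_bump(struct xfs_intent_drain *dr)
{
	atomic_inc(&dr->dr_count);
}

static inline void xfs_drain_drop(struct xfs_intent_drain *dr)
{
	ASSERT(atomic_read(&dr->dr_count) > 0);

	/* Wake waiters only when the last reference goes away. */
	if (atomic_dec_and_test(&dr->dr_count))
		wake_up(&dr->dr_waiters);
}

static inline int xfs_drain_wait(struct xfs_intent_drain *dr)
{
	return wait_event_killable(dr->dr_waiters,
			atomic_read(&dr->dr_count) == 0);
}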
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a161aabd8438..62b8fe194219 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -263,6 +263,17 @@ typedef struct xfs_mount {
/* online nlink check stuff */
struct xfs_hook_chain m_nlink_mod_hooks;
#endif
+
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT)
+ /*
+ * Counter of live intents. We track the number of log intent items
+ * that have been queued (but not yet processed) so that scrub can
+ * detect the presence of other threads that are in the middle of
+ * processing a chain of deferred items.
+ */
+ atomic_t m_rt_intents;
+ wait_queue_head_t m_rt_intents_wq;
+#endif
} xfs_mount_t;
/* Parameters for xfs_bumplink/droplink hook. */
@@ -580,4 +591,25 @@ int xfs_hook_add(struct xfs_hook_chain *chain, struct notifier_block *hook,
void xfs_hook_del(struct xfs_hook_chain *chain, struct notifier_block *hook);
int xfs_hook_call(struct xfs_hook_chain *chain, unsigned long val, void *priv);
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+# if IS_ENABLED(CONFIG_XFS_RT)
+int xfs_rt_wait_intents(struct xfs_mount *mp);
+# else
+# define xfs_rt_wait_intents(mp) (-ENOSYS)
+# endif /* CONFIG_XFS_RT */
+
+void xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb);
+void xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb);
+int xfs_perag_wait_intents(struct xfs_perag *pag);
+
+#else
+static inline void
+xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) { }
+static inline void
+xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) { }
+
+# define xfs_perag_wait_intents(pag) (-ENOSYS)
+# define xfs_rt_wait_intents(mp) (-ENOSYS)
+#endif /* CONFIG_XFS_ONLINE_SCRUB */
+
#endif /* __XFS_MOUNT_H__ */
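
A note on the stubs above: with CONFIG_XFS_ONLINE_SCRUB=n the bump/drop helpers become empty static inlines, so the intent-item code elsewhere in this patch can call them unconditionally; the -ENOSYS wait macros are defensive stubs for configurations where their only callers are compiled out, and with scrub on but CONFIG_XFS_RT=n the xfs_rt_wait_intents() stub is unreachable because xchk_rt_intents_pending() is hardwired to false. A caller therefore never needs an #ifdef:

	/* Safe in every config; compiles away entirely when scrub is off. */
	xfs_fs_bump_intents(mp, isrt, startblock);
	/* ... log and process the intent item ... */
	xfs_fs_drop_intents(mp, isrt, startblock);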
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 006bd1086e50..f9d67cdf9c5d 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -364,9 +364,12 @@ xfs_refcount_update_finish_item(
struct xfs_btree_cur **state)
{
struct xfs_refcount_intent *ri;
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_fsblock_t orig_startblock;
int error;
ri = container_of(item, struct xfs_refcount_intent, ri_list);
+ orig_startblock = ri->ri_startblock;
error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri,
state);
@@ -376,6 +379,13 @@ xfs_refcount_update_finish_item(
ri->ri_type == XFS_REFCOUNT_DECREASE);
return -EAGAIN;
}
+
+ /*
+ * Drop our intent counter reference now that we've finished all the
+ * work or failed. Be careful to use the original startblock because
+ * the finishing functions can update the intent state.
+ */
+ xfs_fs_drop_intents(mp, ri->ri_realtime, orig_startblock);
kmem_free(ri);
return error;
}
@@ -391,14 +401,29 @@ xfs_refcount_update_abort_intent(
/* Cancel a deferred refcount update. */
STATIC void
xfs_refcount_update_cancel_item(
+ struct xfs_mount *mp,
struct list_head *item)
{
struct xfs_refcount_intent *ri;
ri = container_of(item, struct xfs_refcount_intent, ri_list);
+ xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_startblock);
kmem_free(ri);
}
+/* Add a deferred refcount update. */
+STATIC void
+xfs_refcount_update_add_item(
+ struct xfs_mount *mp,
+ const struct list_head *item)
+{
+ const struct xfs_refcount_intent *ri;
+
+ /* Grab an intent counter reference for this intent item. */
+ ri = container_of(item, struct xfs_refcount_intent, ri_list);
+ xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_startblock);
+}
+
const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
.max_items = XFS_CUI_MAX_FAST_EXTENTS,
.create_intent = xfs_refcount_update_create_intent,
@@ -407,6 +432,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
.finish_item = xfs_refcount_update_finish_item,
.finish_cleanup = xfs_refcount_finish_one_cleanup,
.cancel_item = xfs_refcount_update_cancel_item,
+ .add_item = xfs_refcount_update_add_item,
};
/* Is this recovered CUI ok? */
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 6f115ca55746..5a3953088b5d 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -411,11 +411,19 @@ xfs_rmap_update_finish_item(
struct xfs_btree_cur **state)
{
struct xfs_rmap_intent *ri;
+ struct xfs_mount *mp = tp->t_mountp;
int error;
ri = container_of(item, struct xfs_rmap_intent, ri_list);
error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done), ri,
state);
+
+ /*
+ * Drop our intent counter reference now that we've finished all the
+ * work or failed. The finishing function doesn't update the intent
+ * state, so we need not preserve the original startblock.
+ */
+ xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock);
kmem_free(ri);
return error;
}
@@ -431,14 +439,30 @@ xfs_rmap_update_abort_intent(
/* Cancel a deferred rmap update. */
STATIC void
xfs_rmap_update_cancel_item(
+ struct xfs_mount *mp,
struct list_head *item)
{
struct xfs_rmap_intent *ri;
ri = container_of(item, struct xfs_rmap_intent, ri_list);
+ xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock);
kmem_free(ri);
}
+/* Add a deferred rmap update. */
+STATIC void
+xfs_rmap_update_add_item(
+ struct xfs_mount *mp,
+ const struct list_head *item)
+{
+ const struct xfs_rmap_intent *ri;
+
+ ri = container_of(item, struct xfs_rmap_intent, ri_list);
+
+ /* Grab an intent counter reference for this intent item. */
+ xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock);
+}
+
const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.max_items = XFS_RUI_MAX_FAST_EXTENTS,
.create_intent = xfs_rmap_update_create_intent,
@@ -447,6 +471,7 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.finish_item = xfs_rmap_update_finish_item,
.finish_cleanup = xfs_rmap_finish_one_cleanup,
.cancel_item = xfs_rmap_update_cancel_item,
+ .add_item = xfs_rmap_update_add_item,
};
/* Is this recovered RUI ok? */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d8c03d84b78a..a0e47df75444 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -725,6 +725,9 @@ xfs_mount_free(
ASSERT(!mutex_is_locked(&mp->m_scrub_freeze));
mutex_destroy(&mp->m_scrub_freeze);
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT)
+ ASSERT(atomic_read(&mp->m_rt_intents) == 0);
+#endif
kmem_free(mp);
}
@@ -1977,6 +1980,10 @@ static int xfs_init_fs_context(
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
mp->m_kobj.kobject.kset = xfs_kset;
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT)
+ init_waitqueue_head(&mp->m_rt_intents_wq);
+ atomic_set(&mp->m_rt_intents, 0);
+#endif
/*
* We don't create the finobt per-ag space reservation until after log
* recovery, so we must set this to true so that an ifree transaction
diff --git a/fs/xfs/xfs_swapext_item.c b/fs/xfs/xfs_swapext_item.c
index 7c29e4c9737a..08fb0d514d69 100644
--- a/fs/xfs/xfs_swapext_item.c
+++ b/fs/xfs/xfs_swapext_item.c
@@ -346,6 +346,7 @@ xfs_swapext_abort_intent(
/* Cancel a deferred swapext update. */
STATIC void
xfs_swapext_cancel_item(
+ struct xfs_mount *mp,
struct list_head *item)
{
struct xfs_swapext_intent *sxi;
@@ -354,6 +355,15 @@
kmem_free(sxi);
}
+/* Add a deferred swapext update. */
+STATIC void
+xfs_swapext_add_item(
+ struct xfs_mount *mp,
+ const struct list_head *item)
+{
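+	/* Empty: swapext intents aren't tracked by the AG/rt drain counters. */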
+}
+
const struct xfs_defer_op_type xfs_swapext_defer_type = {
.max_items = XFS_SXI_MAX_FAST_EXTENTS,
.create_intent = xfs_swapext_create_intent,
@@ -361,6 +370,7 @@ const struct xfs_defer_op_type xfs_swapext_defer_type = {
.create_done = xfs_swapext_create_done,
.finish_item = xfs_swapext_finish_item,
.cancel_item = xfs_swapext_cancel_item,
+ .add_item = xfs_swapext_add_item,
};
/* Is this recovered SXI ok? */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7879e11115b8..c3b3c18dc3fb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -271,6 +271,8 @@ DEFINE_FS_EVENT(xfs_blockgc_start);
DEFINE_FS_EVENT(xfs_blockgc_stop);
DEFINE_FS_EVENT(xfs_blockgc_worker);
DEFINE_FS_EVENT(xfs_blockgc_flush_all);
+DEFINE_FS_EVENT(xfs_force_shutdown1);
+DEFINE_FS_EVENT(xfs_force_shutdown2);
TRACE_EVENT(xfs_inodegc_shrinker_scan,
TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
@@ -2748,6 +2750,44 @@ DEFINE_EVENT(xfs_free_extent_deferred_class, name, \
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred);
+DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
+ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp,
+ void *item),
+ TP_ARGS(mp, dfp, item),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(void *, intent)
+ __field(void *, item)
+ __field(char, committed)
+ __field(int, nr)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp ? mp->m_super->s_dev : 0;
+ __entry->type = dfp->dfp_type;
+ __entry->intent = dfp->dfp_intent;
+ __entry->item = item;
+ __entry->committed = dfp->dfp_done != NULL;
+ __entry->nr = dfp->dfp_count;
+ ),
+ TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->intent,
+ __entry->item,
+ __entry->committed,
+ __entry->nr)
+)
+#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_defer_pending_item_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \
+ void *item), \
+ TP_ARGS(mp, dfp, item))
+
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item);
+
/* rmap tracepoints */
DECLARE_EVENT_CLASS(xfs_rmap_class,
TP_PROTO(struct xfs_btree_cur *cur,
@@ -4893,6 +4933,74 @@ DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_free_extent);
DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_critical);
DEFINE_INODE_ERROR_EVENT(xfs_imeta_resv_init_error);
+DECLARE_EVENT_CLASS(xfs_perag_intents_class,
+ TP_PROTO(struct xfs_perag *pag, void *caller_ip),
+ TP_ARGS(pag, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(long, nr_intents)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+ __entry->nr_intents = atomic_read(&pag->pag_intents);
+#else
+ __entry->nr_intents = -1;
+#endif
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->nr_intents,
+ __entry->caller_ip)
+);
+
+#define DEFINE_PERAG_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_perag_intents_class, name, \
+ TP_PROTO(struct xfs_perag *pag, void *caller_ip), \
+ TP_ARGS(pag, caller_ip))
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_bump_intents);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_drop_intents);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
+
+DECLARE_EVENT_CLASS(xfs_rt_intents_class,
+ TP_PROTO(struct xfs_mount *mp, void *caller_ip),
+ TP_ARGS(mp, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(long, nr_intents)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT)
+ __entry->nr_intents = atomic_read(&mp->m_rt_intents);
+#else
+ __entry->nr_intents = -1;
+#endif
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d intents %ld caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->nr_intents,
+ __entry->caller_ip)
+);
+
+#define DEFINE_RT_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_rt_intents_class, name, \
+ TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
+ TP_ARGS(mp, caller_ip))
+DEFINE_RT_INTENTS_EVENT(xfs_rt_bump_intents);
+DEFINE_RT_INTENTS_EVENT(xfs_rt_drop_intents);
+DEFINE_RT_INTENTS_EVENT(xfs_rt_wait_intents);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH