summary refs log tree commit diff
diff options
context:
space:
mode:
author Darrick J. Wong <darrick.wong@oracle.com> 2019-08-30 15:45:11 -0700
committer Darrick J. Wong <darrick.wong@oracle.com> 2019-10-09 09:39:24 -0700
commit 6065474081094c19b7d524f49d2d1948f5a28e02 (patch)
tree 849e0fba6a6d0c9f26e962cc28acf9c8efbc4d67
parent a6ab93ef18f917e442b2ce300b10c1d6b5c53579 (diff)
xfs: parallelize inode inactivation
Split the inode inactivation work into per-AG work items so that we can take advantage of parallelization.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/scrub/common.c 2
-rw-r--r-- fs/xfs/xfs_icache.c 105
-rw-r--r-- fs/xfs/xfs_icache.h 2
-rw-r--r-- fs/xfs/xfs_mount.c 3
-rw-r--r-- fs/xfs/xfs_mount.h 4
-rw-r--r-- fs/xfs/xfs_super.c 4
6 files changed, 104 insertions, 16 deletions
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index abe88fa756aa..299567271e01 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -910,6 +910,7 @@ xchk_stop_reaping(
{
sc->flags |= XCHK_REAPING_DISABLED;
xfs_stop_block_reaping(sc->mp);
+ xfs_inactive_cancel_work(sc->mp);
}
/* Restart background reaping of resources. */
@@ -917,6 +918,7 @@ void
xchk_start_reaping(
struct xfs_scrub *sc)
{
+ xfs_inactive_schedule_work(sc->mp, 0);
xfs_start_block_reaping(sc->mp);
sc->flags &= ~XCHK_REAPING_DISABLED;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 296cf2f2c417..4cce17eb3d6c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1981,12 +1981,12 @@ xfs_start_block_reaping(
/* Queue a new inode inactivation pass if there are reclaimable inodes. */
static void
xfs_inactive_work_queue(
- struct xfs_mount *mp)
+ struct xfs_perag *pag)
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG))
- queue_delayed_work(mp->m_inactive_workqueue,
- &mp->m_inactive_work,
+ if (pag->pag_ici_inactive)
+ queue_delayed_work(pag->pag_mount->m_inactive_workqueue,
+ &pag->pag_inactive_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
rcu_read_unlock();
}
@@ -2012,7 +2012,7 @@ xfs_perag_set_inactive_tag(
* take a while, so we allow the deferral of an already-scheduled
* inactivation on the grounds that we prefer batching.
*/
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
@@ -2121,6 +2121,28 @@ xfs_inactive_inode(
}
/*
+ * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we
+ * still need to clear the INACTIVE iflag so that we can move on to reclaiming
+ * the inode.
+ */
+static int
+xfs_inactive_inodes_pag(
+ struct xfs_perag *pag)
+{
+ DEFINE_INACTIVE_CTX(inctx, NULL);
+ int error;
+
+ error = xfs_ici_walk_ag(pag, 0, xfs_inactive_grab, xfs_inactive_inode,
+ NULL, &inctx, XFS_ICI_RECLAIM_TAG);
+
+ /* If we inactivated any inodes at all, we need to kick reclaim. */
+ if (inctx.kick_reclaim)
+ xfs_reclaim_work_queue(pag->pag_mount);
+
+ return error;
+}
+
+/*
* Walk the AGs and reclaim the inodes in them. Even if the filesystem is
* corrupted, we still need to clear the INACTIVE iflag so that we can move
* on to reclaiming the inode.
@@ -2148,8 +2170,9 @@ void
xfs_inactive_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_inactive_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_inactive_work);
+ struct xfs_mount *mp = pag->pag_mount;
int error;
/*
@@ -2164,12 +2187,33 @@ xfs_inactive_worker(
if (!sb_start_write_trylock(mp->m_super))
return;
- error = xfs_inactive_inodes(mp, NULL);
+ error = xfs_inactive_inodes_pag(pag);
if (error && error != -EAGAIN)
xfs_err(mp, "inode inactivation failed, error %d", error);
sb_end_write(mp->m_super);
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
+}
+
+/* Wait for all background inactivation work to finish. */
+static void
+xfs_inactive_flush(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno = 0;
+
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ bool flush;
+
+ agno = pag->pag_agno + 1;
+ spin_lock(&pag->pag_ici_lock);
+ flush = pag->pag_ici_inactive > 0;
+ spin_unlock(&pag->pag_ici_lock);
+ if (flush)
+ flush_delayed_work(&pag->pag_inactive_work);
+ xfs_perag_put(pag);
+ }
}
/* Flush all inode inactivation work that might be queued. */
@@ -2177,8 +2221,8 @@ void
xfs_inactive_force(
struct xfs_mount *mp)
{
- queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0);
- flush_delayed_work(&mp->m_inactive_work);
+ xfs_inactive_schedule_work(mp, 0);
+ xfs_inactive_flush(mp);
}
/*
@@ -2190,7 +2234,42 @@ void
xfs_inactive_shutdown(
struct xfs_mount *mp)
{
- cancel_delayed_work_sync(&mp->m_inactive_work);
- flush_workqueue(mp->m_inactive_workqueue);
+ xfs_inactive_cancel_work(mp);
xfs_inactive_inodes(mp, NULL);
}
+
+/* Cancel all queued inactivation work. */
+void
+xfs_inactive_cancel_work(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno = 0;
+
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ agno = pag->pag_agno + 1;
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
+ xfs_perag_put(pag);
+ }
+ flush_workqueue(mp->m_inactive_workqueue);
+}
+
+/* Reschedule background inactivation work. */
+void
+xfs_inactive_schedule_work(
+ struct xfs_mount *mp,
+ unsigned long delay)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno = 0;
+
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ agno = pag->pag_agno + 1;
+ spin_lock(&pag->pag_ici_lock);
+ if (pag->pag_ici_inactive)
+ queue_delayed_work(mp->m_inactive_workqueue,
+ &pag->pag_inactive_work, delay);
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+ }
+}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index eaddfba46f3e..f0a18238bc86 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -86,5 +86,7 @@ void xfs_inactive_worker(struct work_struct *work);
int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb);
void xfs_inactive_force(struct xfs_mount *mp);
void xfs_inactive_shutdown(struct xfs_mount *mp);
+void xfs_inactive_cancel_work(struct xfs_mount *mp);
+void xfs_inactive_schedule_work(struct xfs_mount *mp, unsigned long delay);
#endif
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3b8669f6bd24..060e558f44c3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -126,6 +126,7 @@ __xfs_free_perag(
{
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+ ASSERT(!delayed_work_pending(&pag->pag_inactive_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -146,6 +147,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -201,6 +203,7 @@ xfs_initialize_perag(
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
+ INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2d675dd37e5b..e5170fbc98e9 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -177,7 +177,6 @@ typedef struct xfs_mount {
trimming */
struct delayed_work m_cowblocks_work; /* background cow blocks
trimming */
- struct delayed_work m_inactive_work; /* background inode inactive */
bool m_update_sb; /* sb needs update in mount */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
@@ -416,6 +415,9 @@ typedef struct xfs_perag {
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
+ /* background inode inactivation */
+ struct delayed_work pag_inactive_work;
+
/* reference count */
uint8_t pagf_refcount_level;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 6ecf4686e8a8..90de69cfc00d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -839,7 +839,8 @@ xfs_init_mount_workqueues(
goto out_destroy_eofb;
mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+ mp->m_fsname);
if (!mp->m_inactive_workqueue)
goto out_destroy_sync;
@@ -1641,7 +1642,6 @@ xfs_mount_alloc(
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
- INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker);
mp->m_kobj.kobject.kset = xfs_kset;
/*
* We don't create the finobt per-ag space reservation until after log