summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com>, 2019-01-16 10:12:13 -0800
committer: Darrick J. Wong <darrick.wong@oracle.com>, 2019-02-04 09:31:15 -0800
commit: 242f84fd671c2f7d4cdd8c743b24e57ab12e6dc7 (patch)
tree: 54e0f9253adf5752d4c765d5e6a7a5112d1d2c5e
parent: da01851741c0bb94c6ffee814a9faa8c5bca73cc (diff)
xfs: parallelize inode inactivation (ref: deferred-inactivation_2019-02-04)

Split the inode inactivation work into per-AG work items so that we can take advantage of parallelization.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/xfs_icache.c | 108
-rw-r--r-- fs/xfs/xfs_mount.c | 3
-rw-r--r-- fs/xfs/xfs_mount.h | 4
-rw-r--r-- fs/xfs/xfs_super.c | 3
4 files changed, 95 insertions, 23 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d1a9e1571e38..37baaabfb29a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -231,12 +231,12 @@ xfs_reclaim_work_queue(
/* Queue a new inode inactivation pass if there are reclaimable inodes. */
static void
xfs_inactive_work_queue(
- struct xfs_mount *mp)
+ struct xfs_perag *pag)
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG))
- queue_delayed_work(mp->m_inactive_workqueue,
- &mp->m_inactive_work,
+ if (pag->pag_ici_inactive)
+ queue_delayed_work(pag->pag_mount->m_inactive_workqueue,
+ &pag->pag_inactive_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
rcu_read_unlock();
}
@@ -319,7 +319,7 @@ xfs_perag_set_inactive_tag(
* idea of when it ought to force inactivation, and in the mean time
* we prefer batching.
*/
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
@@ -1697,6 +1697,37 @@ xfs_inactive_inode(
}
/*
+ * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we
+ * still need to clear the INACTIVE iflag so that we can move on to reclaiming
+ * the inode.
+ */
+int
+xfs_inactive_inodes_pag(
+ struct xfs_perag *pag,
+ struct xfs_eofblocks *eofb)
+{
+ int nr_to_scan = INT_MAX;
+ bool done = false;
+
+ return xfs_reclaim_inodes_pag(pag, eofb, 0, xfs_inactive_inode_grab,
+ xfs_inactive_inode, &nr_to_scan, &done);
+}
+
+/* Does this pag have inactive inodes? */
+static inline bool
+xfs_pag_has_inactive(
+ struct xfs_perag *pag)
+{
+ unsigned int inactive;
+
+ spin_lock(&pag->pag_ici_lock);
+ inactive = pag->pag_ici_inactive;
+ spin_unlock(&pag->pag_ici_lock);
+
+ return inactive > 0;
+}
+
+/*
* Walk the AGs and reclaim the inodes in them. Even if the filesystem is
* corrupted, we still need to clear the INACTIVE iflag so that we can move
* on to reclaiming the inode.
@@ -1725,15 +1756,12 @@ xfs_inactive_inodes(
agno = 0;
while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
- int nr_to_scan = INT_MAX;
- bool done = false;
-
agno = pag->pag_agno + 1;
- error = xfs_reclaim_inodes_pag(pag, eofb, 0,
- xfs_inactive_inode_grab, xfs_inactive_inode,
- &nr_to_scan, &done);
- if (error && last_error != -EFSCORRUPTED)
- last_error = error;
+ if (xfs_pag_has_inactive(pag)) {
+ error = xfs_inactive_inodes_pag(pag, eofb);
+ if (error && last_error != -EFSCORRUPTED)
+ last_error = error;
+ }
xfs_perag_put(pag);
}
@@ -1746,14 +1774,29 @@ void
xfs_inactive_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_inactive_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_inactive_work);
+ struct xfs_mount *mp = pag->pag_mount;
int error;
- error = xfs_inactive_inodes(mp, NULL);
+ /*
+ * We want to skip inode inactivation while the filesystem is frozen
+ * because we don't want the inactivation thread to block while taking
+ * sb_intwrite. Therefore, we try to take sb_write for the duration
+ * of the inactive scan -- a freeze attempt will block until we're
+ * done here, and if the fs is past stage 1 freeze we'll bounce out
+ * until things unfreeze. If the fs goes down while frozen we'll
+ * still have log recovery to clean up after us.
+ */
+ if (!sb_start_write_trylock(mp->m_super))
+ return;
+
+ error = xfs_inactive_inodes_pag(pag, NULL);
if (error && error != -EAGAIN)
xfs_err(mp, "inode inactivation failed, error %d", error);
- xfs_inactive_work_queue(mp);
+
+ sb_end_write(mp->m_super);
+ xfs_inactive_work_queue(pag);
}
/* Flush all inode inactivation work that might be queued. */
@@ -1761,8 +1804,25 @@ void
xfs_inactive_force(
struct xfs_mount *mp)
{
- queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0);
- flush_delayed_work(&mp->m_inactive_work);
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ agno = 0;
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ agno = pag->pag_agno + 1;
+ if (xfs_pag_has_inactive(pag))
+ queue_delayed_work(mp->m_inactive_workqueue,
+ &pag->pag_inactive_work, 0);
+ xfs_perag_put(pag);
+ }
+
+ agno = 0;
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ agno = pag->pag_agno + 1;
+ if (xfs_pag_has_inactive(pag))
+ flush_delayed_work(&pag->pag_inactive_work);
+ xfs_perag_put(pag);
+ }
}
/*
@@ -1773,7 +1833,15 @@ void
xfs_inactive_deactivate(
struct xfs_mount *mp)
{
- cancel_delayed_work_sync(&mp->m_inactive_work);
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno = 0;
+
+ while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) {
+ agno = pag->pag_agno + 1;
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
+ xfs_perag_put(pag);
+ }
+
flush_workqueue(mp->m_inactive_workqueue);
xfs_inactive_inodes(mp, NULL);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1bc5c27a9425..58d8b7de2788 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -129,6 +129,7 @@ __xfs_free_perag(
{
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+ ASSERT(!delayed_work_pending(&pag->pag_inactive_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -151,6 +152,7 @@ xfs_free_perag(
ASSERT(atomic_read(&pag->pag_ref) == 0);
ASSERT(pag->pagi_unlinked_count == 0 ||
XFS_FORCED_SHUTDOWN(mp));
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -206,6 +208,7 @@ xfs_initialize_perag(
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
+ INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ca871a5c349..e46f1ddd3d45 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -153,7 +153,6 @@ typedef struct xfs_mount {
trimming */
struct delayed_work m_cowblocks_work; /* background cow blocks
trimming */
- struct delayed_work m_inactive_work; /* background inode inactive */
bool m_update_sb; /* sb needs update in mount */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
@@ -392,6 +391,9 @@ typedef struct xfs_perag {
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
+ /* background inode inactivation */
+ struct delayed_work pag_inactive_work;
+
/* reference count */
uint8_t pagf_refcount_level;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 1953fb76df6f..d9363a82646b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -875,7 +875,7 @@ xfs_init_mount_workqueues(
goto out_destroy_eofb;
mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
if (!mp->m_inactive_workqueue)
goto out_destroy_sync;
@@ -1661,7 +1661,6 @@ xfs_mount_alloc(
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
- INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker);
mp->m_kobj.kobject.kset = xfs_kset;
return mp;
}