summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2020-04-08 16:59:35 -0700
committerDarrick J. Wong <darrick.wong@oracle.com>2020-04-12 13:03:14 -0700
commit31501139f889955a5b6484163e2b834ef2c1fe68 (patch)
tree6d1a99d65c605703e40cbd6917a26493fe2584b7
parentc48bd9e1b968a148a475be169fbfa9e738906fd5 (diff)
xfs: move inode flush to a workqueuexfs-5.7-fixes_2020-04-12
Move the inode dirty data flushing to a workqueue so that multiple threads can take advantage of a single thread's flush work. The ratelimiting technique was not successful, because threads that skipped the inode flush scan due to ratelimiting would ENOSPC early and apparently now there are complaints about that. So make everyone wait. Fixes: bdd4ee4f8407 Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/xfs_mount.h6
-rw-r--r--fs/xfs/xfs_super.c40
2 files changed, 27 insertions, 19 deletions
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 42267bf052dc..dd397b778e76 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -167,8 +167,12 @@ typedef struct xfs_mount {
struct xfs_kobj m_error_meta_kobj;
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
struct xstats m_stats; /* per-fs stats */
- struct ratelimit_state m_flush_inodes_ratelimit;
+ /*
+ * Workqueue item so that we can coalesce multiple inode flush attempts
+ * into a single flush.
+ */
+ struct work_struct m_flush_inodes_work;
struct workqueue_struct *m_buf_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
struct workqueue_struct *m_cil_workqueue;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index abf06bf9c3f3..dced03a4571d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -516,6 +516,20 @@ xfs_destroy_mount_workqueues(
destroy_workqueue(mp->m_buf_workqueue);
}
+static void
+xfs_flush_inodes_worker(
+ struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(work, struct xfs_mount,
+ m_flush_inodes_work);
+ struct super_block *sb = mp->m_super;
+
+ if (down_read_trylock(&sb->s_umount)) {
+ sync_inodes_sb(sb);
+ up_read(&sb->s_umount);
+ }
+}
+
/*
* Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
* or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
@@ -526,15 +540,15 @@ void
xfs_flush_inodes(
struct xfs_mount *mp)
{
- struct super_block *sb = mp->m_super;
-
- if (!__ratelimit(&mp->m_flush_inodes_ratelimit))
+ /*
+ * If flush_work() returns true then that means we waited for a flush
+ * which was already in progress. Don't bother running another scan.
+ */
+ if (flush_work(&mp->m_flush_inodes_work))
return;
- if (down_read_trylock(&sb->s_umount)) {
- sync_inodes_sb(sb);
- up_read(&sb->s_umount);
- }
+ schedule_work(&mp->m_flush_inodes_work);
+ flush_work(&mp->m_flush_inodes_work);
}
/* Catch misguided souls that try to use this interface on XFS */
@@ -1369,17 +1383,6 @@ xfs_fc_fill_super(
if (error)
goto out_free_names;
- /*
- * Cap the number of invocations of xfs_flush_inodes to 16 for every
- * quarter of a second. The magic numbers here were determined by
- * observation neither to cause stalls in writeback when there are a
- * lot of IO threads and the fs is near ENOSPC, nor cause any fstest
- * regressions. YMMV.
- */
- ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16);
- ratelimit_set_flags(&mp->m_flush_inodes_ratelimit,
- RATELIMIT_MSG_ON_RELEASE);
-
error = xfs_init_mount_workqueues(mp);
if (error)
goto out_close_devices;
@@ -1752,6 +1755,7 @@ static int xfs_init_fs_context(
spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
+ INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);