summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2020-02-19 17:01:33 -0800
committerDarrick J. Wong <darrick.wong@oracle.com>2020-03-10 17:42:22 -0700
commit16b45f0afb2400197d81d931a1c33e35c7ea8d4b (patch)
tree5a41e3a76cfa678683383b1e80184791f3626a7d
parent16fea2ea811cfd080848444e9bef11bb47b4b982 (diff)
xfs: increase the default parallelism levels of pwork clients [pwork-parallelism_2020-03-10]
Increase the default parallelism level for pwork clients so that we can take
advantage of computers with a lot of CPUs and a lot of hardware.

8x raid0 spinning rust running quotacheck:

 1              39s
 2              29s
 4              26s
 8              24s
24 (nr_cpus)    24s

4x raid0 sata ssds running quotacheck:

 1              12s
 2              12s
 4              12s
 8              13s
24 (nr_cpus)    14s

4x raid0 nvme ssds running quotacheck:

 1              18s
 2              18s
 4              19s
 8              20s
20 (nr_cpus)    20s

So, mixed results...

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/xfs_buf.c34
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_iwalk.c2
-rw-r--r--fs/xfs/xfs_mount.c39
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_pwork.c17
-rw-r--r--fs/xfs/xfs_pwork.h2
7 files changed, 82 insertions, 14 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 217e4f82a44a..11a97bc35f70 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2158,3 +2158,37 @@ xfs_verify_magic16(
return false;
return dmagic == bp->b_ops->magic16[idx];
}
+
+/* Guess how many IO-issuing threads a device can keep busy at once. */
+unsigned int
+xfs_buftarg_guess_threads(
+	struct xfs_buftarg	*btp)
+{
+	struct block_device	*bdev = btp->bt_bdev;
+	int			min_io;
+	int			opt_io;
+
+	/*
+	 * A non-rotational device is taken to have no moving parts, so we
+	 * assume it can absorb IO requests from every online CPU at once.
+	 */
+	if (blk_queue_nonrot(bdev->bd_queue))
+		return num_online_cpus();
+
+	min_io = bdev_io_min(bdev);
+	opt_io = bdev_io_opt(bdev);
+
+	/*
+	 * Without a usable pair of IO size hints, there is nothing to tell
+	 * this device apart from a rotating disk with a single drive head,
+	 * so estimate no parallelism at all.
+	 */
+	if (min_io <= 0 || opt_io <= min_io)
+		return 1;
+
+	/*
+	 * An optimal IO size larger than the minimum IO size suggests a RAID
+	 * setup; infer the disk count and use that as the parallelism.
+	 */
+	return opt_io / min_io;
+}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d79a1fe5d738..1c2640771b9e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -348,6 +348,7 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
+unsigned int xfs_buftarg_guess_threads(struct xfs_buftarg *btp);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 233dcc8784db..f2f89cae1f3c 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -605,7 +605,7 @@ xfs_iwalk_threaded(
ASSERT(agno < mp->m_sb.sb_agcount);
ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
- nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+ nr_threads = xfs_pwork_guess_threads(mp);
error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
nr_threads);
if (error)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 56efe140c923..ad57b4403587 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1408,3 +1408,42 @@ xfs_mod_delalloc(
percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
XFS_DELALLOC_BATCH);
}
+
+/*
+ * Estimate the amount of parallelism that is available for metadata operations
+ * on this filesystem.  The estimate returned to the caller never exceeds the
+ * number of AGs or the number of online CPUs.
+ */
+unsigned int
+xfs_guess_metadata_threads(
+	struct xfs_mount	*mp)
+{
+	unsigned int		threads;
+
+	/*
+	 * Estimate the amount of parallelism for metadata operations from the
+	 * least capable of the two devices that handle metadata.
+	 */
+	threads = xfs_buftarg_guess_threads(mp->m_ddev_targp);
+	if (mp->m_logdev_targp != mp->m_ddev_targp)
+		threads = min(xfs_buftarg_guess_threads(mp->m_logdev_targp),
+			      threads);
+
+	/*
+	 * Metadata storage did not even hint that it has any parallel
+	 * capability.  If the filesystem was formatted with a stripe unit and
+	 * width, we'll treat that as evidence of a RAID setup and estimate
+	 * the number of disks.
+	 */
+	if (threads <= 1 &&
+	    mp->m_sb.sb_unit > 0 && mp->m_sb.sb_width > mp->m_sb.sb_unit)
+		threads = mp->m_sb.sb_width / mp->m_sb.sb_unit;
+
+	/*
+	 * Cap every estimate, including the stripe geometry guess, to the
+	 * number of AGs to avoid unnecessary lock contention, and never
+	 * return an estimate larger than the CPU count.
+	 */
+	threads = min(mp->m_sb.sb_agcount, threads);
+	return min(num_online_cpus(), threads);
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 88ab09ed29e7..7664af01af69 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -412,5 +412,6 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
int error_class, int error);
void xfs_force_summary_recalc(struct xfs_mount *mp);
void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
+unsigned int xfs_guess_metadata_threads(struct xfs_mount *mp);
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
index 4bcc3e61056c..5f1a5e575a48 100644
--- a/fs/xfs/xfs_pwork.c
+++ b/fs/xfs/xfs_pwork.c
@@ -118,19 +118,12 @@ xfs_pwork_poll(
touch_softlockup_watchdog();
}
-/*
- * Return the amount of parallelism that the data device can handle, or 0 for
- * no limit.
- */
+/* Estimate how many threads we need for a parallel work queue. */
unsigned int
-xfs_pwork_guess_datadev_parallelism(
+xfs_pwork_guess_threads(
struct xfs_mount *mp)
{
- struct xfs_buftarg *btp = mp->m_ddev_targp;
-
- /*
- * For now we'll go with the most conservative setting possible,
- * which is two threads for an SSD and 1 thread everywhere else.
- */
- return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+ /* pwork queues are bounded, so we must abide by WQ_MAX_ACTIVE. */
+ return min_t(unsigned int, xfs_guess_metadata_threads(mp),
+ WQ_MAX_ACTIVE);
}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
index 8133124cf3bb..f402920f7061 100644
--- a/fs/xfs/xfs_pwork.h
+++ b/fs/xfs/xfs_pwork.h
@@ -56,6 +56,6 @@ int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
-unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+unsigned int xfs_pwork_guess_threads(struct xfs_mount *mp);
#endif /* __XFS_PWORK_H__ */