summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-01-05 17:43:24 -0800
committerDarrick J. Wong <djwong@kernel.org>2021-03-25 17:08:16 -0700
commit7c2b52fe75a08cf5ed4d971f967400ebc3e1e9a6 (patch)
treee5a60949577fe293f925630354f0563a6236c511
parent960a33095aed5e8a25e74b22eff90cb917c65445 (diff)
xfs: reduce transaction reservations with reflinkreflink-speedups_2021-03-25
Before to the introduction of deferred refcount operations, reflink would try to cram refcount btree updates into the same transaction as an allocation or a free event. Mainline XFS has never actually done that, but we never refactored the transaction reservations to reflect that we now do all refcount updates in separate transactions. Fix this to reduce the transaction reservation size even farther, so that between this patch and the previous one, we reduce the tr_write and tr_itruncate sizes by 66%. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c9
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c97
2 files changed, 98 insertions, 8 deletions
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 66c14ef5cc5a..5154c5e55122 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -883,8 +883,13 @@ xfs_refcount_still_have_space(
{
unsigned long overhead;
- overhead = cur->bc_ag.refc.shape_changes *
- xfs_allocfree_log_count(cur->bc_mp, 1);
+ /*
+ * Worst case estimate: full splits of the free space and rmap btrees
+ * to handle each of the shape changes to the refcount btree.
+ */
+ overhead = xfs_allocfree_log_count(cur->bc_mp,
+ cur->bc_ag.refc.shape_changes);
+ overhead += cur->bc_mp->m_refc_maxlevels;
overhead *= cur->bc_mp->m_sb.sb_blocksize;
/*
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index c3990fd6fb88..00456588dd87 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -56,8 +56,7 @@ xfs_calc_buf_res(
* Per-extent log reservation for the btree changes involved in freeing or
* allocating an extent. In classic XFS there were two trees that will be
* modified (bnobt + cntbt). With rmap enabled, there are three trees
- * (rmapbt). With reflink, there are four trees (refcountbt). The number of
- * blocks reserved is based on the formula:
+ * (rmapbt). The number of blocks reserved is based on the formula:
*
* num trees * ((2 blocks/level * max depth) - 1)
*
@@ -73,13 +72,24 @@ xfs_allocfree_log_count(
blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
- if (xfs_sb_version_hasreflink(&mp->m_sb))
- blocks += num_ops * (2 * mp->m_refc_maxlevels - 1);
return blocks;
}
/*
+ * Per-extent log reservation for refcount btree changes. These are never done
+ * in the same transaction as an allocation or a free, so we compute them
+ * separately.
+ */
+static unsigned int
+xfs_refcount_log_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ return num_ops * (2 * mp->m_refc_maxlevels - 1);
+}
+
+/*
* Logging inodes is really tricksy. They are logged in memory format,
* which means that what we write into the log doesn't directly translate into
* the amount of space they use on disk.
@@ -233,6 +243,28 @@ xfs_rtalloc_log_count(
* register overflow from temporaries in the calculations.
*/
+/*
+ * Compute the log reservation required to handle the refcount update
+ * transaction. Refcount updates are always done via deferred log items.
+ *
+ * This is calculated as:
+ * Data device refcount updates (t1):
+ * the agfs of the ags containing the blocks: nr_ops * sector size
+ * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+static unsigned int
+xfs_refcount_log_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+
+ return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_refcount_log_count(mp, nr_ops), blksz);
+}
/*
* In a write transaction we can allocate a maximum of 2
@@ -255,12 +287,13 @@ xfs_rtalloc_log_count(
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And any refcount updates that happen in a separate transaction (t4).
*/
STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
- unsigned int t1, t2, t3;
+ unsigned int t1, t2, t3, t4;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
t1 = xfs_calc_inode_res(mp, 1) +
@@ -282,7 +315,9 @@ xfs_calc_write_reservation(
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ t4 = xfs_refcount_log_reservation(mp, 1);
+
+ return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
}
/*
@@ -303,11 +338,43 @@ xfs_calc_write_reservation(
* the realtime summary: 2 exts * 1 block
* worst case split in allocation btrees per extent assuming 2 extents:
* 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And any refcount updates that happen in a separate transaction (t4).
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
+ unsigned int t1, t2, t3, t4;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+ t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+ } else {
+ t3 = 0;
+ }
+
+ t4 = xfs_refcount_log_reservation(mp, 2);
+
+ return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+}
+
+/*
+ * For log size calculation, this is the same as above except that we used to
+ * include refcount updates in the allocfree computation even though we've
+ * always run them as a separate transaction.
+ */
+STATIC uint
+xfs_calc_itruncate_reservation_logsize(
+ struct xfs_mount *mp)
+{
unsigned int t1, t2, t3;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
@@ -317,6 +384,9 @@ xfs_calc_itruncate_reservation(
t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ t2 += xfs_calc_buf_res(xfs_refcount_log_count(mp, 4), blksz);
+
if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
@@ -974,6 +1044,9 @@ xfs_trans_resv_calc_logsize(
xfs_trans_resv_calc(mp, resp);
if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ unsigned int t4;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
/*
* In the early days of reflink we set the logcounts absurdly
* high.
@@ -982,6 +1055,18 @@ xfs_trans_resv_calc_logsize(
resp->tr_itruncate.tr_logcount =
XFS_ITRUNCATE_LOG_COUNT_REFLINK;
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
+
+ /*
+ * We also used to account two refcount updates per extent into
+ * the alloc/free step of write and truncate calls, even though
+ * those are run in separate transactions.
+ */
+ t4 = xfs_calc_buf_res(xfs_refcount_log_count(mp, 2), blksz);
+ resp->tr_write.tr_logres += t4;
+ resp->tr_qm_dqalloc.tr_logres += t4;
+
+ resp->tr_itruncate.tr_logres =
+ xfs_calc_itruncate_reservation_logsize(mp);
} else if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
/*
* In the early days of non-reflink rmap we set the logcount