summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2020-02-19 17:02:08 -0800
committer: Darrick J. Wong <darrick.wong@oracle.com> 2020-06-01 21:16:36 -0700
commit: 2281ad25919635940762507b526e0f2416535252 (patch)
tree: 3d22bdb2551a086a321088ea7c56ea7d43ee16e3
parent: f50fcbcf72ba8f7c98fc399c9ecbb1edee19c90f (diff)
xfs: parallelize block preallocation garbage collection [eofblocks-consolidation_2020-06-01]
Split the block preallocation garbage collection work into per-AG work items so that we can take advantage of parallelization.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/xfs_icache.c 47
-rw-r--r-- fs/xfs/xfs_mount.c 3
-rw-r--r-- fs/xfs/xfs_mount.h 5
-rw-r--r-- fs/xfs/xfs_super.c 25
4 files changed, 62 insertions, 18 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index bd1fa2ba7585..18c8fdaf4cac 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -971,12 +971,12 @@ xfs_inode_walk(
*/
static void
xfs_queue_blockgc(
- struct xfs_mount *mp)
+ struct xfs_perag *pag)
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_BLOCK_GC_TAG))
- queue_delayed_work(mp->m_blockgc_workqueue,
- &mp->m_blockgc_work,
+ if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCK_GC_TAG))
+ queue_delayed_work(pag->pag_mount->m_blockgc_workqueue,
+ &pag->pag_blockgc_work,
msecs_to_jiffies(xfs_blockgc_secs * 1000));
rcu_read_unlock();
}
@@ -1011,25 +1011,40 @@ void
xfs_blockgc_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_blockgc_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_blockgc_work);
int error;
- if (!sb_start_write_trylock(mp->m_super))
+ if (!sb_start_write_trylock(pag->pag_mount->m_super))
return;
- error = xfs_blockgc_scan(mp, NULL);
+
+ error = xfs_inode_walk_ag(pag, 0, xfs_blockgc_scan_inode, NULL,
+ XFS_ICI_BLOCK_GC_TAG);
if (error)
- xfs_info(mp, "preallocation gc worker failed, err=%d", error);
- sb_end_write(mp->m_super);
- xfs_queue_blockgc(mp);
+ xfs_info(pag->pag_mount,
+ "AG %u preallocation gc worker failed, err=%d",
+ pag->pag_agno, error);
+ sb_end_write(pag->pag_mount->m_super);
+ xfs_queue_blockgc(pag);
}
+#define for_each_perag_tag(mp, next_agno, pag, tag) \
+ for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
+ (pag) != NULL; \
+ (next_agno) = (pag)->pag_agno + 1, \
+ xfs_perag_put(pag), \
+ (pag) = xfs_perag_get_tag((mp), (next_agno), (tag)))
+
/* Disable post-EOF and CoW block auto-reclamation. */
void
xfs_blockgc_stop(
struct xfs_mount *mp)
{
- cancel_delayed_work_sync(&mp->m_blockgc_work);
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCK_GC_TAG)
+ cancel_delayed_work_sync(&pag->pag_blockgc_work);
}
/* Enable post-EOF and CoW block auto-reclamation. */
@@ -1037,7 +1052,11 @@ void
xfs_blockgc_start(
struct xfs_mount *mp)
{
- xfs_queue_blockgc(mp);
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCK_GC_TAG)
+ xfs_queue_blockgc(pag);
}
/*
@@ -1681,7 +1700,7 @@ __xfs_inode_set_blocks_tag(
spin_unlock(&ip->i_mount->m_perag_lock);
/* kick off background trimming */
- xfs_queue_blockgc(ip->i_mount);
+ xfs_queue_blockgc(pag);
trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1,
_RET_IP_);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 00adbc6befa3..50c2a859f24e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -126,6 +126,7 @@ __xfs_free_perag(
{
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+ ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -146,6 +147,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
+ cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -201,6 +203,7 @@ xfs_initialize_perag(
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
+ INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a2f3f3d38ae4..c99285021f29 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -155,8 +155,6 @@ typedef struct xfs_mount {
atomic_t m_active_trans; /* number trans frozen */
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
- struct delayed_work m_blockgc_work; /* background prealloc blocks
- trimming */
bool m_update_sb; /* sb needs update in mount */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
@@ -355,6 +353,9 @@ typedef struct xfs_perag {
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
+ /* background prealloc block trimming */
+ struct delayed_work pag_blockgc_work;
+
/* reference count */
uint8_t pagf_refcount_level;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 02d1844b4ea4..bd634c94b5bb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -479,7 +479,8 @@ xfs_init_mount_workqueues(
goto out_destroy_cil;
mp->m_blockgc_workqueue = alloc_workqueue("xfs-blockgc/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+ mp->m_super->s_id);
if (!mp->m_blockgc_workqueue)
goto out_destroy_reclaim;
@@ -1341,6 +1342,25 @@ xfs_fc_validate_params(
return 0;
}
+/*
+ * Constrain the number of threads that we start for background work. This
+ * is the estimated parallelism of the filesystem capped to the unbound work
+ * queue maximum.
+ *
+ * We can't set this when we allocate the workqueues because the thread count
+ * derives from AG count, and we can't know that until we're far enough through
+ * setup to read the superblock, which requires functioning workqueues.
+ */
+static inline void
+xfs_configure_background_workqueues(
+ struct xfs_mount *mp)
+{
+ unsigned int max_active = xfs_guess_metadata_threads(mp);
+
+ max_active = min_t(unsigned int, max_active, WQ_UNBOUND_MAX_ACTIVE);
+ workqueue_set_max_active(mp->m_blockgc_workqueue, max_active);
+}
+
static int
xfs_fc_fill_super(
struct super_block *sb,
@@ -1406,6 +1426,8 @@ xfs_fc_fill_super(
if (error)
goto out_free_sb;
+ xfs_configure_background_workqueues(mp);
+
error = xfs_setup_devices(mp);
if (error)
goto out_free_sb;
@@ -1757,7 +1779,6 @@ static int xfs_init_fs_context(
atomic_set(&mp->m_active_trans, 0);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_blockgc_work, xfs_blockgc_worker);
mp->m_kobj.kobject.kset = xfs_kset;
/*
* We don't create the finobt per-ag space reservation until after log