summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org>, 2021-01-05 17:45:51 -0800
committer: Darrick J. Wong <djwong@kernel.org>, 2021-03-25 17:08:41 -0700
commit: 74b914d1ceaaf0fd9c5c492a8ff25b6a671c9bb9 (patch)
tree: a661f67590185704c2aa741ab7e0ee5fc6437673
parent: d16cdb990ba0515732bc043b7bcbd982ef92d42f (diff)
xfs: add metadata reservations for realtime btrees [reserve-rt-metadata-space_2021-03-25]
Extend the per-AG metadata block reservation system (which is really just an accounting trick) to support reserving blocks for realtime metadata btrees. This ensures that we will never run out of blocks for the rt rmap or refcount btrees during an operation, similar to what the per-AG reservations do.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.c 85
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_types.h 1
-rw-r--r-- fs/xfs/scrub/fscounters.c 3
-rw-r--r-- fs/xfs/xfs_fsops.c 9
-rw-r--r-- fs/xfs/xfs_mount.h 25
-rw-r--r-- fs/xfs/xfs_rtalloc.c 13
-rw-r--r-- fs/xfs/xfs_trace.h 31
8 files changed, 159 insertions, 10 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index d12feec486fb..c0cc9c823c9e 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -21,6 +21,7 @@
#include "xfs_ialloc_btree.h"
#include "xfs_sb.h"
#include "xfs_ag_resv.h"
+#include "xfs_ag.h"
/*
* Per-AG Block Reservations
@@ -75,6 +76,10 @@ xfs_ag_resv_critical(
xfs_extlen_t btree_maxlevels;
switch (type) {
+ case XFS_AG_RESV_RTMETADATA:
+ avail = percpu_counter_sum(&pag->pag_mount->m_fdblocks);
+ orig = pag->pag_mount->m_rtmeta_resv.ar_asked;
+ break;
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
@@ -111,6 +116,7 @@ xfs_ag_resv_needed(
len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
switch (type) {
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
len -= xfs_perag_resv(pag, type)->ar_reserved;
@@ -177,6 +183,82 @@ xfs_ag_resv_free(
return error;
}
+/*
+ * Tear down the realtime metadata block reservation.
+ *
+ * Hands mp->m_rtmeta_resv.ar_reserved blocks back to the free block counter
+ * through xfs_mod_fdblocks() and then zeroes all three reservation fields.
+ * Because xfs_rt_resv_init() only acts when ar_asked is zero, clearing the
+ * fields is what allows the reservation to be set up again afterwards.
+ *
+ * Returns 0 on success.  If xfs_mod_fdblocks() fails, the failure is traced,
+ * the reservation fields are left untouched, and the error is returned.
+ */
+int
+xfs_rt_resv_free(
+	struct xfs_mount	*mp)
+{
+	int			error;
+
+	trace_xfs_rt_resv_free(mp, 0);
+
+	error = xfs_mod_fdblocks(mp, mp->m_rtmeta_resv.ar_reserved, true);
+	if (error) {
+		/*
+		 * There is no AG behind this reservation, so report the
+		 * failure against NULLAGNUMBER, the same way the rt init
+		 * path reports its own failures.
+		 */
+		trace_xfs_ag_resv_free_error(mp, NULLAGNUMBER, error,
+				_RET_IP_);
+		return error;
+	}
+
+	mp->m_rtmeta_resv.ar_reserved = 0;
+	mp->m_rtmeta_resv.ar_asked = 0;
+	mp->m_rtmeta_resv.ar_orig_reserved = 0;
+	return 0;
+}
+
+/*
+ * Set up the rt metadata reservation for @ask blocks, of which @used are
+ * already consumed.
+ *
+ * @ask is raised to @used when @used is the larger of the two.  Only the
+ * reserved-but-unused remainder is subtracted from the free block counter.
+ * Returns 0 on success, or the xfs_mod_fdblocks() error, in which case the
+ * reservation fields are not modified.
+ */
+static int
+__xfs_rt_resv_init(
+	struct xfs_mount	*mp,
+	xfs_filblks_t		ask,
+	xfs_filblks_t		used)
+{
+	struct xfs_ag_resv	*resv = &mp->m_rtmeta_resv;
+	xfs_filblks_t		unused;
+	int			error;
+
+	/*
+	 * Blocks that the metadata btrees already occupy are counted on disk
+	 * as used space, so only the part of the reservation that has not
+	 * been consumed yet needs to be hidden from the free space counter.
+	 */
+	ask = (ask < used) ? used : ask;
+	unused = ask - used;
+
+	error = xfs_mod_fdblocks(mp, -(int64_t)unused, true);
+	if (!error) {
+		resv->ar_asked = ask;
+		resv->ar_reserved = unused;
+		resv->ar_orig_reserved = unused;
+
+		/* Trace last: the event samples the fields set above. */
+		trace_xfs_rt_resv_init(mp, ask);
+		return 0;
+	}
+
+	trace_xfs_ag_resv_init_error(mp, NULLAGNUMBER, error, _RET_IP_);
+	xfs_warn(mp,
+"Space reservation for rt metadata failed. Filesystem may run out of space.");
+	return error;
+}
+
+/*
+ * Create the rt metadata block reservation.
+ *
+ * Nothing happens if a reservation is already in place (ar_asked is
+ * nonzero).  Otherwise the reservation is initialised with an ask and a used
+ * count of zero blocks.  @tp is accepted but not consulted here.  Returns 0
+ * or the error from __xfs_rt_resv_init().
+ */
+int
+xfs_rt_resv_init(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp)
+{
+	xfs_filblks_t		ask = 0;
+	xfs_filblks_t		used = 0;
+
+	/* A nonzero ask means the reservation was set up earlier. */
+	if (mp->m_rtmeta_resv.ar_asked != 0)
+		return 0;
+
+	return __xfs_rt_resv_init(mp, ask, used);
+}
+
static int
__xfs_ag_resv_init(
struct xfs_perag *pag,
@@ -202,6 +284,7 @@ __xfs_ag_resv_init(
*/
hidden_space = ask;
break;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
/*
* Space taken by all other metadata btrees are accounted
@@ -335,6 +418,7 @@ xfs_ag_resv_alloc_extent(
switch (type) {
case XFS_AG_RESV_AGFL:
return;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
@@ -377,6 +461,7 @@ xfs_ag_resv_free_extent(
switch (type) {
case XFS_AG_RESV_AGFL:
return;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index d851510e01de..3899063d5892 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -8,6 +8,8 @@
int xfs_ag_resv_free(struct xfs_perag *pag);
int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);
+int xfs_rt_resv_free(struct xfs_mount *mp);
+int xfs_rt_resv_init(struct xfs_mount *mp, struct xfs_trans *tp);
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
xfs_filblks_t xfs_ag_resv_needed(struct xfs_perag *pag,
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 24b859da68ef..9c7b4cbfd602 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -173,6 +173,7 @@ enum xfs_ag_resv_type {
XFS_AG_RESV_AGFL,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_RMAPBT,
+ XFS_AG_RESV_RTMETADATA,
/*
* Don't increase fdblocks when freeing extent. This is a pony for
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 812a69cc723a..04a766924d75 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -233,6 +233,9 @@ retry:
delayed = percpu_counter_sum(&mp->m_delalloc_blks);
fsc->fdblocks -= delayed;
+ /* Account blocks reserved for rt metadata btrees. */
+ fsc->fdblocks -= mp->m_rtmeta_resv.ar_reserved;
+
trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
delayed);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ef6a700679ad..9c7c104e47fa 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -535,6 +535,12 @@ xfs_fs_reserve_ag_blocks(
error = err2;
}
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ err2 = xfs_rt_resv_init(mp, NULL);
+ if (err2 && !error)
+ error = err2;
+ }
+
if (error && error != -ENOSPC) {
xfs_warn(mp,
"Error %d reserving per-AG metadata reserve pool.", error);
@@ -556,6 +562,9 @@ xfs_fs_unreserve_ag_blocks(
int error = 0;
int err2;
+ if (xfs_sb_version_hasrealtime(&mp->m_sb))
+ error = xfs_rt_resv_free(mp);
+
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
pag = xfs_perag_get(mp, agno);
err2 = xfs_ag_resv_free(pag);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3a4ce7be8d6b..f6bc51a7f985 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -59,6 +59,16 @@ struct xfs_hook_chain {
struct srcu_notifier_head head;
};
+/*
+ * Metadata object block reservation data structure.
+ *
+ * One of these backs each per-AG reservation handed out by xfs_perag_resv(),
+ * and one is embedded in struct xfs_mount as m_rtmeta_resv to cover the
+ * realtime metadata btrees.
+ */
+struct xfs_ag_resv {
+ /*
+  * number of blocks originally reserved here; for m_rtmeta_resv this is
+  * recorded when the reservation is initialised
+  */
+ xfs_filblks_t ar_orig_reserved;
+ /*
+  * number of blocks reserved here; for m_rtmeta_resv this is the amount
+  * that xfs_rt_resv_free() hands back to the free block counter
+  */
+ xfs_filblks_t ar_reserved;
+ /*
+  * number of blocks originally asked for; xfs_rt_resv_init() only sets up
+  * the rt reservation while this is zero
+  */
+ xfs_filblks_t ar_asked;
+};
+
/*
* The struct xfsmount layout is optimised to separate read-mostly variables
* from variables that are frequently modified. We put the read-mostly variables
@@ -233,6 +243,9 @@ typedef struct xfs_mount {
* while a repair freeze is in progress.
*/
struct mutex m_scrub_freeze;
+
+ /* Blocks reserved for all kinds of inode-based (rt) metadata. */
+ struct xfs_ag_resv m_rtmeta_resv;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
@@ -319,16 +332,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
-/* per-AG block reservation data structures*/
-struct xfs_ag_resv {
- /* number of blocks originally reserved here */
- xfs_filblks_t ar_orig_reserved;
- /* number of blocks reserved here */
- xfs_filblks_t ar_reserved;
- /* number of blocks originally asked for */
- xfs_filblks_t ar_asked;
-};
-
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
@@ -420,6 +423,8 @@ xfs_perag_resv(
return &pag->pag_meta_resv;
case XFS_AG_RESV_RMAPBT:
return &pag->pag_rmapbt_resv;
+ case XFS_AG_RESV_RTMETADATA:
+ return &pag->pag_mount->m_rtmeta_resv;
default:
return NULL;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 52d243188569..ed98a7d412c2 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -22,6 +22,9 @@
#include "xfs_health.h"
#include "xfs_da_format.h"
#include "xfs_imeta.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ag_resv.h"
/*
* Read and return the summary information for a given extent size,
@@ -1110,6 +1113,16 @@ error_cancel:
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
+ if (error)
+ goto out_free;
+
+ /* Reset the rt metadata btree space reservations. */
+ error = xfs_rt_resv_free(mp);
+ if (error)
+ goto out_free;
+ error = xfs_rt_resv_init(mp, NULL);
+ if (error == -ENOSPC)
+ error = 0;
out_free:
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6efd54a618e0..970d688c30a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2773,6 +2773,37 @@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
+DECLARE_EVENT_CLASS(xfs_rt_resv_class, /* rt metadata reservation events */
+ TP_PROTO(struct xfs_mount *mp, xfs_filblks_t len),
+ TP_ARGS(mp, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, freeblks) /* summed mp->m_fdblocks */
+ __field(unsigned long long, reserved) /* m_rtmeta_resv.ar_reserved */
+ __field(unsigned long long, asked) /* m_rtmeta_resv.ar_asked */
+ __field(unsigned long long, len) /* length supplied by the caller */
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ __entry->reserved = mp->m_rtmeta_resv.ar_reserved; /* sampled when the event fires */
+ __entry->asked = mp->m_rtmeta_resv.ar_asked; /* sampled when the event fires */
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d freeblks %llu resv %llu ask %llu len %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->freeblks,
+ __entry->reserved,
+ __entry->asked,
+ __entry->len)
+)
+#define DEFINE_RT_RESV_EVENT(name) \
+DEFINE_EVENT(xfs_rt_resv_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_filblks_t len), \
+ TP_ARGS(mp, len))
+DEFINE_RT_RESV_EVENT(xfs_rt_resv_init); /* fired after the reservation fields are set; len is the ask */
+DEFINE_RT_RESV_EVENT(xfs_rt_resv_free); /* fired on entry to xfs_rt_resv_free(); len is 0 */
+
/* refcount tracepoint classes */
/* reuse the discard trace class for agbno/aglen-based traces */