summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <darrick.wong@oracle.com> 2020-10-25 17:16:18 -0700
committer Darrick J. Wong <darrick.wong@oracle.com> 2020-10-26 18:32:36 -0700
commit d6dc6eeb2dfa76919d7ef017eedbd6b021403bb5 (patch)
tree bce16c70348a9297887ed8f304d9fe14b8e8703c
parent c1bbd7ed124391796b92e0c33b19cb248869fc3b (diff)
xfs: add metadata reservations for realtime btrees [reserve-rt-metadata-space_2020-10-26]
Extend the per-AG metadata block reservation system (which is really just an accounting trick) to support reserving blocks for realtime metadata btrees. This ensures that we will never run out of blocks for the rt rmap or refcount btrees during an operation, similar to what the per-AG reservations do.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.c 85
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_types.h 1
-rw-r--r-- fs/xfs/scrub/fscounters.c 3
-rw-r--r-- fs/xfs/xfs_fsops.c 9
-rw-r--r-- fs/xfs/xfs_mount.h 25
-rw-r--r-- fs/xfs/xfs_rtalloc.c 13
-rw-r--r-- fs/xfs/xfs_trace.h 31
8 files changed, 159 insertions, 10 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index d01e9234e512..df4f6c9c6d74 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -21,6 +21,7 @@
#include "xfs_ialloc_btree.h"
#include "xfs_sb.h"
#include "xfs_ag_resv.h"
+#include "xfs_ag.h"
/*
* Per-AG Block Reservations
@@ -74,6 +75,10 @@ xfs_ag_resv_critical(
xfs_filblks_t orig;
switch (type) {
+ case XFS_AG_RESV_RTMETADATA:
+ avail = percpu_counter_sum(&pag->pag_mount->m_fdblocks);
+ orig = pag->pag_mount->m_rtmeta_resv.ar_asked;
+ break;
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
@@ -109,6 +114,7 @@ xfs_ag_resv_needed(
len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
switch (type) {
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
len -= xfs_perag_resv(pag, type)->ar_reserved;
@@ -175,6 +181,82 @@ xfs_ag_resv_free(
return error;
}
+/* Free the rt reservation: add ar_reserved back to fdblocks, then zero it. */
+int
+xfs_rt_resv_free(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ trace_xfs_rt_resv_free(mp, 0);
+
+ error = xfs_mod_fdblocks(mp, mp->m_rtmeta_resv.ar_reserved, true);
+ if (error)
+ return error; /* the reservation counters are left as-is on failure */
+
+ mp->m_rtmeta_resv.ar_reserved = 0;
+ mp->m_rtmeta_resv.ar_asked = 0;
+ mp->m_rtmeta_resv.ar_orig_reserved = 0;
+ return 0;
+}
+
+static int
+__xfs_rt_resv_init(
+ struct xfs_mount *mp,
+ xfs_filblks_t ask, /* blocks wanted; raised to 'used' if smaller */
+ xfs_filblks_t used) /* blocks the caller reports as already in use */
+{
+ xfs_filblks_t hidden_space;
+ int error;
+
+ /*
+ * Space taken by the metadata btrees is accounted on-disk as used
+ * space. We therefore only hide the space that is reserved but not
+ * used by the trees, i.e. ask - used after clamping ask up to used.
+ */
+ if (used > ask)
+ ask = used;
+ hidden_space = ask - used;
+
+ error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
+ if (error) {
+ trace_xfs_ag_resv_init_error(mp, NULLAGNUMBER, error,
+ _RET_IP_);
+ xfs_warn(mp,
+"Space reservation for rt metadata failed. Filesystem may run out of space.");
+ return error;
+ }
+
+ mp->m_rtmeta_resv.ar_asked = ask;
+ mp->m_rtmeta_resv.ar_orig_reserved = hidden_space;
+ mp->m_rtmeta_resv.ar_reserved = ask - used; /* same value as hidden_space */
+
+ trace_xfs_rt_resv_init(mp, ask);
+ return 0;
+}
+
+/* Create the rt metadata block reservation if ar_asked is still zero. */
+int
+xfs_rt_resv_init(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp) /* not referenced in this function */
+{
+ xfs_filblks_t ask;
+ xfs_filblks_t used;
+ int error;
+
+ /* No rt metadata asks for blocks here yet: ask and used are both zero. */
+ if (mp->m_rtmeta_resv.ar_asked == 0) {
+ ask = used = 0;
+
+ error = __xfs_rt_resv_init(mp, ask, used);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
static int
__xfs_ag_resv_init(
struct xfs_perag *pag,
@@ -200,6 +282,7 @@ __xfs_ag_resv_init(
*/
hidden_space = ask;
break;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
/*
* Space taken by all other metadata btrees are accounted
@@ -333,6 +416,7 @@ xfs_ag_resv_alloc_extent(
switch (type) {
case XFS_AG_RESV_AGFL:
return;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
@@ -375,6 +459,7 @@ xfs_ag_resv_free_extent(
switch (type) {
case XFS_AG_RESV_AGFL:
return;
+ case XFS_AG_RESV_RTMETADATA:
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index d851510e01de..3899063d5892 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -8,6 +8,8 @@
int xfs_ag_resv_free(struct xfs_perag *pag);
int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);
+int xfs_rt_resv_free(struct xfs_mount *mp);
+int xfs_rt_resv_init(struct xfs_mount *mp, struct xfs_trans *tp);
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
xfs_filblks_t xfs_ag_resv_needed(struct xfs_perag *pag,
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 50254487e40d..5eeea36ac3b0 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -173,6 +173,7 @@ enum xfs_ag_resv_type {
XFS_AG_RESV_AGFL,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_RMAPBT,
+ XFS_AG_RESV_RTMETADATA,
/*
* Don't increase fdblocks when freeing extent. This is a pony for
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 7652f82ffbcb..793086343b32 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -234,6 +234,9 @@ retry:
delayed = percpu_counter_sum(&mp->m_delalloc_blks);
fsc->fdblocks -= delayed;
+ /* Account blocks reserved for rt metadata btrees. */
+ fsc->fdblocks -= mp->m_rtmeta_resv.ar_reserved;
+
trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
delayed);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ef1d5bb88b93..7dc0b1713ca8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -534,6 +534,12 @@ xfs_fs_reserve_ag_blocks(
error = err2;
}
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ err2 = xfs_rt_resv_init(mp, NULL);
+ if (err2 && !error)
+ error = err2;
+ }
+
if (error && error != -ENOSPC) {
xfs_warn(mp,
"Error %d reserving per-AG metadata reserve pool.", error);
@@ -555,6 +561,9 @@ xfs_fs_unreserve_ag_blocks(
int error = 0;
int err2;
+ if (xfs_sb_version_hasrealtime(&mp->m_sb))
+ error = xfs_rt_resv_free(mp);
+
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
pag = xfs_perag_get(mp, agno);
err2 = xfs_ag_resv_free(pag);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b9e6c8e7f85b..eff148d916c5 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -55,6 +55,16 @@ struct xfs_error_cfg {
long retry_timeout; /* in jiffies, -1 = infinite */
};
+/* metadata block reservation counters; struct xfs_mount embeds one for rt */
+struct xfs_ag_resv {
+ /* number of blocks originally reserved here */
+ xfs_filblks_t ar_orig_reserved;
+ /* number of blocks reserved here */
+ xfs_filblks_t ar_reserved;
+ /* number of blocks originally asked for */
+ xfs_filblks_t ar_asked;
+};
+
/*
* The struct xfsmount layout is optimised to separate read-mostly variables
* from variables that are frequently modified. We put the read-mostly variables
@@ -234,6 +244,9 @@ typedef struct xfs_mount {
* while a repair freeze is in progress.
*/
struct mutex m_scrub_freeze;
+
+ /* Blocks reserved for all kinds of inode-based (rt) metadata. */
+ struct xfs_ag_resv m_rtmeta_resv;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
@@ -312,16 +325,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
-/* per-AG block reservation data structures*/
-struct xfs_ag_resv {
- /* number of blocks originally reserved here */
- xfs_filblks_t ar_orig_reserved;
- /* number of blocks reserved here */
- xfs_filblks_t ar_reserved;
- /* number of blocks originally asked for */
- xfs_filblks_t ar_asked;
-};
-
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
@@ -414,6 +417,8 @@ xfs_perag_resv(
return &pag->pag_meta_resv;
case XFS_AG_RESV_RMAPBT:
return &pag->pag_rmapbt_resv;
+ case XFS_AG_RESV_RTMETADATA:
+ return &pag->pag_mount->m_rtmeta_resv;
default:
return NULL;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6b809a31eed2..bb4c783c1728 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -22,6 +22,9 @@
#include "xfs_health.h"
#include "xfs_da_format.h"
#include "xfs_imeta.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ag_resv.h"
/*
* Read and return the summary information for a given extent size,
@@ -1105,6 +1108,16 @@ error_cancel:
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
+ if (error)
+ goto out_free;
+
+ /* Reset the rt metadata btree space reservations. */
+ error = xfs_rt_resv_free(mp);
+ if (error)
+ goto out_free;
+ error = xfs_rt_resv_init(mp, NULL);
+ if (error == -ENOSPC)
+ error = 0;
out_free:
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 589ac8106db9..d0318c4818ce 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2789,6 +2789,37 @@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
+DECLARE_EVENT_CLASS(xfs_rt_resv_class, /* rt metadata reservation events */
+ TP_PROTO(struct xfs_mount *mp, xfs_filblks_t len),
+ TP_ARGS(mp, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, freeblks)
+ __field(unsigned long long, reserved)
+ __field(unsigned long long, asked)
+ __field(unsigned long long, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks); /* summed at trace time */
+ __entry->reserved = mp->m_rtmeta_resv.ar_reserved;
+ __entry->asked = mp->m_rtmeta_resv.ar_asked;
+ __entry->len = len; /* caller-supplied block count */
+ ),
+ TP_printk("dev %d:%d freeblks %llu resv %llu ask %llu len %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->freeblks,
+ __entry->reserved,
+ __entry->asked,
+ __entry->len)
+)
+#define DEFINE_RT_RESV_EVENT(name) \
+DEFINE_EVENT(xfs_rt_resv_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_filblks_t len), \
+ TP_ARGS(mp, len))
+DEFINE_RT_RESV_EVENT(xfs_rt_resv_init);
+DEFINE_RT_RESV_EVENT(xfs_rt_resv_free);
+
/* refcount tracepoint classes */
/* reuse the discard trace class for agbno/aglen-based traces */