author		Darrick J. Wong <darrick.wong@oracle.com>	2019-08-09 09:30:12 -0700
committer	Darrick J. Wong <darrick.wong@oracle.com>	2019-08-10 09:58:11 -0700
commit		772d9d9f3e057d022ca6d235b2ea5b0be599d582 (patch)
tree		6a8eafb1dcf3ae89287d792f0dd4eefef1138745
parent		00b8d248886e778edb502c4794faf00c0424cd84 (diff)

convert freesp to btree bulk load (repair-redesign_2019-08-10)
-rw-r--r--	fs/xfs/libxfs/xfs_alloc_btree.c		 89
-rw-r--r--	fs/xfs/libxfs/xfs_alloc_btree.h		  7
-rw-r--r--	fs/xfs/scrub/alloc_repair.c		599
-rw-r--r--	fs/xfs/scrub/repair.c			 15
-rw-r--r--	fs/xfs/scrub/repair.h			  4
5 files changed, 459 insertions, 255 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 2a94543857a1..c71318216931 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -24,6 +24,10 @@ STATIC struct xfs_btree_cur *
xfs_allocbt_dup_cursor(
struct xfs_btree_cur *cur)
{
+ if (cur->bc_flags & XFS_BTREE_STAGING)
+ return xfs_allocbt_stage_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.afake,
+ cur->bc_private.a.agno, cur->bc_btnum);
return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
cur->bc_private.a.agbp, cur->bc_private.a.agno,
cur->bc_btnum);
@@ -474,15 +478,13 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
/*
* Allocate a new allocation btree cursor.
*/
-struct xfs_btree_cur * /* new alloc btree cursor */
-xfs_allocbt_init_cursor(
+STATIC struct xfs_btree_cur * /* new alloc btree cursor */
+xfs_allocbt_init_common(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
- struct xfs_buf *agbp, /* buffer for agf structure */
xfs_agnumber_t agno, /* allocation group number */
xfs_btnum_t btnum) /* btree identifier */
{
- struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
struct xfs_btree_cur *cur;
ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
@@ -493,28 +495,95 @@ xfs_allocbt_init_cursor(
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_private.a.agno = agno;
- if (btnum == XFS_BTNUM_CNT) {
+ if (btnum == XFS_BTNUM_CNT)
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+ else
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+ return cur;
+}
+
+/*
+ * Allocate a new allocation btree cursor.
+ */
+struct xfs_btree_cur * /* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agf structure */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_btnum_t btnum) /* btree identifier */
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
+
+ cur = xfs_allocbt_init_common(mp, tp, agno, btnum);
+ if (btnum == XFS_BTNUM_CNT) {
cur->bc_ops = &xfs_cntbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
- cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+ cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE;
} else {
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
cur->bc_ops = &xfs_bnobt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
cur->bc_private.a.agbp = agbp;
- cur->bc_private.a.agno = agno;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+ return cur;
+}
+
+/* Create a free space btree cursor with a fake root for staging. */
+struct xfs_btree_cur *
+xfs_allocbt_stage_cursor(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xbtree_afakeroot *afake,
+ xfs_agnumber_t agno,
+ xfs_btnum_t btnum)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_btree_ops *ops;
+
+ cur = xfs_allocbt_init_common(mp, tp, agno, btnum);
+ if (btnum == XFS_BTNUM_BNO)
+ xfs_btree_stage_afakeroot(cur, afake, &xfs_bnobt_ops, &ops);
+ else
+ xfs_btree_stage_afakeroot(cur, afake, &xfs_cntbt_ops, &ops);
+ ops->set_root = xbtree_afakeroot_set_root;
+ ops->init_ptr_from_cur = xbtree_afakeroot_init_ptr_from_cur;
return cur;
}
/*
+ * Install a new free space btree root.  Caller is responsible for invalidating
+ * and freeing the old btree blocks.
+ */
+void
+xfs_allocbt_commit_staged_btree(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *agbp)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xbtree_afakeroot *afake = cur->bc_private.a.afake;
+
+ ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+
+ agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
+ agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+
+ if (cur->bc_btnum == XFS_BTNUM_BNO)
+ xfs_btree_commit_afakeroot(cur, agbp, &xfs_bnobt_ops);
+ else
+ xfs_btree_commit_afakeroot(cur, agbp, &xfs_cntbt_ops);
+}
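Taken together, the staging functions above imply the following lifecycle for rebuilding a free space btree. This is a minimal sketch using only the interfaces added in this patch; mp/tp/agno/agbp come from the caller's context, the bulk-load step is elided, and in alloc_repair.c below the fake root actually lives inside a struct xrep_newbt rather than on the stack:

	struct xbtree_afakeroot	afake = { 0 };
	struct xfs_btree_cur	*cur;

	/* Stage the new bnobt against a fake root instead of the live AGF. */
	cur = xfs_allocbt_stage_cursor(mp, tp, &afake, agno, XFS_BTNUM_BNO);

	/* ... bulk-load the free space records into the staged tree ... */

	/*
	 * Swap the staged root into the AGF; the new tree goes live here.
	 * The caller must still invalidate and free the old btree blocks.
	 */
	xfs_allocbt_commit_staged_btree(cur, agbp);
	xfs_btree_del_cursor(cur, 0);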
+
+/*
* Calculate number of records in an alloc btree block.
*/
int
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index c9305ebb69f6..dde324609a89 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -13,6 +13,7 @@
struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
+struct xbtree_afakeroot;
/*
* Btree block header size depends on a superblock flag.
@@ -48,8 +49,14 @@ struct xfs_mount;
extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *,
xfs_agnumber_t, xfs_btnum_t);
+struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp,
+ struct xfs_trans *tp, struct xbtree_afakeroot *afake,
+ xfs_agnumber_t agno, xfs_btnum_t btnum);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
+void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur,
+ struct xfs_buf *agbp);
+
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index f21506dbffaa..2bf632eb1df7 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -23,6 +23,7 @@
#include "xfs_refcount.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
+#include "xfs_bmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -72,11 +73,26 @@ struct xrep_abt {
/* All OWN_AG blocks. */
struct xfs_bitmap old_allocbt_blocks;
+ /*
+ * New bnobt information. All btree block reservations are added to
+ * the reservation list in new_bnobt_info.
+ */
+ struct xrep_newbt new_bnobt_info;
+
+ /* new cntbt information */
+ struct xrep_newbt new_cntbt_info;
+
/* Free space extents. */
struct xfbma *free_records;
struct xfs_scrub *sc;
+ /* Number of non-null records in @free_records. */
+ uint64_t nr_real_records;
+
+ /* get_data()'s position in the free space record array. */
+ uint64_t iter;
+
/*
* Next block we anticipate seeing in the rmap records. If the next
* rmap record is greater than next_bno, we have found unused space.
@@ -85,6 +101,9 @@ struct xrep_abt {
/* Number of free blocks in this AG. */
xfs_agblock_t nr_blocks;
+
+ /* Longest free extent we found in the AG. */
+ xfs_agblock_t longest;
};
/* Record extents that aren't in use from gaps in the rmap records. */
@@ -151,9 +170,12 @@ xrep_abt_walk_agfl(
return xfs_bitmap_set(&ra->not_allocbt_blocks, fsb, 1);
}
-/* Compare two free space extents. */
+/*
+ * Compare two free space extents by block number. We want to sort the
+ * records in order of increasing block number, matching the bnobt ordering.
+ */
static int
-xrep_abt_extent_cmp(
+xrep_bnobt_extent_cmp(
const void *a,
const void *b)
{
@@ -168,93 +190,30 @@ xrep_abt_extent_cmp(
}
/*
- * Add a free space record back into the bnobt/cntbt. It is assumed that the
- * space is already accounted for in fdblocks, so we use a special per-AG
- * reservation code to skip the fdblocks update.
+ * Compare two free space extents by length and then block number. We want
+ * to sort first in order of increasing length and then in increasing block
+ * number, matching the cntbt's record ordering.
*/
-STATIC int
-xrep_abt_free_extent(
- const void *item,
- void *priv)
-{
- struct xrep_abt *ra = priv;
- struct xfs_scrub *sc = ra->sc;
- const struct xrep_abt_extent *rae = item;
- xfs_fsblock_t fsbno;
- int error;
-
- fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae->bno);
-
- error = xfs_free_extent(sc->tp, fsbno, rae->len,
- &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_IGNORE);
- if (error)
- return error;
- return xrep_roll_ag_trans(sc);
-}
-
-/* Find the longest free extent in the list. */
static int
-xrep_abt_get_longest(
- struct xrep_abt *ra,
- struct xrep_abt_extent *longest)
-{
- struct xrep_abt_extent rae;
- uint64_t victim = -1ULL;
- uint64_t i;
-
- longest->len = 0;
- foreach_xfbma_item(ra->free_records, i, rae) {
- if (rae.len > longest->len) {
- memcpy(longest, &rae, sizeof(*longest));
- victim = i;
- }
- }
-
- if (longest->len == 0)
- return 0;
- return xfbma_nullify(ra->free_records, victim);
-}
-
-/*
- * Allocate a block from the (cached) first extent in the AG. In theory
- * this should never fail, since we already checked that there was enough
- * space to handle the new btrees.
- */
-STATIC xfs_agblock_t
-xrep_abt_alloc_block(
- struct xrep_abt *ra)
+xrep_cntbt_extent_cmp(
+ const void *a,
+ const void *b)
{
- struct xrep_abt_extent ext = { 0 };
- uint64_t i;
- xfs_agblock_t agbno;
- int error;
+ const struct xrep_abt_extent *ap = a;
+ const struct xrep_abt_extent *bp = b;
- /* Pull the first free space extent off the list, and... */
- foreach_xfbma_item(ra->free_records, i, ext) {
- break;
- }
- if (ext.len == 0)
- return NULLAGBLOCK;
-
- /* ...take its first block. */
- agbno = ext.bno;
- ext.bno++;
- ext.len--;
- if (ext.len)
- error = xfbma_set(ra->free_records, i, &ext);
- else
- error = xfbma_nullify(ra->free_records, i);
- if (error)
- return NULLAGBLOCK;
- return agbno;
+ if (ap->len > bp->len)
+ return 1;
+ else if (ap->len < bp->len)
+ return -1;
+ return xrep_bnobt_extent_cmp(a, b);
}
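Both comparators follow the usual qsort(3)-style negative/zero/positive contract, which is presumably what xfbma_sort() expects, so the comparator above produces an ascending (length, then block number) sort. A stand-alone user-space demo of the resulting cntbt ordering, with a hypothetical struct ext standing in for xrep_abt_extent:

	#include <stdio.h>
	#include <stdlib.h>

	struct ext { unsigned int bno; unsigned int len; };

	static int cnt_cmp(const void *a, const void *b)
	{
		const struct ext *ap = a, *bp = b;

		if (ap->len > bp->len)
			return 1;
		if (ap->len < bp->len)
			return -1;
		return ap->bno > bp->bno ? 1 : (ap->bno < bp->bno ? -1 : 0);
	}

	int main(void)
	{
		struct ext e[] = { { 10, 2 }, { 40, 1 }, { 20, 2 } };
		int i;

		qsort(e, 3, sizeof(struct ext), cnt_cmp);
		for (i = 0; i < 3; i++)
			printf("bno=%u len=%u\n", e[i].bno, e[i].len);
		/* Prints 40/1, then 10/2, then 20/2: shortest extent first. */
		return 0;
	}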
/*
- * Iterate all reverse mappings to find (1) the free extents, (2) the OWN_AG
- * extents, (3) the rmapbt blocks, and (4) the AGFL blocks. The free space is
- * (1) + (2) - (3) - (4). Figure out if we have enough free space to
- * reconstruct the free space btrees. Caller must clean up the input lists
- * if something goes wrong.
+ * Iterate all reverse mappings to find (1) the gaps between rmap records (all
+ * unowned space), (2) the OWN_AG extents (which encompass the free space
+ * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
+ * blocks. The free space is (1) + (2) - (3) - (4).
*/
STATIC int
xrep_abt_find_freespace(
@@ -264,7 +223,6 @@ xrep_abt_find_freespace(
struct xfs_btree_cur *cur;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agend;
- xfs_agblock_t nr_blocks;
int error;
xfs_bitmap_init(&ra->not_allocbt_blocks);
@@ -274,7 +232,7 @@ xrep_abt_find_freespace(
* mappings, all the OWN_AG blocks, and all the rmapbt extents.
*/
cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
- error = xfs_rmap_query_all(cur, xrep_abt_walk_rmap, &ra);
+ error = xfs_rmap_query_all(cur, xrep_abt_walk_rmap, ra);
xfs_btree_del_cursor(cur, error);
if (error)
goto err;
@@ -294,200 +252,376 @@ xrep_abt_find_freespace(
/* Collect all the AGFL blocks. */
error = xfs_agfl_walk(mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
- sc->sa.agfl_bp, xrep_abt_walk_agfl, &ra);
+ sc->sa.agfl_bp, xrep_abt_walk_agfl, ra);
if (error)
goto err;
- /*
- * Do we have enough space to rebuild both freespace btrees? We won't
- * touch the AG if we've exceeded the per-AG reservation or if we don't
- * have enough free space to store the free space information.
- */
- nr_blocks = 2 * xfs_allocbt_calc_size(mp,
- xfbma_length(ra->free_records));
- if (!xrep_ag_has_space(sc->sa.pag, 0, XFS_AG_RESV_NONE) ||
- ra->nr_blocks < nr_blocks) {
- error = -ENOSPC;
- goto err;
- }
-
/* Compute the old bnobt/cntbt blocks. */
error = xfs_bitmap_disunion(&ra->old_allocbt_blocks,
&ra->not_allocbt_blocks);
+ if (error)
+ goto err;
+
+ ra->nr_real_records = xfbma_length(ra->free_records);
err:
xfs_bitmap_destroy(&ra->not_allocbt_blocks);
return error;
}
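As a worked example of that arithmetic (all figures invented): if the gaps between rmap records total 1000 blocks, the OWN_AG extents cover 50 blocks, and of those 12 belong to the rmapbt and 8 sit on the AGFL, the rebuilt bnobt/cntbt must index 1000 + 50 - 12 - 8 = 1030 blocks of free space, because the remainder of the OWN_AG space (the old bnobt/cntbt blocks) becomes free once the new trees are live.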
/*
- * Reset the global free block counter and the per-AG counters to make it look
- * like this AG has no free space.
+ * We're going to use the observed free space records to reserve blocks for the
+ * new free space btrees, so we play an iterative game where we try to converge
+ * on the number of blocks we need:
+ *
+ * 1. Estimate how many blocks we'll need to store the records.
+ * 2. If the record we grab (the longest remaining one) has more blocks than
+ *    we need, shrink it and we're done.  We will have to re-sort the records
+ *    prior to building the cntbt.
+ * 3. If that record has exactly the number of blocks we need, null out the
+ * record. We're done.
+ * 4. Otherwise, we still need more blocks. Null out the record, subtract its
+ * length from the number of blocks we need, and go back to step 1.
+ *
+ * Fortunately, we don't have to do any transaction work to play this game, so
+ * we don't have to tear down the staging cursors.
*/
STATIC int
-xrep_abt_reset_counters(
- struct xfs_scrub *sc,
- int *log_flags)
+xrep_abt_reserve_space(
+ struct xrep_abt *ra,
+ struct xfs_btree_cur *bno_cur,
+ struct xfs_btree_bload *bno_bload,
+ struct xfs_btree_cur *cnt_cur,
+ struct xfs_btree_bload *cnt_bload,
+ bool *need_resort)
{
- struct xfs_perag *pag = sc->sa.pag;
- struct xfs_agf *agf;
- xfs_agblock_t new_btblks;
- xfs_agblock_t to_free;
+ struct xfs_scrub *sc = ra->sc;
+ uint64_t record_nr = xfbma_length(ra->free_records) - 1;
+ unsigned int allocated = 0;
+ int error = 0;
- /*
- * Since we're abandoning the old bnobt/cntbt, we have to decrease
- * fdblocks by the # of blocks in those trees. btreeblks counts the
- * non-root blocks of the free space and rmap btrees. Do this before
- * resetting the AGF counters.
- */
- agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ *need_resort = false;
+ do {
+ struct xrep_abt_extent rae;
+ uint64_t required;
+ unsigned int desired;
+ unsigned int found;
- /* rmap_blocks accounts root block, btreeblks doesn't */
- new_btblks = be32_to_cpu(agf->agf_rmap_blocks) - 1;
+ /* Compute how many blocks we'll need. */
+ error = xfs_btree_bload_init(cnt_cur, cnt_bload,
+ ra->nr_real_records, 0, 0);
+ if (error)
+ break;
- /* btreeblks doesn't account bno/cnt root blocks */
- to_free = pag->pagf_btreeblks + 2;
+ error = xfs_btree_bload_init(bno_cur, bno_bload,
+ ra->nr_real_records, 0, 0);
+ if (error)
+ break;
- /* and don't account for the blocks we aren't freeing */
- to_free -= new_btblks;
+ /* How many btree blocks do we need to store all records? */
+ required = cnt_bload->nr_blocks + bno_bload->nr_blocks;
+ ASSERT(required < INT_MAX);
- /*
- * Reset the per-AG info, both incore and ondisk. Mark the incore
- * state stale in case we fail out of here.
- */
- ASSERT(pag->pagf_init);
- pag->pagf_init = 0;
- pag->pagf_btreeblks = new_btblks;
- pag->pagf_freeblks = 0;
- pag->pagf_longest = 0;
+ /* If we've reserved enough blocks, we're done. */
+ if (allocated >= required)
+ break;
- agf->agf_btreeblks = cpu_to_be32(new_btblks);
- agf->agf_freeblks = 0;
- agf->agf_longest = 0;
- *log_flags |= XFS_AGF_BTREEBLKS | XFS_AGF_LONGEST | XFS_AGF_FREEBLKS;
+ desired = required - allocated;
- return 0;
+ /* We need space but there's none left; bye! */
+ if (ra->nr_real_records == 0) {
+ error = -ENOSPC;
+ break;
+ }
+
+ /* Grab the record at the end of the list (the longest remaining). */
+ error = xfbma_get(ra->free_records, record_nr, &rae);
+ if (error)
+ break;
+
+ ASSERT(rae.len <= UINT_MAX);
+ found = min_t(unsigned int, rae.len, desired);
+
+ error = xrep_newbt_add_reservation(&ra->new_bnobt_info,
+ XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae.bno),
+ found);
+ if (error)
+ break;
+ allocated += found;
+ ra->nr_blocks -= found;
+
+ if (rae.len > desired) {
+ /*
+ * Record has more space than we need. The number of
+ * free records doesn't change, so shrink the free
+ * record and exit the loop.
+ */
+ rae.bno += desired;
+ rae.len -= desired;
+ error = xfbma_set(ra->free_records, record_nr, &rae);
+ if (error)
+ break;
+ *need_resort = true;
+ break;
+ } else {
+ /*
+ * We're going to use up the entire record, so nullify
+ * it and move on to the next one. This changes the
+ * number of free records, so we must go around the
+ * loop once more to re-run _bload_init.
+ */
+ error = xfbma_nullify(ra->free_records, record_nr);
+ if (error)
+ break;
+ ra->nr_real_records--;
+ record_nr--;
+ }
+ } while (1);
+
+ return error;
}
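To make the convergence concrete (figures invented): suppose the two _bload_init calls estimate that five btree blocks are needed and the tail of the length-sorted record array holds extents of 4 and 3 blocks. The first pass reserves all 4 blocks of the last record, nullifies it, and loops because 4 < 5. The second pass re-estimates (say it is still five blocks), needs one more, reserves 1 block from the 3-block record, shrinks that record to 2 blocks starting one block higher, sets *need_resort, and breaks out. That shrunken record is why the caller must re-sort by length before loading the cntbt.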
-/* Initialize a new free space btree root and implant into AGF. */
+/*
+ * Deal with all the space we reserved. Blocks that were allocated for the
+ * free space btrees need to have a (deferred) rmap added for the OWN_AG
+ * allocation, and blocks that didn't get used can be freed via the usual
+ * (deferred) means.
+ */
STATIC int
-xrep_abt_reset_btree(
- struct xrep_abt *ra,
- xfs_btnum_t btnum)
+xrep_abt_dispose_reservations(
+ struct xrep_abt *ra)
{
+ struct xrep_newbt_resv *resv, *n;
struct xfs_scrub *sc = ra->sc;
- struct xfs_buf *bp;
- struct xfs_perag *pag = sc->sa.pag;
- struct xfs_mount *mp = sc->mp;
- struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
- const struct xfs_buf_ops *ops;
- xfs_agblock_t agbno;
- int error;
+ int error = 0;
+
+ for_each_xrep_newbt_reservation(&ra->new_bnobt_info, resv, n) {
+ /* Add a deferred rmap for each extent we used. */
+ if (resv->used > 0) {
+ error = xfs_rmap_alloc_extent(sc->tp, resv->fsbno,
+ resv->used, XFS_RMAP_OWN_AG, false);
+ if (error)
+ break;
+ }
- /* Allocate new root block. */
- agbno = xrep_abt_alloc_block(ra);
- if (agbno == NULLAGBLOCK)
- return -ENOSPC;
-
- switch (btnum) {
- case XFS_BTNUM_BNOi:
- ops = &xfs_bnobt_buf_ops;
- break;
- case XFS_BTNUM_CNTi:
- ops = &xfs_cntbt_buf_ops;
- break;
- default:
- ASSERT(0);
- return -EFSCORRUPTED;
+ /*
+ * Add a deferred free for each block we didn't use and now
+ * have to add to the free space since the new btrees are
+ * online.
+ */
+ if (resv->used < resv->len)
+ __xfs_bmap_add_free(sc->tp, resv->fsbno + resv->used,
+ resv->len - resv->used, NULL, true);
}
- /* Initialize new tree root. */
- error = xrep_init_btblock(sc, XFS_AGB_TO_FSB(mp, sc->sa.agno, agbno),
- &bp, btnum, ops);
- if (error)
- return error;
+ for_each_xrep_newbt_reservation(&ra->new_bnobt_info, resv, n) {
+ list_del(&resv->list);
+ kmem_free(resv);
+ }
- /* Implant into AGF. */
- agf->agf_roots[btnum] = cpu_to_be32(agbno);
- agf->agf_levels[btnum] = cpu_to_be32(1);
+ return error;
+}
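For example (numbers invented), a 10-block reservation of which the bulk loader consumed 7 ends up with a deferred OWN_AG rmap covering the first 7 blocks and a deferred free covering the trailing 3; both deferred items are finished when the repair transaction rolls.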
- /* Add rmap records for the btree roots */
- error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
- &XFS_RMAP_OINFO_AG);
- if (error)
- return error;
+/* Retrieve free space data for bulk load. */
+STATIC int
+xrep_abt_get_data(
+ struct xfs_btree_cur *cur,
+ void *priv)
+{
+ struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
+ struct xrep_abt *ra = priv;
+ int error;
- /* Reset the incore state. */
- pag->pagf_levels[btnum] = 1;
+ do {
+ error = xfbma_get(ra->free_records, ra->iter++, arec);
+ } while (error == 0 && xfbma_is_null(ra->free_records, arec));
- return 0;
+ ra->longest = max(ra->longest, arec->ar_blockcount);
+ return error;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_abt_bload_alloc(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_abt *ra = priv;
+
+ return xrep_newbt_alloc_block(cur, &ra->new_bnobt_info, ptr);
}
-/* Initialize new bnobt/cntbt roots and implant them into the AGF. */
+/*
+ * Reset the AGF counters to reflect the free space btrees that we just
+ * rebuilt, then reinitialize the per-AG data.
+ */
STATIC int
-xrep_abt_reset_btrees(
+xrep_abt_reset_counters(
struct xrep_abt *ra,
- int *log_flags)
+ unsigned int freesp_btreeblks)
{
- int error;
+ struct xfs_scrub *sc = ra->sc;
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_agf *agf;
+ struct xfs_buf *bp;
- error = xrep_abt_reset_btree(ra, XFS_BTNUM_BNOi);
- if (error)
- return error;
- error = xrep_abt_reset_btree(ra, XFS_BTNUM_CNTi);
- if (error)
- return error;
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
- *log_flags |= XFS_AGF_ROOTS | XFS_AGF_LEVELS;
- return 0;
+ /*
+ * Mark the pagf information stale and use the accessor function to
+ * forcibly reload it from the values we just logged. We still own the
+ * AGF buffer so we can safely ignore bp.
+ */
+ ASSERT(pag->pagf_init);
+ pag->pagf_init = 0;
+
+ agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
+ (be32_to_cpu(agf->agf_rmap_blocks) - 1));
+ agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
+ agf->agf_longest = cpu_to_be32(ra->longest);
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
+ XFS_AGF_LONGEST |
+ XFS_AGF_FREEBLKS);
+
+ return xfs_alloc_read_agf(sc->mp, sc->tp, sc->sa.agno, 0, &bp);
}
/*
- * Make our new freespace btree roots permanent so that we can start freeing
- * unused space back into the AG.
+ * Use the collected free space information to stage new free space btrees.
+ * If this is successful we'll return with the new btree root
+ * information logged to the repair transaction but not yet committed.
*/
STATIC int
-xrep_abt_commit_new(
- struct xrep_abt *ra,
- int log_flags)
+xrep_abt_build_new_trees(
+ struct xrep_abt *ra)
{
+ struct xfs_btree_bload bno_bload;
+ struct xfs_btree_bload cnt_bload;
struct xfs_scrub *sc = ra->sc;
+ struct xfs_btree_cur *bno_cur;
+ struct xfs_btree_cur *cnt_cur;
+ bool need_resort;
int error;
- xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, log_flags);
-
- /* Invalidate the old freespace btree blocks and commit. */
- error = xrep_invalidate_blocks(sc, &ra->old_allocbt_blocks);
+ /*
+ * Sort the free extents by length so that we can set up the free space
+ * btrees in as few extents as possible. This reduces the amount of
+ * deferred rmap / free work we have to do at the end.
+ */
+ error = xfbma_sort(ra->free_records, xrep_cntbt_extent_cmp);
if (error)
return error;
- error = xrep_roll_ag_trans(sc);
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the AG header.
+ */
+ xrep_newbt_init_bare(&ra->new_bnobt_info, sc);
+ xrep_newbt_init_bare(&ra->new_cntbt_info, sc);
+
+ /* Allocate cursors for the staged btrees. */
+ bno_cur = xfs_allocbt_stage_cursor(sc->mp, sc->tp,
+ &ra->new_bnobt_info.afake, sc->sa.agno, XFS_BTNUM_BNO);
+ cnt_cur = xfs_allocbt_stage_cursor(sc->mp, sc->tp,
+ &ra->new_cntbt_info.afake, sc->sa.agno, XFS_BTNUM_CNT);
+
+ /* Reserve the space we'll need for the new btrees. */
+ error = xrep_abt_reserve_space(ra, bno_cur, &bno_bload, cnt_cur,
+ &cnt_bload, &need_resort);
+ if (error)
+ goto out_cur;
+
+ /*
+ * If we need to re-sort the free extents by length, do that now so that
+ * we can put the records into the cntbt in the correct order.
+ */
+ if (need_resort) {
+ error = xfbma_sort(ra->free_records, xrep_cntbt_extent_cmp);
+ if (error)
+ goto out_cur;
+ }
+
+ /* Load the free space by length tree. */
+ ra->iter = 0;
+ ra->longest = 0;
+ error = xfs_btree_bload(cnt_cur, &cnt_bload, xrep_abt_get_data,
+ xrep_abt_bload_alloc, ra);
+ if (error)
+ goto out_cur;
+
+ /*
+ * Re-sort the free extents by block number so that we can put the
+ * records into the bnobt in the correct order.
+ */
+ error = xfbma_sort(ra->free_records, xrep_bnobt_extent_cmp);
+ if (error)
+ goto out_cur;
+
+ /* Load the free space by block number tree. */
+ ra->iter = 0;
+ error = xfs_btree_bload(bno_cur, &bno_bload, xrep_abt_get_data,
+ xrep_abt_bload_alloc, ra);
+ if (error)
+ goto out_cur;
+
+ /*
+ * Install the new btrees in the AG header. After this point the old
+ * btree is no longer accessible and the new tree is live.
+ *
+ * Note: We re-read the AGF here to ensure the buffer type is set
+ * properly. Since we built a new tree without attaching to the AGF
+ * buffer, the buffer item may have fallen off the buffer. This ought
+ * to succeed since the AGF is held across transaction rolls.
+ */
+ error = xfs_read_agf(sc->mp, sc->tp, sc->sa.agno, 0, &sc->sa.agf_bp);
+ if (error)
+ goto out_cur;
+
+ /* Commit our new btrees. */
+ xfs_allocbt_commit_staged_btree(bno_cur, sc->sa.agf_bp);
+ xfs_btree_del_cursor(bno_cur, 0);
+ xfs_allocbt_commit_staged_btree(cnt_cur, sc->sa.agf_bp);
+ xfs_btree_del_cursor(cnt_cur, 0);
+
+ /* Reset the AGF counters now that we've changed the btree shape. */
+ error = xrep_abt_reset_counters(ra, (bno_bload.nr_blocks - 1) +
+ (cnt_bload.nr_blocks - 1));
+ if (error)
+ goto out_newbt;
+
+ /* Dispose of any unused blocks and the accounting information. */
+ error = xrep_abt_dispose_reservations(ra);
if (error)
return error;
- /* Now that we've succeeded, mark the incore state valid again. */
- sc->sa.pag->pagf_init = 1;
- return 0;
+ return xrep_roll_ag_trans(sc);
+
+out_cur:
+ xfs_btree_del_cursor(cnt_cur, error);
+ xfs_btree_del_cursor(bno_cur, error);
+out_newbt:
+ xrep_abt_dispose_reservations(ra);
+ return error;
}
-/* Build new free space btrees and dispose of the old one. */
+/*
+ * Now that we've logged the roots of the new btrees, invalidate all of the
+ * old blocks and free them.
+ */
STATIC int
-xrep_abt_rebuild_trees(
+xrep_abt_remove_old_trees(
struct xrep_abt *ra)
{
- struct xrep_abt_extent rae;
struct xfs_scrub *sc = ra->sc;
int error;
- /*
- * Insert the longest free extent in case it's necessary to
- * refresh the AGFL with multiple blocks. If there is no longest
- * extent, we had exactly the free space we needed; we're done.
- */
- error = xrep_abt_get_longest(ra, &rae);
- if (!error && rae.len > 0) {
- error = xrep_abt_free_extent(&rae, ra);
- if (error)
- return error;
- }
+ /* Invalidate the old freespace btree blocks and commit. */
+ error = xrep_invalidate_blocks(sc, &ra->old_allocbt_blocks);
+ if (error)
+ return error;
+ error = xrep_roll_ag_trans(sc);
+ if (error)
+ return error;
/* Free all the OWN_AG blocks that are not in the rmapbt/agfl. */
error = xrep_reap_extents(sc, &ra->old_allocbt_blocks,
@@ -495,8 +629,8 @@ xrep_abt_rebuild_trees(
if (error)
return error;
- /* Insert records into the new btrees. */
- return xfbma_iter_del(ra->free_records, xrep_abt_free_extent, ra);
+ sc->flags |= XREP_RESET_PERAG_RESV;
+ return 0;
}
/* Repair the freespace btrees for some AG. */
@@ -506,7 +640,6 @@ xrep_allocbt(
{
struct xrep_abt *ra;
struct xfs_mount *mp = sc->mp;
- int log_flags = 0;
int error;
/* We require the rmapbt to rebuild anything. */
@@ -543,36 +676,14 @@ xrep_allocbt(
if (error)
goto out_bitmap;
- /* Make sure we got some free space. */
- if (xfbma_length(ra->free_records) == 0) {
- error = -ENOSPC;
- goto out_bitmap;
- }
-
- /*
- * Sort the free extents by block number to avoid bnobt splits when we
- * rebuild the free space btrees.
- */
- error = xfbma_sort(ra->free_records, xrep_abt_extent_cmp);
+ /* Rebuild the free space information. */
+ error = xrep_abt_build_new_trees(ra);
if (error)
goto out_bitmap;
- /*
- * Blow out the old free space btrees. This is the point at which
- * we are no longer able to bail out gracefully.
- */
- error = xrep_abt_reset_counters(sc, &log_flags);
- if (error)
- goto out_bitmap;
- error = xrep_abt_reset_btrees(ra, &log_flags);
- if (error)
- goto out_bitmap;
- error = xrep_abt_commit_new(ra, log_flags);
- if (error)
- goto out_bitmap;
+ /* Kill the old trees. */
+ error = xrep_abt_remove_old_trees(ra);
- /* Now rebuild the freespace information. */
- error = xrep_abt_rebuild_trees(ra);
out_bitmap:
xfs_bitmap_destroy(&ra->old_allocbt_blocks);
xfbma_destroy(ra->free_records);
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 3ecef3883b08..e14279deb0e1 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -378,6 +378,19 @@ xrep_newbt_init(
INIT_LIST_HEAD(&xnr->reservations);
}
+/*
+ * Initialize accounting resources for staging a new btree. Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc)
+{
+ xrep_newbt_init(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+ XFS_AG_RESV_NONE);
+}
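A sketch of the calling convention this implies, mirroring what alloc_repair.c does above; sc is the scrub context and agbno/len are placeholder values for space the caller found on its own:

	struct xrep_newbt	xnr;
	int			error;

	xrep_newbt_init_bare(&xnr, sc);

	/* The caller supplies its own space, one extent per reservation. */
	error = xrep_newbt_add_reservation(&xnr,
			XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, agbno), len);
	if (error)
		return error;

	/* ... stage, bulk load, and commit the new btree ... */

	/* Per the comment above, cleanup of the reservations is manual. */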
+
/* Add a space reservation manually. */
int
xrep_newbt_add_reservation(
@@ -510,7 +523,7 @@ xrep_newbt_alloc_block(
*/
if (xnr->last_resv == NULL) {
list_for_each_entry(resv, &xnr->reservations, list) {
- if (resv->used < xnr->last_resv->len) {
+ if (resv->used < resv->len) {
xnr->last_resv = resv;
break;
}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 547d916ba367..241ddd8fe6dd 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -124,9 +124,13 @@ struct xrep_newbt {
enum xfs_ag_resv_type resv;
};
+#define for_each_xrep_newbt_reservation(xnr, resv, n) \
+ list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
void xrep_newbt_init(struct xrep_newbt *xba, struct xfs_scrub *sc,
const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
enum xfs_ag_resv_type resv);
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct xfs_scrub *sc);
int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
xfs_extlen_t len);
int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);