summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2022-07-14 11:06:03 -0700
committerDarrick J. Wong <djwong@kernel.org>2022-11-09 19:07:25 -0800
commit5ff6cdc9e0b79b552ac3b1c861a13f47907a587e (patch)
tree3c04f492c28a818fe4f2fdcbe37c745fb6fc825d
parent60cef46bccc678235a132d2c1ece93ad8cb1a5aa (diff)
xfs: improve xfarray quicksort pivotbig-array_2022-11-09
Now that we have the means to do insertion sorts of small in-memory subsets of an xfarray, use it to improve the quicksort pivot algorithm by reading 7 records into memory and finding the median of that. This should prevent bad partitioning when a[lo] and a[hi] end up next to each other in the final sort, which can happen when sorting for cntbt repair when the free space is extremely fragmented (e.g. generic/176). This doesn't speed up the average quicksort run by much, but it will (hopefully) avoid the quadratic time collapse for which quicksort is famous. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/scrub/xfarray.c198
-rw-r--r--fs/xfs/scrub/xfarray.h19
2 files changed, 148 insertions, 69 deletions
diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index bed1d2c760c8..2afd36c9ca61 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -428,6 +428,14 @@ static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si)
return xfarray_sortinfo_lo(si) + si->max_stack_depth;
}
+/* Size of each element in the quicksort pivot array. */
+static inline size_t
+xfarray_pivot_rec_sz(
+ struct xfarray *array)
+{
+ return round_up(array->obj_size, 8) + sizeof(xfarray_idx_t);
+}
+
/* Allocate memory to handle the sort. */
static inline int
xfarray_sortinfo_alloc(
@@ -438,9 +446,17 @@ xfarray_sortinfo_alloc(
{
struct xfarray_sortinfo *si;
size_t nr_bytes = sizeof(struct xfarray_sortinfo);
+ size_t pivot_rec_sz = xfarray_pivot_rec_sz(array);
int max_stack_depth;
/*
+ * The median-of-nine pivot algorithm doesn't work if a subset has
+ * fewer than 9 items. Make sure the in-memory sort will always take
+ * over for subsets where this wouldn't be the case.
+ */
+ BUILD_BUG_ON(XFARRAY_QSORT_PIVOT_NR >= XFARRAY_ISORT_NR);
+
+ /*
* Tail-call recursion during the partitioning phase means that
* quicksort will never recurse more than log2(nr) times. We need one
* extra level of stack to hold the initial parameters. In-memory
@@ -454,8 +470,10 @@ xfarray_sortinfo_alloc(
/* Each level of quicksort uses a lo and a hi index */
nr_bytes += max_stack_depth * sizeof(xfarray_idx_t) * 2;
- /* Scratchpad for in-memory sort, or one record for the pivot */
- nr_bytes += (XFARRAY_ISORT_NR * array->obj_size);
+ /* Scratchpad for in-memory sort, or finding the pivot */
+ nr_bytes += max_t(size_t,
+ (XFARRAY_QSORT_PIVOT_NR + 1) * pivot_rec_sz,
+ XFARRAY_ISORT_NR * array->obj_size);
si = kvzalloc(nr_bytes, XCHK_GFP_FLAGS);
if (!si)
@@ -642,14 +660,43 @@ static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
return xfarray_sortinfo_hi(si) + si->max_stack_depth;
}
+/* Return a pointer to the start of the pivot array. */
+static inline void *
+xfarray_sortinfo_pivot_array(
+ struct xfarray_sortinfo *si)
+{
+ return xfarray_sortinfo_pivot(si) + si->array->obj_size;
+}
+
+/* The xfarray record is stored at the start of each pivot array element. */
+static inline void *
+xfarray_pivot_array_rec(
+ void *pa,
+ size_t pa_recsz,
+ unsigned int pa_idx)
+{
+ return pa + (pa_recsz * pa_idx);
+}
+
+/* The xfarray index is stored at the end of each pivot array element. */
+static inline xfarray_idx_t *
+xfarray_pivot_array_idx(
+ void *pa,
+ size_t pa_recsz,
+ unsigned int pa_idx)
+{
+ return xfarray_pivot_array_rec(pa, pa_recsz, pa_idx + 1) -
+ sizeof(xfarray_idx_t);
+}
+
/*
* Find a pivot value for quicksort partitioning, swap it with a[lo], and save
* the cached pivot record for the next step.
*
- * Select the median value from a[lo], a[mid], and a[hi]. Put the median in
- * a[lo], the lowest in a[mid], and the highest in a[hi]. Using the median of
- * the three reduces the chances that we pick the worst case pivot value, since
- * it's likely that our array values are nearly sorted.
+ * Load evenly-spaced records within the given range into memory, sort them,
+ * and choose the pivot from the median record. Using multiple points will
+ * improve the quality of the pivot selection, and hopefully avoid the worst
+ * quicksort behavior, since our array values are nearly always evenly sorted.
*/
STATIC int
xfarray_qsort_pivot(
@@ -657,76 +704,99 @@ xfarray_qsort_pivot(
xfarray_idx_t lo,
xfarray_idx_t hi)
{
- void *a = xfarray_sortinfo_pivot(si);
- void *b = xfarray_scratch(si->array);
- xfarray_idx_t mid = lo + ((hi - lo) / 2);
+ void *pivot = xfarray_sortinfo_pivot(si);
+ void *parray = xfarray_sortinfo_pivot_array(si);
+ void *recp;
+ xfarray_idx_t *idxp;
+ xfarray_idx_t step = (hi - lo) / (XFARRAY_QSORT_PIVOT_NR - 1);
+ size_t pivot_rec_sz = xfarray_pivot_rec_sz(si->array);
+ int i, j;
int error;
- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
- error = xfarray_sort_load(si, mid, a);
- if (error)
- return error;
- error = xfarray_sort_load(si, lo, b);
- if (error)
- return error;
- if (xfarray_sort_cmp(si, a, b) < 0) {
- error = xfarray_sort_store(si, lo, a);
- if (error)
- return error;
- error = xfarray_sort_store(si, mid, b);
- if (error)
- return error;
- }
+ ASSERT(step > 0);
- /* if a[hi] < a[mid], swap a[mid] and a[hi]. */
- error = xfarray_sort_load(si, hi, a);
- if (error)
- return error;
- error = xfarray_sort_load(si, mid, b);
- if (error)
- return error;
- if (xfarray_sort_cmp(si, a, b) < 0) {
- error = xfarray_sort_store(si, mid, a);
- if (error)
- return error;
- error = xfarray_sort_store(si, hi, b);
- if (error)
- return error;
- } else {
- goto move_front;
+ /*
+ * Load the xfarray indexes of the records we intend to sample into the
+ * pivot array.
+ */
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, 0);
+ *idxp = lo;
+ for (i = 1; i < XFARRAY_QSORT_PIVOT_NR - 1; i++) {
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
+ *idxp = lo + (i * step);
}
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
+ XFARRAY_QSORT_PIVOT_NR - 1);
+ *idxp = hi;
- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
- error = xfarray_sort_load(si, mid, a);
- if (error)
- return error;
- error = xfarray_sort_load(si, lo, b);
- if (error)
- return error;
- if (xfarray_sort_cmp(si, a, b) < 0) {
- error = xfarray_sort_store(si, lo, a);
- if (error)
- return error;
- error = xfarray_sort_store(si, mid, b);
+ /* Load the selected xfarray records into the pivot array. */
+ for (i = 0; i < XFARRAY_QSORT_PIVOT_NR; i++) {
+ xfarray_idx_t idx;
+
+ recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, i);
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
+
+ /* No unset records; load directly into the array. */
+ if (likely(si->array->unset_slots == 0)) {
+ error = xfarray_sort_load(si, *idxp, recp);
+ if (error)
+ return error;
+ continue;
+ }
+
+ /*
+ * Load non-null records into the scratchpad without changing
+ * the xfarray_idx_t in the pivot array.
+ */
+ idx = *idxp;
+ xfarray_sort_bump_loads(si);
+ error = xfarray_load_next(si->array, &idx, recp);
if (error)
return error;
}
-move_front:
+ xfarray_sort_bump_heapsorts(si);
+ sort(parray, XFARRAY_QSORT_PIVOT_NR, pivot_rec_sz, si->cmp_fn, NULL);
+
/*
- * Move our selected pivot to a[lo]. Recall that a == si->pivot, so
- * this leaves us with the pivot cached in the sortinfo structure.
+ * We sorted the pivot array records (which includes the xfarray
+ * indices) in xfarray record order. The median element of the pivot
+ * array contains the xfarray record that we will use as the pivot.
+ * Copy that xfarray record to the designated space.
*/
- error = xfarray_sort_load(si, lo, b);
- if (error)
- return error;
- error = xfarray_sort_load(si, mid, a);
- if (error)
- return error;
- error = xfarray_sort_store(si, mid, b);
+ recp = xfarray_pivot_array_rec(parray, pivot_rec_sz,
+ XFARRAY_QSORT_PIVOT_NR / 2);
+ memcpy(pivot, recp, si->array->obj_size);
+
+ /* If the pivot record we chose was already in a[lo] then we're done. */
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
+ XFARRAY_QSORT_PIVOT_NR / 2);
+ if (*idxp == lo)
+ return 0;
+
+ /*
+ * Find the cached copy of a[lo] in the pivot array so that we can swap
+ * a[lo] and a[pivot].
+ */
+ for (i = 0, j = -1; i < XFARRAY_QSORT_PIVOT_NR; i++) {
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
+ if (*idxp == lo)
+ j = i;
+ }
+ if (j < 0) {
+ ASSERT(j >= 0);
+ return -EFSCORRUPTED;
+ }
+
+ /* Swap a[lo] and a[pivot]. */
+ error = xfarray_sort_store(si, lo, pivot);
if (error)
return error;
- return xfarray_sort_store(si, lo, a);
+
+ recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, j);
+ idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
+ XFARRAY_QSORT_PIVOT_NR / 2);
+ return xfarray_sort_store(si, *idxp, recp);
}
/*
@@ -837,7 +907,7 @@ xfarray_sort_load_cached(
* particularly expensive in the kernel.
*
* 2. For arrays with records in arbitrary or user-controlled order, choose the
- * pivot element using a median-of-three decision tree. This reduces the
+ * pivot element using a median-of-nine decision tree. This reduces the
* probability of selecting a bad pivot value which causes worst case
* behavior (i.e. partition sizes of 1).
*
diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h
index d2b5e55c6886..7545eb48680c 100644
--- a/fs/xfs/scrub/xfarray.h
+++ b/fs/xfs/scrub/xfarray.h
@@ -63,6 +63,9 @@ typedef cmp_func_t xfarray_cmp_fn;
#define XFARRAY_ISORT_SHIFT (4)
#define XFARRAY_ISORT_NR (1U << XFARRAY_ISORT_SHIFT)
+/* Evalulate this many points to find the qsort pivot. */
+#define XFARRAY_QSORT_PIVOT_NR (9)
+
struct xfarray_sortinfo {
struct xfarray *array;
@@ -95,7 +98,6 @@ struct xfarray_sortinfo {
uint64_t compares;
uint64_t heapsorts;
#endif
-
/*
* Extra bytes are allocated beyond the end of the structure to store
* quicksort information. C does not permit multiple VLAs per struct,
@@ -118,11 +120,18 @@ struct xfarray_sortinfo {
* xfarray_rec_t scratch[ISORT_NR];
*
* Otherwise, we want to partition the records to partition the array.
- * We store the chosen pivot record here and use the xfarray scratchpad
- * to rearrange the array around the pivot:
- *
- * xfarray_rec_t pivot;
+ * We store the chosen pivot record at the start of the scratchpad area
+ * and use the rest to sample some records to estimate the median.
+ * The format of the qsort_pivot array enables us to use the kernel
+ * heapsort function to place the median value in the middle.
*
+ * struct {
+ * xfarray_rec_t pivot;
+ * struct {
+ * xfarray_rec_t rec; (rounded up to 8 bytes)
+ * xfarray_idx_t idx;
+ * } qsort_pivot[QSORT_PIVOT_NR];
+ * };
* }
*/
};