summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKuan-Wei Chiu <visitorckw@gmail.com>2024-04-07 11:39:04 +0800
committerKent Overstreet <kent.overstreet@linux.dev>2024-04-13 14:33:15 -0400
commit7d47f79a607e0be5ef333a8aa15ddc2fcfc151d3 (patch)
tree5139cd451d139f5c8bf241ccc997bdd12a21e287
parentee1276829da498c92f9c4e42e3d327aaa732fcf3 (diff)
bcachefs: Optimize eytzinger0_sort() with bottom-up heapsort
This optimization reduces the average number of comparisons required from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). When n is sufficiently large, it results in approximately 50% fewer comparisons. Currently, eytzinger0_sort employs the textbook version of heapsort, where during the heapify process, each level requires two comparisons to determine the maximum among three elements. In contrast, the bottom-up heapsort, during heapify, only compares two children at each level until reaching a leaf node. Then, it backtracks from the leaf node to find the correct position. Since heapify typically continues until very close to the leaf node, the standard heapify requires about 2*log2(n) comparisons, while the bottom-up variant only needs log2(n) comparisons. The experimental data presented below is based on an array generated by get_random_u32(). | N | comparisons(old) | comparisons(new) | time(old) | time(new) | |-------|------------------|------------------|-----------|-----------| | 10000 | 235381 | 136615 | 25545 us | 20366 us | | 20000 | 510694 | 293425 | 31336 us | 18312 us | | 30000 | 800384 | 457412 | 35042 us | 27386 us | | 40000 | 1101617 | 626831 | 48779 us | 38253 us | | 50000 | 1409762 | 799637 | 62238 us | 46950 us | | 60000 | 1721191 | 974521 | 75588 us | 58367 us | | 70000 | 2038536 | 1152171 | 90823 us | 68778 us | | 80000 | 2362958 | 1333472 | 104165 us | 78625 us | | 90000 | 2690900 | 1516065 | 116111 us | 89573 us | | 100000| 3019413 | 1699879 | 133638 us | 100998 us | Refs: BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average, QUICKSORT (if n is not very small) Ingo Wegener Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993 https://doi.org/10.1016/0304-3975(93)90364-Y Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/eytzinger.c105
1 files changed, 88 insertions, 17 deletions
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
index 0f955c3c76a7..2eaffe37b5e7 100644
--- a/fs/bcachefs/eytzinger.c
+++ b/fs/bcachefs/eytzinger.c
@@ -171,7 +171,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
swap_r_func_t swap_func,
const void *priv)
{
- int i, c, r;
+ int i, j, k;
/* called from 'sort' without swap function, let's pick the default */
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
@@ -188,17 +188,22 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
/* heapify */
for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
+ /* Find the sift-down path all the way to the leaves. */
+ for (j = i; k = j * 2 + 1, k + 1 < n;)
+ j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
- if (c + 1 < n &&
- eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
- c++;
+ /* Special case for the last leaf with no sibling. */
+ if (j * 2 + 2 == n)
+ j = j * 2 + 1;
- if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
- break;
+ /* Backtrack to the correct location. */
+ while (j != i && eytzinger0_do_cmp(base, n, size, cmp_func, priv, i, j) >= 0)
+ j = (j - 1) / 2;
- eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ /* Shift the element into its correct place. */
+ for (k = j; j != i;) {
+ j = (j - 1) / 2;
+ eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
}
}
@@ -206,17 +211,22 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
for (i = n - 1; i > 0; --i) {
eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
+ /* Find the sift-down path all the way to the leaves. */
+ for (j = 0; k = j * 2 + 1, k + 1 < i;)
+ j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
- if (c + 1 < i &&
- eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
- c++;
+ /* Special case for the last leaf with no sibling. */
+ if (j * 2 + 2 == i)
+ j = j * 2 + 1;
- if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
- break;
+ /* Backtrack to the correct location. */
+ while (j && eytzinger0_do_cmp(base, n, size, cmp_func, priv, 0, j) >= 0)
+ j = (j - 1) / 2;
- eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ /* Shift the element into its correct place. */
+ for (k = j; j;) {
+ j = (j - 1) / 2;
+ eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
}
}
}
@@ -232,3 +242,64 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
+
+#if 0
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/ktime.h>
+
+static u64 cmp_count;
+
+static int mycmp(const void *a, const void *b)
+{
+ u32 _a = *(u32 *)a;
+ u32 _b = *(u32 *)b;
+
+ cmp_count++;
+ if (_a < _b)
+ return -1;
+ else if (_a > _b)
+ return 1;
+ else
+ return 0;
+}
+
+static int test(void)
+{
+ size_t N, i;
+ ktime_t start, end;
+ s64 delta;
+ u32 *arr;
+
+ for (N = 10000; N <= 100000; N += 10000) {
+ arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL);
+ cmp_count = 0;
+
+ for (i = 0; i < N; i++)
+ arr[i] = get_random_u32();
+
+ start = ktime_get();
+ eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL);
+ end = ktime_get();
+
+ delta = ktime_us_delta(end, start);
+ printk(KERN_INFO "time: %lld\n", delta);
+ printk(KERN_INFO "comparisons: %lld\n", cmp_count);
+
+ u32 prev = 0;
+
+ eytzinger0_for_each(i, N) {
+ if (prev > arr[i])
+ goto err;
+ prev = arr[i];
+ }
+
+ kfree(arr);
+ }
+ return 0;
+
+err:
+ kfree(arr);
+ return -1;
+}
+#endif