From f27b135285d032d9570a1accb00412e111b38878 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Nov 2023 02:20:42 -0500 Subject: Update bcachefs sources to 71a5b27e017d bcachefs: Make backpointer fsck wb flush check more rigorous Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- Makefile | 9 +- include/linux/mean_and_variance.h | 198 -------------------------------------- libbcachefs/backpointers.c | 31 +++--- libbcachefs/btree_gc.c | 3 - libbcachefs/btree_iter.c | 2 - libbcachefs/btree_locking.c | 14 +-- libbcachefs/btree_types.h | 1 - libbcachefs/data_update.c | 2 +- libbcachefs/mean_and_variance.c | 159 ++++++++++++++++++++++++++++++ libbcachefs/mean_and_variance.h | 198 ++++++++++++++++++++++++++++++++++++++ libbcachefs/util.c | 2 +- libbcachefs/util.h | 3 +- linux/mean_and_variance.c | 158 ------------------------------ 14 files changed, 389 insertions(+), 393 deletions(-) delete mode 100644 include/linux/mean_and_variance.h create mode 100644 libbcachefs/mean_and_variance.c create mode 100644 libbcachefs/mean_and_variance.h delete mode 100644 linux/mean_and_variance.c diff --git a/.bcachefs_revision b/.bcachefs_revision index 9acffb57..b0f07a3d 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -676dd269f0f8d59f730ba8e96474448576e4f677 +71a5b27e017df6ebae391da58857b22fdc406276 diff --git a/Makefile b/Makefile index 3bd84874..738c9e20 100644 --- a/Makefile +++ b/Makefile @@ -218,10 +218,11 @@ update-bcachefs-sources: git add include/linux/kmemleak.h cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/ git add linux/int_sqrt.c - cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/ - git add linux/mean_and_variance.c - cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/ - git add include/linux/mean_and_variance.h + rm libbcachefs/mean_and_variance_test.c +# cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/ +# git add linux/mean_and_variance.c +# cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/ +# git add include/linux/mean_and_variance.h cp $(LINUX_DIR)/scripts/Makefile.compiler ./ git add Makefile.compiler $(RM) libbcachefs/*.mod.c diff --git a/include/linux/mean_and_variance.h b/include/linux/mean_and_variance.h deleted file mode 100644 index 64750501..00000000 --- a/include/linux/mean_and_variance.h +++ /dev/null @@ -1,198 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef MEAN_AND_VARIANCE_H_ -#define MEAN_AND_VARIANCE_H_ - -#include -#include -#include -#include - -#define SQRT_U64_MAX 4294967295ULL - -/* - * u128_u: u128 user mode, because not all architectures support a real int128 - * type - */ - -#ifdef __SIZEOF_INT128__ - -typedef struct { - unsigned __int128 v; -} __aligned(16) u128_u; - -static inline u128_u u64_to_u128(u64 a) -{ - return (u128_u) { .v = a }; -} - -static inline u64 u128_lo(u128_u a) -{ - return a.v; -} - -static inline u64 u128_hi(u128_u a) -{ - return a.v >> 64; -} - -static inline u128_u u128_add(u128_u a, u128_u b) -{ - a.v += b.v; - return a; -} - -static inline u128_u u128_sub(u128_u a, u128_u b) -{ - a.v -= b.v; - return a; -} - -static inline u128_u u128_shl(u128_u a, s8 shift) -{ - a.v <<= shift; - return a; -} - -static inline u128_u u128_square(u64 a) -{ - u128_u b = u64_to_u128(a); - - b.v *= b.v; - return b; -} - -#else - -typedef struct { - u64 hi, lo; -} __aligned(16) u128_u; - -/* conversions */ - -static inline u128_u u64_to_u128(u64 a) -{ - return (u128_u) { .lo = a }; -} - -static inline u64 u128_lo(u128_u a) -{ - return a.lo; -} - -static inline u64 u128_hi(u128_u a) -{ - return a.hi; -} - -/* arithmetic */ - -static inline u128_u u128_add(u128_u a, u128_u b) -{ - u128_u c; - - c.lo = a.lo + b.lo; - c.hi = a.hi + b.hi + (c.lo < a.lo); - return c; -} - -static inline u128_u u128_sub(u128_u a, u128_u b) -{ - u128_u c; - - c.lo = a.lo - b.lo; - c.hi = a.hi - b.hi - (c.lo > a.lo); - return c; -} - -static inline u128_u u128_shl(u128_u i, s8 shift) -{ - u128_u r; - - r.lo = i.lo << shift; - if (shift < 64) - r.hi = (i.hi << shift) | (i.lo >> (64 - shift)); - else { - r.hi = i.lo << (shift - 64); - r.lo = 0; - } - return r; -} - -static inline u128_u u128_square(u64 i) -{ - u128_u r; - u64 h = i >> 32, l = i & U32_MAX; - - r = u128_shl(u64_to_u128(h*h), 64); - r = u128_add(r, u128_shl(u64_to_u128(h*l), 32)); - r = u128_add(r, u128_shl(u64_to_u128(l*h), 32)); - r = u128_add(r, u64_to_u128(l*l)); - return r; -} - -#endif - -static inline u128_u u64s_to_u128(u64 hi, u64 lo) -{ - u128_u c = u64_to_u128(hi); - - c = u128_shl(c, 64); - c = u128_add(c, u64_to_u128(lo)); - return c; -} - -u128_u u128_div(u128_u n, u64 d); - -struct mean_and_variance { - s64 n; - s64 sum; - u128_u sum_squares; -}; - -/* expontentially weighted variant */ -struct mean_and_variance_weighted { - bool init; - u8 weight; /* base 2 logarithim */ - s64 mean; - u64 variance; -}; - -/** - * fast_divpow2() - fast approximation for n / (1 << d) - * @n: numerator - * @d: the power of 2 denominator. - * - * note: this rounds towards 0. - */ -static inline s64 fast_divpow2(s64 n, u8 d) -{ - return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d; -} - -/** - * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1 - * and return it. - * @s1: the mean_and_variance to update. - * @v1: the new sample. - * - * see linked pdf equation 12. - */ -static inline void -mean_and_variance_update(struct mean_and_variance *s, s64 v) -{ - s->n++; - s->sum += v; - s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v))); -} - -s64 mean_and_variance_get_mean(struct mean_and_variance s); -u64 mean_and_variance_get_variance(struct mean_and_variance s1); -u32 mean_and_variance_get_stddev(struct mean_and_variance s); - -void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v); - -s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s); -u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s); -u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s); - -#endif // MEAN_AND_VAIRANCE_H_ diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 2fb96fa9..9b5f5809 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -3,6 +3,7 @@ #include "bbpos.h" #include "alloc_background.h" #include "backpointers.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -404,18 +405,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } -struct bpos_level { - unsigned level; - struct bpos pos; -}; - static int check_bp_exists(struct btree_trans *trans, struct bpos bucket, struct bch_backpointer bp, struct bkey_s_c orig_k, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter bp_iter = { NULL }; @@ -439,14 +435,19 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - if (last_flushed->level != bp.level || - !bpos_eq(last_flushed->pos, orig_k.k->p)) { + if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) || + bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) || + memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) { + if (bp.level) { + bch2_trans_unlock(trans); + bch2_btree_interior_updates_flush(c); + } + ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) goto err; - last_flushed->level = bp.level; - last_flushed->pos = orig_k.k->p; + bch2_bkey_buf_reassemble(last_flushed, c, orig_k); ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } @@ -477,7 +478,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed, + struct bkey_buf *last_flushed, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -511,7 +512,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, enum btree_id btree_id, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed, + struct bkey_buf *last_flushed, int *level) { struct bch_fs *c = trans->c; @@ -616,10 +617,13 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct btree_iter iter; enum btree_id btree_id; struct bkey_s_c k; + struct bkey_buf last_flushed; int ret = 0; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, @@ -664,6 +668,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, } } + bch2_bkey_buf_exit(&last_flushed, c); return 0; } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index d1615607..5a7b72a4 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1087,9 +1087,6 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) unsigned i; int ret = 0; - if (initial) - trans->is_initial_gc = true; - for (i = 0; i < BTREE_ID_NR; i++) ids[i] = i; bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index f430ca83..4d673d47 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2870,8 +2870,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) struct btree_trans *trans; struct btree_transaction_stats *s; - bch2_assert_btree_nodes_not_locked(); - trans = bch2_trans_alloc(c); memset(trans, 0, sizeof(*trans)); diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 89f14b5a..1eca320e 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -10,15 +10,16 @@ void bch2_btree_lock_init(struct btree_bkey_cached_common *b, enum six_lock_init_flags flags) { __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags); -#ifdef CONFIG_DEBUG_LOCK_ALLOC - lockdep_set_no_check_recursion(&b->lock.dep_map); -#endif + lockdep_set_novalidate_class(&b->lock); } #ifdef CONFIG_LOCKDEP void bch2_assert_btree_nodes_not_locked(void) { +#if 0 + //Re-enable when lock_class_is_held() is merged: BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); +#endif } #endif @@ -769,13 +770,6 @@ void bch2_trans_unlock(struct btree_trans *trans) trans_for_each_path(trans, path) __bch2_btree_path_unlock(trans, path); - - /* - * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking - * btree nodes, it implements its own walking: - */ - if (!trans->is_initial_gc) - bch2_assert_btree_nodes_not_locked(); } void bch2_trans_unlock_long(struct btree_trans *trans) diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 2326bceb..ca752660 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -399,7 +399,6 @@ struct btree_trans { bool memory_allocation_failure:1; bool journal_transaction_names:1; bool journal_replay_not_finished:1; - bool is_initial_gc:1; bool notrace_relock_fail:1; bool write_locked:1; enum bch_errcode restarted:16; diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 08c664ca..0d58a872 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -314,7 +314,7 @@ next: } continue; nowork: - if (m->stats && m->stats) { + if (m->stats) { BUG_ON(k.k->p.offset <= iter.pos.offset); atomic64_inc(&m->stats->keys_raced); atomic64_add(k.k->p.offset - iter.pos.offset, diff --git a/libbcachefs/mean_and_variance.c b/libbcachefs/mean_and_variance.c new file mode 100644 index 00000000..1f0801e2 --- /dev/null +++ b/libbcachefs/mean_and_variance.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Functions for incremental mean and variance. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * Copyright © 2022 Daniel B. Hill + * + * Author: Daniel B. Hill + * + * Description: + * + * This is includes some incremental algorithms for mean and variance calculation + * + * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + * + * Create a struct and if it's the weighted variant set the w field (weight = 2^k). + * + * Use mean_and_variance[_weighted]_update() on the struct to update it's state. + * + * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance, some computation + * is deferred to these functions for performance reasons. + * + * see lib/math/mean_and_variance_test.c for examples of usage. + * + * DO NOT access the mean and variance fields of the weighted variants directly. + * DO NOT change the weight after calling update. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mean_and_variance.h" + +u128_u u128_div(u128_u n, u64 d) +{ + u128_u r; + u64 rem; + u64 hi = u128_hi(n); + u64 lo = u128_lo(n); + u64 h = hi & ((u64) U32_MAX << 32); + u64 l = (hi & (u64) U32_MAX) << 32; + + r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64); + r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32)); + r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem))); + return r; +} +EXPORT_SYMBOL_GPL(u128_div); + +/** + * mean_and_variance_get_mean() - get mean from @s + */ +s64 mean_and_variance_get_mean(struct mean_and_variance s) +{ + return s.n ? div64_u64(s.sum, s.n) : 0; +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_mean); + +/** + * mean_and_variance_get_variance() - get variance from @s1 + * + * see linked pdf equation 12. + */ +u64 mean_and_variance_get_variance(struct mean_and_variance s1) +{ + if (s1.n) { + u128_u s2 = u128_div(s1.sum_squares, s1.n); + u64 s3 = abs(mean_and_variance_get_mean(s1)); + + return u128_lo(u128_sub(s2, u128_square(s3))); + } else { + return 0; + } +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_variance); + +/** + * mean_and_variance_get_stddev() - get standard deviation from @s + */ +u32 mean_and_variance_get_stddev(struct mean_and_variance s) +{ + return int_sqrt64(mean_and_variance_get_variance(s)); +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev); + +/** + * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() + * @s1: .. + * @s2: .. + * + * see linked pdf: function derived from equations 140-143 where alpha = 2^w. + * values are stored bitshifted for performance and added precision. + */ +void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x) +{ + // previous weighted variance. + u8 w = s->weight; + u64 var_w0 = s->variance; + // new value weighted. + s64 x_w = x << w; + s64 diff_w = x_w - s->mean; + s64 diff = fast_divpow2(diff_w, w); + // new mean weighted. + s64 u_w1 = s->mean + diff; + + if (!s->init) { + s->mean = x_w; + s->variance = 0; + } else { + s->mean = u_w1; + s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w; + } + s->init = true; +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update); + +/** + * mean_and_variance_weighted_get_mean() - get mean from @s + */ +s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s) +{ + return fast_divpow2(s.mean, s.weight); +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean); + +/** + * mean_and_variance_weighted_get_variance() -- get variance from @s + */ +u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s) +{ + // always positive don't need fast divpow2 + return s.variance >> s.weight; +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance); + +/** + * mean_and_variance_weighted_get_stddev() - get standard deviation from @s + */ +u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s) +{ + return int_sqrt64(mean_and_variance_weighted_get_variance(s)); +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev); + +MODULE_AUTHOR("Daniel B. Hill"); +MODULE_LICENSE("GPL"); diff --git a/libbcachefs/mean_and_variance.h b/libbcachefs/mean_and_variance.h new file mode 100644 index 00000000..64750501 --- /dev/null +++ b/libbcachefs/mean_and_variance.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef MEAN_AND_VARIANCE_H_ +#define MEAN_AND_VARIANCE_H_ + +#include +#include +#include +#include + +#define SQRT_U64_MAX 4294967295ULL + +/* + * u128_u: u128 user mode, because not all architectures support a real int128 + * type + */ + +#ifdef __SIZEOF_INT128__ + +typedef struct { + unsigned __int128 v; +} __aligned(16) u128_u; + +static inline u128_u u64_to_u128(u64 a) +{ + return (u128_u) { .v = a }; +} + +static inline u64 u128_lo(u128_u a) +{ + return a.v; +} + +static inline u64 u128_hi(u128_u a) +{ + return a.v >> 64; +} + +static inline u128_u u128_add(u128_u a, u128_u b) +{ + a.v += b.v; + return a; +} + +static inline u128_u u128_sub(u128_u a, u128_u b) +{ + a.v -= b.v; + return a; +} + +static inline u128_u u128_shl(u128_u a, s8 shift) +{ + a.v <<= shift; + return a; +} + +static inline u128_u u128_square(u64 a) +{ + u128_u b = u64_to_u128(a); + + b.v *= b.v; + return b; +} + +#else + +typedef struct { + u64 hi, lo; +} __aligned(16) u128_u; + +/* conversions */ + +static inline u128_u u64_to_u128(u64 a) +{ + return (u128_u) { .lo = a }; +} + +static inline u64 u128_lo(u128_u a) +{ + return a.lo; +} + +static inline u64 u128_hi(u128_u a) +{ + return a.hi; +} + +/* arithmetic */ + +static inline u128_u u128_add(u128_u a, u128_u b) +{ + u128_u c; + + c.lo = a.lo + b.lo; + c.hi = a.hi + b.hi + (c.lo < a.lo); + return c; +} + +static inline u128_u u128_sub(u128_u a, u128_u b) +{ + u128_u c; + + c.lo = a.lo - b.lo; + c.hi = a.hi - b.hi - (c.lo > a.lo); + return c; +} + +static inline u128_u u128_shl(u128_u i, s8 shift) +{ + u128_u r; + + r.lo = i.lo << shift; + if (shift < 64) + r.hi = (i.hi << shift) | (i.lo >> (64 - shift)); + else { + r.hi = i.lo << (shift - 64); + r.lo = 0; + } + return r; +} + +static inline u128_u u128_square(u64 i) +{ + u128_u r; + u64 h = i >> 32, l = i & U32_MAX; + + r = u128_shl(u64_to_u128(h*h), 64); + r = u128_add(r, u128_shl(u64_to_u128(h*l), 32)); + r = u128_add(r, u128_shl(u64_to_u128(l*h), 32)); + r = u128_add(r, u64_to_u128(l*l)); + return r; +} + +#endif + +static inline u128_u u64s_to_u128(u64 hi, u64 lo) +{ + u128_u c = u64_to_u128(hi); + + c = u128_shl(c, 64); + c = u128_add(c, u64_to_u128(lo)); + return c; +} + +u128_u u128_div(u128_u n, u64 d); + +struct mean_and_variance { + s64 n; + s64 sum; + u128_u sum_squares; +}; + +/* expontentially weighted variant */ +struct mean_and_variance_weighted { + bool init; + u8 weight; /* base 2 logarithim */ + s64 mean; + u64 variance; +}; + +/** + * fast_divpow2() - fast approximation for n / (1 << d) + * @n: numerator + * @d: the power of 2 denominator. + * + * note: this rounds towards 0. + */ +static inline s64 fast_divpow2(s64 n, u8 d) +{ + return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d; +} + +/** + * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1 + * and return it. + * @s1: the mean_and_variance to update. + * @v1: the new sample. + * + * see linked pdf equation 12. + */ +static inline void +mean_and_variance_update(struct mean_and_variance *s, s64 v) +{ + s->n++; + s->sum += v; + s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v))); +} + +s64 mean_and_variance_get_mean(struct mean_and_variance s); +u64 mean_and_variance_get_variance(struct mean_and_variance s1); +u32 mean_and_variance_get_stddev(struct mean_and_variance s); + +void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v); + +s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s); +u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s); +u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s); + +#endif // MEAN_AND_VAIRANCE_H_ diff --git a/libbcachefs/util.c b/libbcachefs/util.c index c3303b02..e85b1f46 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -22,9 +22,9 @@ #include #include #include -#include #include "eytzinger.h" +#include "mean_and_variance.h" #include "util.h" static const char si_units[] = "?kMGTPEZY"; diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 54e309d9..b93d5f48 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -17,7 +17,8 @@ #include #include #include -#include + +#include "mean_and_variance.h" #include "darray.h" diff --git a/linux/mean_and_variance.c b/linux/mean_and_variance.c deleted file mode 100644 index eb5f2ba0..00000000 --- a/linux/mean_and_variance.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Functions for incremental mean and variance. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * Copyright © 2022 Daniel B. Hill - * - * Author: Daniel B. Hill - * - * Description: - * - * This is includes some incremental algorithms for mean and variance calculation - * - * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf - * - * Create a struct and if it's the weighted variant set the w field (weight = 2^k). - * - * Use mean_and_variance[_weighted]_update() on the struct to update it's state. - * - * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance, some computation - * is deferred to these functions for performance reasons. - * - * see lib/math/mean_and_variance_test.c for examples of usage. - * - * DO NOT access the mean and variance fields of the weighted variants directly. - * DO NOT change the weight after calling update. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -u128_u u128_div(u128_u n, u64 d) -{ - u128_u r; - u64 rem; - u64 hi = u128_hi(n); - u64 lo = u128_lo(n); - u64 h = hi & ((u64) U32_MAX << 32); - u64 l = (hi & (u64) U32_MAX) << 32; - - r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64); - r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32)); - r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem))); - return r; -} -EXPORT_SYMBOL_GPL(u128_div); - -/** - * mean_and_variance_get_mean() - get mean from @s - */ -s64 mean_and_variance_get_mean(struct mean_and_variance s) -{ - return s.n ? div64_u64(s.sum, s.n) : 0; -} -EXPORT_SYMBOL_GPL(mean_and_variance_get_mean); - -/** - * mean_and_variance_get_variance() - get variance from @s1 - * - * see linked pdf equation 12. - */ -u64 mean_and_variance_get_variance(struct mean_and_variance s1) -{ - if (s1.n) { - u128_u s2 = u128_div(s1.sum_squares, s1.n); - u64 s3 = abs(mean_and_variance_get_mean(s1)); - - return u128_lo(u128_sub(s2, u128_square(s3))); - } else { - return 0; - } -} -EXPORT_SYMBOL_GPL(mean_and_variance_get_variance); - -/** - * mean_and_variance_get_stddev() - get standard deviation from @s - */ -u32 mean_and_variance_get_stddev(struct mean_and_variance s) -{ - return int_sqrt64(mean_and_variance_get_variance(s)); -} -EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev); - -/** - * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() - * @s1: .. - * @s2: .. - * - * see linked pdf: function derived from equations 140-143 where alpha = 2^w. - * values are stored bitshifted for performance and added precision. - */ -void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x) -{ - // previous weighted variance. - u8 w = s->weight; - u64 var_w0 = s->variance; - // new value weighted. - s64 x_w = x << w; - s64 diff_w = x_w - s->mean; - s64 diff = fast_divpow2(diff_w, w); - // new mean weighted. - s64 u_w1 = s->mean + diff; - - if (!s->init) { - s->mean = x_w; - s->variance = 0; - } else { - s->mean = u_w1; - s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w; - } - s->init = true; -} -EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update); - -/** - * mean_and_variance_weighted_get_mean() - get mean from @s - */ -s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s) -{ - return fast_divpow2(s.mean, s.weight); -} -EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean); - -/** - * mean_and_variance_weighted_get_variance() -- get variance from @s - */ -u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s) -{ - // always positive don't need fast divpow2 - return s.variance >> s.weight; -} -EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance); - -/** - * mean_and_variance_weighted_get_stddev() - get standard deviation from @s - */ -u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s) -{ - return int_sqrt64(mean_and_variance_weighted_get_variance(s)); -} -EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev); - -MODULE_AUTHOR("Daniel B. Hill"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3