diff options
author | Daniel Hill <daniel@gluo.nz> | 2022-08-06 14:48:49 +1200 |
---|---|---|
committer | Daniel Hill <daniel@gluo.nz> | 2022-09-04 15:28:11 +1200 |
commit | c80870627dd4a4c0719d0f8fa2c3d432c446dbac (patch) | |
tree | 63602cd5c3eb1b3661247d1944f8ce9f33698391 | |
parent | 8a438c24d3db8930b0e64c8a383669874336d8a7 (diff) |
lib: add mean and variance module.
This module provides a fast 64bit implementation of basic statistics
functions, including mean, variance and standard deviation in both
weighted and unweighted variants, the unweighted variant has a 32bit
limitation per sample to prevent overflow when squaring.
Signed-off-by: Daniel Hill <daniel@gluo.nz>
-rw-r--r-- | MAINTAINERS | 9 | ||||
-rw-r--r-- | include/linux/mean_and_variance.h | 37 | ||||
-rw-r--r-- | lib/math/Kconfig | 8 | ||||
-rw-r--r-- | lib/math/Makefile | 2 | ||||
-rw-r--r-- | lib/math/mean_and_variance.c | 183 | ||||
-rw-r--r-- | lib/math/mean_and_variance_test.c | 161 |
6 files changed, 400 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 64379c699903..5dd800e78290 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12311,6 +12311,15 @@ F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt F: drivers/net/ieee802154/mcr20a.c F: drivers/net/ieee802154/mcr20a.h +MEAN AND VARIANCE LIBRARY +M: Daniel B. Hill <daniel@gluo.nz> +M: Kent Overstreet <kent.overstreet@linux.dev> +S: Maintained +T: git https://github.com/YellowOnion/linux/ +F: lib/math/mean_and_variance.c +F: lib/math/mean_and_variance_test.c +F: include/linux/mean_and_variance.h + MEASUREMENT COMPUTING CIO-DAC IIO DRIVER M: William Breathitt Gray <vilhelm.gray@gmail.com> L: linux-iio@vger.kernel.org diff --git a/include/linux/mean_and_variance.h b/include/linux/mean_and_variance.h new file mode 100644 index 000000000000..0a64c1be9d20 --- /dev/null +++ b/include/linux/mean_and_variance.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef STATS_H_ +#define STATS_H_ + +#include <linux/types.h> + +#define SQRT_U64_MAX 4294967295ULL + +struct mean_and_variance { + s64 n; + s64 sum; + u64 sum_squares; +}; + +/* expontentially weighted variant */ +struct mean_and_variance_weighted { + bool init; + u8 w; + s64 mean; + u64 variance; +}; + +#ifdef CONFIG_MEAN_AND_VARIANCE_UNIT_TEST +s64 fast_divpow2(s64 n, u8 d); +#endif + +struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1); + s64 mean_and_variance_get_mean(struct mean_and_variance s); + u64 mean_and_variance_get_variance(struct mean_and_variance s1); + u32 mean_and_variance_get_stddev(struct mean_and_variance s); + +struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1, s64 v1); + s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s); + u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s); + u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s); + +#endif // STATS_H_ diff --git a/lib/math/Kconfig b/lib/math/Kconfig index 0634b428d0cb..12fe28622e82 100644 --- a/lib/math/Kconfig +++ b/lib/math/Kconfig @@ -15,3 +15,11 @@ config PRIME_NUMBERS config RATIONAL tristate + +config MEAN_AND_VARIANCE + tristate + +config MEAN_AND_VARIANCE_UNIT_TEST + tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS + depends on MEAN_AND_VARIANCE && KUNIT + default KUNIT_ALL_TESTS diff --git a/lib/math/Makefile b/lib/math/Makefile index bfac26ddfc22..2ef1487e01c2 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -4,6 +4,8 @@ obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o +obj-$(CONFIG_MEAN_AND_VARIANCE) += mean_and_variance.o obj-$(CONFIG_TEST_DIV64) += test_div64.o obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o +obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o diff --git a/lib/math/mean_and_variance.c b/lib/math/mean_and_variance.c new file mode 100644 index 000000000000..f15aefc0b4d0 --- /dev/null +++ b/lib/math/mean_and_variance.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Functions for incremental mean and variance. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * Copyright © 2022 Daniel B. Hill + * + * Author: Daniel B. Hill <daniel@gluo.nz> + * + * Description: + * + * This is includes some incremental algorithms for mean and variance calculation + * + * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + * + * Create a struct and if it's the weighted variant set the w field (weight = 2^k). + * + * Use mean_and_variance[_weighted]_update() on the struct to update it's state. + * + * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance, some computation + * is deferred to these functions for performance reasons. + * + * see lib/math/mean_and_variance_test.c for examples of usage. + * + * DO NOT access the mean and variance fields of the weighted variants directly. + * DO NOT change the weight after calling update. + */ + +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/limits.h> +#include <linux/math.h> +#include <linux/math64.h> +#include <linux/mean_and_variance.h> +#include <linux/module.h> +#include <linux/printbuf.h> + + +/** + * fast_divpow2() - fast approximation for n / (1 << d) + * @n: numerator + * @d: the power of 2 denominator. + * + * note: this rounds towards 0. + */ +inline s64 fast_divpow2(s64 n, u8 d) +{ + return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d; // + (n < 0 ? 1 : 0); +} + +/** + * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1 + * and return it. + * @s1: the mean_and_variance to update. + * @v1: the new sample. + * + * see linked pdf equation 12. + */ +struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1) +{ + struct mean_and_variance s2; + u64 v2 = abs(v1); + + if (v2 > SQRT_U64_MAX) { + v2 = SQRT_U64_MAX; + WARN(true, "stats overflow! %lld^2 > U64_MAX", v1); + } + + s2.n = s1.n + 1; + s2.sum = s1.sum + v1; + s2.sum_squares = s1.sum_squares + v2*v2; + return s2; +} +EXPORT_SYMBOL_GPL(mean_and_variance_update); + +/** + * mean_and_variance_get_mean() - get mean from @s + */ +s64 mean_and_variance_get_mean(struct mean_and_variance s) +{ + return div64_u64(s.sum, s.n); +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_mean); + +/** + * mean_and_variance_get_variance() - get variance from @s1 + * + * see linked pdf equation 12. + */ +u64 mean_and_variance_get_variance(struct mean_and_variance s1) +{ + u64 s2 = s1.sum_squares / s1.n; + u64 s3 = abs(mean_and_variance_get_mean(s1)); + + WARN(s3 > SQRT_U64_MAX, "stats overflow %lld ^2 > S64_MAX", s3); + return s2 - s3*s3; +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_variance); + +/** + * mean_and_variance_get_stddev() - get standard deviation from @s + */ +u32 mean_and_variance_get_stddev(struct mean_and_variance s) +{ + return int_sqrt64(mean_and_variance_get_variance(s)); +} +EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev); + +/** + * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() + * @s1: .. + * @s2: .. + * + * see linked pdf: function derived from equations 140-143 where alpha = 2^w. + * values are stored bitshifted for performance and added precision. + */ +struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1, s64 v1) +{ + struct mean_and_variance_weighted s2; + s64 m = s1.mean; + u64 var = s1.variance; + u8 w = s2.w = s1.w; + s64 v2 = v1 << w; + s64 d1 = (v2 - m); + s64 d2 = fast_divpow2(d1, w); + u64 d3 = (d1*d1) >> w; + + if (!s1.init) { + s2.mean = v2; + s2.variance = 0; + } else { + s2.mean = m + d2; + s2.variance = var + ((d3 - (d3 >> w) - var) >> w); + } + s2.init = true; + + #ifdef CONFIG_STATS_UNIT_TEST + pr_debug("v1 = %lld, v2 = %lld, d1 = %lld, d2 = %lld, d3 = %llu, m = %lld, var = %llu", + v1, v2, d1, d2, d3, s2.mean, s2.variance); + #endif + return s2; +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update); + +/** + * mean_and_variance_weighted_get_mean() - get mean from @s + */ +s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s) +{ + return fast_divpow2(s.mean, s.w); +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean); + +/** + * mean_and_variance_weighted_get_variance() -- get variance from @s + */ +u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s) +{ + // always positive don't need fast divpow2 + return s.variance >> s.w; +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance); + +/** + * mean_and_variance_weighted_get_stddev() - get standard deviation from @s + */ +u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s) +{ + return int_sqrt64(mean_and_variance_weighted_get_variance(s)); +} +EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev); + +MODULE_AUTHOR("Daniel B. Hill"); +MODULE_LICENSE("GPL"); diff --git a/lib/math/mean_and_variance_test.c b/lib/math/mean_and_variance_test.c new file mode 100644 index 000000000000..215e128acd86 --- /dev/null +++ b/lib/math/mean_and_variance_test.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <kunit/test.h> +#include <linux/mean_and_variance.h> + +#define MAX_SQR (SQRT_U64_MAX*SQRT_U64_MAX) + +static void mean_and_variance_basic_test(struct kunit *test) +{ + struct mean_and_variance s = {}; + + s = mean_and_variance_update(s, 2); + s = mean_and_variance_update(s, 2); + + KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 2); + KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 0); + KUNIT_EXPECT_EQ(test, s.n, 2); + + s = mean_and_variance_update(s, 4); + s = mean_and_variance_update(s, 4); + + KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 3); + KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 1); + KUNIT_EXPECT_EQ(test, s.n, 4); + + /* + * Test overflow bounds + */ + s = (struct mean_and_variance){}; + + s = mean_and_variance_update(s, SQRT_U64_MAX); + + KUNIT_EXPECT_EQ_MSG(test, + s.sum_squares, + MAX_SQR, + "%llu == %llu, sqrt: %llu == %llu", + s.sum_squares, + MAX_SQR, + int_sqrt64(s.sum_squares), + SQRT_U64_MAX); + + s = (struct mean_and_variance){}; + + s = mean_and_variance_update(s, -(s64)SQRT_U64_MAX); + + KUNIT_EXPECT_EQ_MSG(test, + s.sum_squares, + MAX_SQR, + "%llu == %llu, sqrt: %llu == %llu", + s.sum_squares, + MAX_SQR, + int_sqrt64(s.sum_squares), + SQRT_U64_MAX); + + s = (struct mean_and_variance){}; + + s = mean_and_variance_update(s, (SQRT_U64_MAX + 1)); + + KUNIT_EXPECT_EQ(test, s.sum_squares, MAX_SQR); + + s = (struct mean_and_variance){}; + + s = mean_and_variance_update(s, (-(s64)SQRT_U64_MAX) - 1); + + KUNIT_EXPECT_EQ(test, s.sum_squares, MAX_SQR); +} + +/* + * Test values computed using a spreadsheet from the psuedocode at the bottom: + * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + */ + +static void mean_and_variance_weighted_test(struct kunit *test) +{ + struct mean_and_variance_weighted s = {}; + + s.w = 2; + + s = mean_and_variance_weighted_update(s, 10); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 10); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0); + + s = mean_and_variance_weighted_update(s, 20); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 12); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18); + + s = mean_and_variance_weighted_update(s, 30); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 16); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 71); + + s = (struct mean_and_variance_weighted){}; + s.w = 2; + + s = mean_and_variance_weighted_update(s, -10); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -10); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0); + + s = mean_and_variance_weighted_update(s, -20); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -12); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18); + + s = mean_and_variance_weighted_update(s, -30); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -16); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 71); + +} + +static void mean_and_variance_weighted_advanced_test(struct kunit *test) +{ + struct mean_and_variance_weighted s = {}; + s64 i; + + s.w = 8; + for (i = 10; i <= 100; i += 10) + s = mean_and_variance_weighted_update(s, i); + + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 11); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107); + + s = (struct mean_and_variance_weighted){}; + + s.w = 8; + for (i = -10; i >= -100; i -= 10) + s = mean_and_variance_weighted_update(s, i); + + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -11); + KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107); + +} + +static void mean_and_variance_fast_divpow2(struct kunit *test) +{ + s64 i; + u8 d; + + for (i = 0; i < 100; i++) { + d = 0; + KUNIT_EXPECT_EQ(test, fast_divpow2(i, d), div_u64(i, 1LLU << d)); + KUNIT_EXPECT_EQ(test, abs(fast_divpow2(-i, d)), div_u64(i, 1LLU << d)); + for (d = 1; d < 32; d++) { + KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(i, d)), + div_u64(i, 1 << d), "%lld %u", i, d); + KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(-i, d)), + div_u64(i, 1 << d), "%lld %u", -i, d); + } + } +} + +static struct kunit_case mean_and_variance_test_cases[] = { + KUNIT_CASE(mean_and_variance_basic_test), + KUNIT_CASE(mean_and_variance_weighted_test), + KUNIT_CASE(mean_and_variance_weighted_advanced_test), + KUNIT_CASE(mean_and_variance_fast_divpow2), + {} +}; + +static struct kunit_suite mean_and_variance_test_suite = { +.name = "statistics", +.test_cases = mean_and_variance_test_cases +}; + +kunit_test_suite(mean_and_variance_test_suite); |