summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSuren Baghdasaryan <surenb@google.com>2022-07-31 10:48:17 -0700
committerKent Overstreet <kent.overstreet@linux.dev>2023-07-11 21:20:26 -0400
commitc5143c4c460789a4763003bd58f5ebf5324c09b6 (patch)
tree1cf32675ae3e8cbd442e18bd37963080ebe99a29
parented66df043e9558b088d5ff71780b9a46b1bab2b0 (diff)
lib: add allocation tagging support for memory allocation profiling
Introduce CONFIG_MEM_ALLOC_PROFILING which provides definitions to easily instrument memory allocators. It registers an "alloc_tags" codetag type with /proc/allocinfo interface to output allocation tag information when the feature is enabled. CONFIG_MEM_ALLOC_PROFILING_DEBUG is provided for debugging the memory allocation profiling instrumentation. Memory allocation profiling can be enabled or disabled at runtime using /proc/sys/vm/mem_profiling sysctl when CONFIG_MEM_ALLOC_PROFILING_DEBUG=n. CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT enables memory allocation profiling by default. Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst16
-rw-r--r--Documentation/filesystems/proc.rst28
-rw-r--r--include/asm-generic/codetag.lds.h14
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/linux/alloc_tag.h109
-rw-r--r--include/linux/sched.h24
-rw-r--r--lib/Kconfig.debug24
-rw-r--r--lib/Makefile2
-rw-r--r--lib/alloc_tag.c160
-rw-r--r--scripts/module.lds.S7
10 files changed, 387 insertions, 0 deletions
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 45ba1f4dc004..0a012ac13a38 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y)
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation.
The default value is 65530.
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+
memory_failure_early_kill:
==========================
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 7897a7dafcbc..810f851e698b 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -683,6 +683,7 @@ files are there, and which are missing.
============ ===============================================================
File Content
============ ===============================================================
+ allocinfo Memory allocations profiling information
apm Advanced power management info
buddyinfo Kernel memory allocator information (see text) (2.5)
bus Directory containing bus specific information
@@ -942,6 +943,33 @@ also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.
+allocinfo
+~~~~~~~~~
+
+Provides information about memory allocations at all locations in the code
+base. Each allocation in the code is identified by its source file, line
+number, module and the function calling the allocation. The number of bytes
+allocated at each location is reported.
+
+Example output.
+
+::
+
+ > cat /proc/allocinfo
+
+ 153MiB mm/slub.c:1826 module:slub func:alloc_slab_page
+ 6.08MiB mm/slab_common.c:950 module:slab_common func:_kmalloc_order
+ 5.09MiB mm/memcontrol.c:2814 module:memcontrol func:alloc_slab_obj_exts
+ 4.54MiB mm/page_alloc.c:5777 module:page_alloc func:alloc_pages_exact
+ 1.32MiB include/asm-generic/pgalloc.h:63 module:pgtable func:__pte_alloc_one
+ 1.16MiB fs/xfs/xfs_log_priv.h:700 module:xfs func:xlog_kvmalloc
+ 1.00MiB mm/swap_cgroup.c:48 module:swap_cgroup func:swap_cgroup_prepare
+ 734KiB fs/xfs/kmem.c:20 module:xfs func:kmem_alloc
+ 640KiB kernel/rcu/tree.c:3184 module:tree func:fill_page_cache_func
+ 640KiB drivers/char/virtio_console.c:452 module:virtio_console func:alloc_buf
+ ...
+
+
meminfo
~~~~~~~
diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h
new file mode 100644
index 000000000000..64f536b80380
--- /dev/null
+++ b/include/asm-generic/codetag.lds.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GENERIC_CODETAG_LDS_H
+#define __ASM_GENERIC_CODETAG_LDS_H
+
+/*
+ * Emit input section _name, aligned to 8 bytes and wrapped in KEEP(), and
+ * bracket it with the symbols __start_<_name> and __stop_<_name>.
+ */
+#define SECTION_WITH_BOUNDARIES(_name) \
+ . = ALIGN(8); \
+ __start_##_name = .; \
+ KEEP(*(_name)) \
+ __stop_##_name = .;
+
+/* All code tag sections to place in an image; currently only alloc_tags. */
+#define CODETAG_SECTIONS() \
+ SECTION_WITH_BOUNDARIES(alloc_tags)
+
+#endif /* __ASM_GENERIC_CODETAG_LDS_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index da9e5629ea43..47dd57ca755c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -50,6 +50,8 @@
* [__nosave_begin, __nosave_end] for the nosave data
*/
+#include <asm-generic/codetag.lds.h>
+
#ifndef LOAD_OFFSET
#define LOAD_OFFSET 0
#endif
@@ -374,6 +376,7 @@
. = ALIGN(8); \
BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \
BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \
+ CODETAG_SECTIONS() \
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
new file mode 100644
index 000000000000..3165eecdf6d0
--- /dev/null
+++ b/include/linux/alloc_tag.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * allocation tagging
+ */
+#ifndef _LINUX_ALLOC_TAG_H
+#define _LINUX_ALLOC_TAG_H
+
+#include <linux/bug.h>
+#include <linux/codetag.h>
+#include <linux/container_of.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/static_key.h>
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * allocation callsite. At runtime, the special section is treated as
+ * an array of these. The embedded codetag ties each instance into the codetag
+ * framework; ct_to_alloc_tag() maps a codetag back to its alloc_tag.
+ */
+struct alloc_tag {
+ struct codetag ct; /* embedded code tag, recovered via container_of() */
+ u64 __percpu *bytes_allocated; /* per-CPU byte counter, summed by alloc_tag_read() */
+} __aligned(8);
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+/* Map an embedded codetag back to the alloc_tag that contains it. */
+static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
+{
+	struct alloc_tag *tag = container_of(ct, struct alloc_tag, ct);
+
+	return tag;
+}
+
+/*
+ * Define a static alloc_tag named _alloc_tag in the "alloc_tags" section and
+ * declare _old, initialized with the value returned by
+ * alloc_tag_save(&_alloc_tag). Expands to two declarations, so it can only be
+ * used where declarations are allowed.
+ */
+#define DEFINE_ALLOC_TAG(_alloc_tag, _old) \
+ static struct alloc_tag _alloc_tag __used __aligned(8) \
+ __section("alloc_tags") = { .ct = CODE_TAG_INIT }; \
+ struct alloc_tag * __maybe_unused _old = alloc_tag_save(&_alloc_tag)
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+ mem_alloc_profiling_key);
+
+/*
+ * Report whether memory allocation profiling is currently switched on, as
+ * recorded by the mem_alloc_profiling_key static key.
+ */
+static inline bool mem_alloc_profiling_enabled(void)
+{
+	if (static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+				&mem_alloc_profiling_key))
+		return true;
+
+	return false;
+}
+
+/*
+ * Return the number of bytes currently accounted to @tag, obtained by summing
+ * its per-CPU counters over every possible CPU.
+ */
+static inline u64 alloc_tag_read(struct alloc_tag *tag)
+{
+	u64 total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		u64 *counter = per_cpu_ptr(tag->bytes_allocated, cpu);
+
+		total += *counter;
+	}
+
+	return total;
+}
+
+/*
+ * Uncharge @bytes from the tag that @ref points at and clear the reference.
+ *
+ * Does nothing when @ref is NULL or holds no tag. With
+ * CONFIG_MEM_ALLOC_PROFILING_DEBUG, a non-NULL @ref without a tag triggers a
+ * one-time warning first.
+ */
+static inline void __alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+{
+	struct alloc_tag *tag;
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
+#endif
+	if (!ref || !ref->ct)
+		return;
+
+	tag = ct_to_alloc_tag(ref->ct);
+
+	/*
+	 * Use this_cpu_sub() rather than adding -bytes: negating a size_t
+	 * yields a size_t, which on 32-bit is narrower than the u64 counter
+	 * and would add (2^32 - bytes) instead of subtracting.
+	 */
+	this_cpu_sub(*tag->bytes_allocated, bytes);
+	ref->ct = NULL;
+}
+
+/* Uncharge @bytes from the tag referenced by @ref; see __alloc_tag_sub(). */
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+{
+	__alloc_tag_sub(ref, bytes);
+}
+
+/* Same accounting as alloc_tag_sub(); both forward to __alloc_tag_sub(). */
+static inline void alloc_tag_sub_noalloc(union codetag_ref *ref, size_t bytes)
+{
+	__alloc_tag_sub(ref, bytes);
+}
+
+/*
+ * Charge @bytes to @tag and record the tag in @ref.
+ *
+ * Does nothing when @ref or @tag is NULL. With
+ * CONFIG_MEM_ALLOC_PROFILING_DEBUG, warns once if @ref still holds a tag and
+ * once if @tag is NULL.
+ */
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
+{
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+	WARN_ONCE(ref && ref->ct,
+		  "alloc_tag was not cleared (got tag for %s:%u)\n",
+		  ref->ct->filename, ref->ct->lineno);
+
+	WARN_ONCE(!tag, "current->alloc_tag not set\n");
+#endif
+	if (!ref || !tag)
+		return;
+
+	ref->ct = &tag->ct;
+	this_cpu_add(*tag->bytes_allocated, bytes);
+}
+
+#else
+
+/* CONFIG_MEM_ALLOC_PROFILING=n: the tagging API compiles away to nothing. */
+#define DEFINE_ALLOC_TAG(_alloc_tag, _old)
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
+static inline void alloc_tag_sub_noalloc(union codetag_ref *ref, size_t bytes) {}
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
+ size_t bytes) {}
+
+#endif
+
+#endif /* _LINUX_ALLOC_TAG_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a44699cbb44..d05349628071 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,6 +763,10 @@ struct task_struct {
unsigned int flags;
unsigned int ptrace;
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ struct alloc_tag *alloc_tag;
+#endif
+
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
@@ -802,6 +806,7 @@ struct task_struct {
struct task_group *sched_task_group;
#endif
+
#ifdef CONFIG_UCLAMP_TASK
/*
* Clamp values requested for a scheduling entity.
@@ -2445,4 +2450,23 @@ static inline void sched_core_fork(struct task_struct *p) { }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+/*
+ * Install @tag as the current task's allocation tag and return the tag that
+ * was installed before, so the caller can hand it to alloc_tag_restore().
+ */
+static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+{
+	struct alloc_tag *old = current->alloc_tag;
+
+	current->alloc_tag = tag;
+	return old;
+}
+
+/*
+ * Put @old back as the current task's allocation tag. With
+ * CONFIG_MEM_ALLOC_PROFILING_DEBUG, warn if the tag being replaced is not
+ * @tag.
+ */
+static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+{
+	WARN(IS_ENABLED(CONFIG_MEM_ALLOC_PROFILING_DEBUG) &&
+	     current->alloc_tag != tag,
+	     "current->alloc_tag was changed:\n");
+
+	current->alloc_tag = old;
+}
+#else
+/* CONFIG_MEM_ALLOC_PROFILING=n: nothing is saved and nothing is restored. */
+static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { return NULL; }
+#define alloc_tag_restore(_tag, _old)
+#endif
+
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9ff30538f043..2c4754df569b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -961,6 +961,30 @@ config CODE_TAGGING
bool
select KALLSYMS
+config MEM_ALLOC_PROFILING
+ bool "Enable memory allocation profiling"
+ default n
+ depends on PROC_FS
+ select CODE_TAGGING
+ help
+ Track allocation source code and record total allocation size
+ initiated at that code location. The mechanism can be used to track
+ memory leaks with a low performance and memory impact.
+
+config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+ bool "Enable memory allocation profiling by default"
+ default y
+ depends on MEM_ALLOC_PROFILING
+
+config MEM_ALLOC_PROFILING_DEBUG
+ bool "Memory allocation profiler debugging"
+ default n
+ depends on MEM_ALLOC_PROFILING
+ select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+ help
+ Adds warnings with helpful error messages for memory allocation
+ profiling.
+
source "lib/Kconfig.kasan"
source "lib/Kconfig.kfence"
source "lib/Kconfig.kmsan"
diff --git a/lib/Makefile b/lib/Makefile
index b567823ac69a..7dc3a7216515 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -227,6 +227,8 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_CODE_TAGGING) += codetag.o
+obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o
+
lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
new file mode 100644
index 000000000000..30cf3f1b003c
--- /dev/null
+++ b/lib/alloc_tag.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/alloc_tag.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static struct codetag_type *alloc_tag_cttype;
+
+DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+ mem_alloc_profiling_key);
+
+/*
+ * seq_file ->start: allocate a fresh iterator, call
+ * codetag_lock_module_list(..., true) and advance to the tag at index *pos.
+ *
+ * The iterator is stored in m->private so that allocinfo_stop() can release
+ * it. Returns the iterator, or NULL when the allocation fails or *pos lies
+ * beyond the last tag.
+ */
+static void *allocinfo_start(struct seq_file *m, loff_t *pos)
+{
+	struct codetag_iterator *it = kzalloc(sizeof(*it), GFP_KERNEL);
+	loff_t skip = *pos;
+
+	m->private = it;
+	if (!it)
+		return NULL;
+
+	codetag_lock_module_list(alloc_tag_cttype, true);
+	*it = codetag_get_ct_iter(alloc_tag_cttype);
+
+	for (;;) {
+		if (!codetag_next_ct(it))
+			return NULL;
+		if (!skip)
+			return it;
+		skip--;
+	}
+}
+
+/*
+ * seq_file ->next: bump *pos and step the iterator to the following tag.
+ * Returns the iterator, or NULL once the tags are exhausted.
+ */
+static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+	struct codetag_iterator *it = arg;
+
+	++*pos;
+	return codetag_next_ct(it) ? it : NULL;
+}
+
+/*
+ * seq_file ->stop: undo allocinfo_start(). If an iterator was stored in
+ * m->private, call codetag_lock_module_list(..., false) and free it.
+ */
+static void allocinfo_stop(struct seq_file *m, void *arg)
+{
+	struct codetag_iterator *it = m->private;
+
+	if (!it)
+		return;
+
+	codetag_lock_module_list(alloc_tag_cttype, false);
+	kfree(it);
+}
+
+/*
+ * Format one tag into @buf: the tag's byte total as rendered by
+ * string_get_size(), right-aligned in 8 columns, a space, then the
+ * codetag_to_text() output. The write is unbounded, so the caller must
+ * supply a buffer that is large enough.
+ */
+static void alloc_tag_to_text(char *buf, struct codetag *ct)
+{
+	char size_str[10];
+	int len;
+
+	string_get_size(alloc_tag_read(ct_to_alloc_tag(ct)), 1,
+			STRING_SIZE_BASE2|STRING_SIZE_NOSPACE,
+			size_str, sizeof(size_str));
+
+	len = sprintf(buf, "%8s ", size_str);
+	codetag_to_text(buf + len, ct);
+}
+
+/*
+ * seq_file ->show: print the tag the iterator currently points at as one
+ * line of output. Always returns 0.
+ */
+static int allocinfo_show(struct seq_file *m, void *arg)
+{
+	struct codetag_iterator *it = arg;
+	char line[1024];
+
+	alloc_tag_to_text(line, it->ct);
+	seq_printf(m, "%s\n", line);
+
+	return 0;
+}
+
+/* seq_file callbacks that produce the allocinfo listing. */
+static const struct seq_operations allocinfo_seq_op = {
+	.start	= allocinfo_start,
+	.show	= allocinfo_show,
+	.next	= allocinfo_next,
+	.stop	= allocinfo_stop,
+};
+
+/*
+ * Create the read-only (0444) "allocinfo" entry at the top level of procfs.
+ * A failure is logged rather than silently ignored, so a missing
+ * /proc/allocinfo can be diagnosed from the kernel log.
+ */
+static void __init procfs_init(void)
+{
+	if (!proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op))
+		pr_err("Failed to create /proc/allocinfo\n");
+}
+
+/*
+ * codetag ->module_load hook: give every alloc_tag that belongs to @cmod its
+ * per-CPU byte counter.
+ *
+ * NOTE(review): the alloc_percpu() result is stored unchecked - confirm how
+ * an allocation failure is meant to be handled.
+ */
+static void alloc_tag_module_load(struct codetag_type *cttype, struct codetag_module *cmod)
+{
+	struct codetag_iterator iter = codetag_get_ct_iter(cttype);
+	struct codetag *ct;
+
+	while ((ct = codetag_next_ct(&iter)) != NULL) {
+		if (iter.cmod == cmod)
+			ct_to_alloc_tag(ct)->bytes_allocated = alloc_percpu(u64);
+	}
+}
+
+/*
+ * codetag ->module_unload hook: release the per-CPU counters of every
+ * alloc_tag that belongs to @cmod.
+ *
+ * A tag that still has bytes accounted triggers a warning and keeps its
+ * counter. Returns true only if no tag of @cmod had bytes outstanding.
+ */
+static bool alloc_tag_module_unload(struct codetag_type *cttype, struct codetag_module *cmod)
+{
+	struct codetag_iterator iter = codetag_get_ct_iter(cttype);
+	bool module_unused = true;
+	struct alloc_tag *tag;
+	struct codetag *ct;
+	u64 bytes;
+
+	for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) {
+		if (iter.cmod != cmod)
+			continue;
+
+		tag = ct_to_alloc_tag(ct);
+		/* Keep the full u64: a size_t would truncate the total on 32-bit. */
+		bytes = alloc_tag_read(tag);
+
+		if (!WARN(bytes, "%s:%u module %s func:%s has %llu allocated at module unload",
+			  ct->filename, ct->lineno, ct->modname, ct->function, bytes))
+			free_percpu(tag->bytes_allocated);
+		else
+			module_unused = false;
+	}
+
+	return module_unused;
+}
+
+/*
+ * "mem_profiling" sysctl backed by mem_alloc_profiling_key. The entry is
+ * read-only (0444) when CONFIG_MEM_ALLOC_PROFILING_DEBUG is set and 0644
+ * otherwise.
+ */
+static struct ctl_table memory_allocation_profiling_sysctls[] = {
+	{
+		.procname	= "mem_profiling",
+		.data		= &mem_alloc_profiling_key,
+		.mode		= IS_ENABLED(CONFIG_MEM_ALLOC_PROFILING_DEBUG) ? 0444 : 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+	{ }
+};
+
+/*
+ * Register the "alloc_tags" codetag type, then register the "vm" sysctl
+ * table and create the procfs entry.
+ *
+ * Returns 0 on success or a negative errno if the type cannot be registered.
+ */
+static int __init alloc_tag_init(void)
+{
+	const struct codetag_type_desc desc = {
+		.section = "alloc_tags",
+		.tag_size = sizeof(struct alloc_tag),
+		.module_load = alloc_tag_module_load,
+		.module_unload = alloc_tag_module_unload,
+	};
+
+	alloc_tag_cttype = codetag_register_type(&desc);
+	/*
+	 * PTR_ERR(NULL) is 0, so a NULL result must not be reported through
+	 * it or the failure would look like success. NULL carries no error
+	 * code of its own, hence the generic -EINVAL.
+	 */
+	if (!alloc_tag_cttype)
+		return -EINVAL;
+	if (IS_ERR(alloc_tag_cttype))
+		return PTR_ERR(alloc_tag_cttype);
+
+	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+	procfs_init();
+
+	return 0;
+}
+module_init(alloc_tag_init);
diff --git a/scripts/module.lds.S b/scripts/module.lds.S
index bf5bcf2836d8..45c67a0994f3 100644
--- a/scripts/module.lds.S
+++ b/scripts/module.lds.S
@@ -9,6 +9,8 @@
#define DISCARD_EH_FRAME *(.eh_frame)
#endif
+#include <asm-generic/codetag.lds.h>
+
SECTIONS {
/DISCARD/ : {
*(.discard)
@@ -47,12 +49,17 @@ SECTIONS {
.data : {
*(.data .data.[0-9a-zA-Z_]*)
*(.data..L*)
+ CODETAG_SECTIONS()
}
.rodata : {
*(.rodata .rodata.[0-9a-zA-Z_]*)
*(.rodata..L*)
}
+#else
+ .data : {
+ CODETAG_SECTIONS()
+ }
#endif
}