diff options
author | Reed Riley <reed@riley.engineer> | 2024-04-15 21:08:13 -0700 |
---|---|---|
committer | Reed Riley <reed@riley.engineer> | 2024-04-15 21:08:13 -0700 |
commit | e5e8f203d77f90d1af8ebfd794eb1593582b85cc (patch) | |
tree | 4b247a45e411ea62d5af495eec775d3d48c40676 | |
parent | 6432d7745c1d0eba610fed85a8a6bac1864e858d (diff) | |
parent | 72cfb036b00a2c9f59a5d6ff1bc2d039ffb67490 (diff) |
Merge branch 'memalloc_prof_v6' of https://github.com/surenbaghdasaryan/linux
124 files changed, 2316 insertions, 652 deletions
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index c59889de122b..e86c968a7a0e 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm: - legacy_va_layout - lowmem_reserve_ratio - max_map_count +- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y) - memory_failure_early_kill - memory_failure_recovery - min_free_kbytes @@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation. The default value is 65530. +mem_profiling +============== + +Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y) + +1: Enable memory profiling. + +0: Disable memory profiling. + +Enabling memory profiling introduces a small performance overhead for all +memory allocations. + +The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT. + + memory_failure_early_kill: ========================== diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index c6a6b9df2104..5d2fc58b5b1f 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -688,6 +688,7 @@ files are there, and which are missing. ============ =============================================================== File Content ============ =============================================================== + allocinfo Memory allocations profiling information apm Advanced power management info bootconfig Kernel command line obtained from boot config, and, if there were kernel parameters from the @@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be reclaimed to achieve this. +allocinfo +~~~~~~~ + +Provides information about memory allocations at all locations in the code +base. Each allocation in the code is identified by its source file, line +number, module (if originates from a loadable module) and the function calling +the allocation. The number of bytes allocated and number of calls at each +location are reported. + +Example output. + +:: + + > sort -rn /proc/allocinfo + 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext + 56373248 4737 mm/slub.c:2259 func:alloc_slab_page + 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded + 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash + 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs + 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio + 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node + 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable + 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start + 3940352 962 mm/memory.c:4214 func:alloc_anon_folio + 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node + ... + + meminfo ~~~~~~~ diff --git a/Documentation/mm/allocation-profiling.rst b/Documentation/mm/allocation-profiling.rst new file mode 100644 index 000000000000..d3b733b41ae6 --- /dev/null +++ b/Documentation/mm/allocation-profiling.rst @@ -0,0 +1,100 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +MEMORY ALLOCATION PROFILING +=========================== + +Low overhead (suitable for production) accounting of all memory allocations, +tracked by file and line number. + +Usage: +kconfig options: +- CONFIG_MEM_ALLOC_PROFILING + +- CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + +- CONFIG_MEM_ALLOC_PROFILING_DEBUG + adds warnings for allocations that weren't accounted because of a + missing annotation + +Boot parameter: + sysctl.vm.mem_profiling=0|1|never + + When set to "never", memory allocation profiling overhead is minimized and it + cannot be enabled at runtime (sysctl becomes read-only). + When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=y, default value is "1". + When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=n, default value is "never". + +sysctl: + /proc/sys/vm/mem_profiling + +Runtime info: + /proc/allocinfo + +Example output:: + + root@moria-kvm:~# sort -g /proc/allocinfo|tail|numfmt --to=iec + 2.8M 22648 fs/kernfs/dir.c:615 func:__kernfs_new_node + 3.8M 953 mm/memory.c:4214 func:alloc_anon_folio + 4.0M 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start + 4.1M 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable + 6.0M 1532 mm/filemap.c:1919 func:__filemap_get_folio + 8.8M 2785 kernel/fork.c:307 func:alloc_thread_stack_node + 13M 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs + 14M 3520 mm/mm_init.c:2530 func:alloc_large_system_hash + 15M 3656 mm/readahead.c:247 func:page_cache_ra_unbounded + 55M 4887 mm/slub.c:2259 func:alloc_slab_page + 122M 31168 mm/page_ext.c:270 func:alloc_page_ext + +=================== +Theory of operation +=================== + +Memory allocation profiling builds off of code tagging, which is a library for +declaring static structs (that typically describe a file and line number in +some way, hence code tagging) and then finding and operating on them at runtime, +- i.e. iterating over them to print them in debugfs/procfs. + +To add accounting for an allocation call, we replace it with a macro +invocation, alloc_hooks(), that +- declares a code tag +- stashes a pointer to it in task_struct +- calls the real allocation function +- and finally, restores the task_struct alloc tag pointer to its previous value. + +This allows for alloc_hooks() calls to be nested, with the most recent one +taking effect. This is important for allocations internal to the mm/ code that +do not properly belong to the outer allocation context and should be counted +separately: for example, slab object extension vectors, or when the slab +allocates pages from the page allocator. + +Thus, proper usage requires determining which function in an allocation call +stack should be tagged. There are many helper functions that essentially wrap +e.g. kmalloc() and do a little more work, then are called in multiple places; +we'll generally want the accounting to happen in the callers of these helpers, +not in the helpers themselves. + +To fix up a given helper, for example foo(), do the following: +- switch its allocation call to the _noprof() version, e.g. kmalloc_noprof() + +- rename it to foo_noprof() + +- define a macro version of foo() like so: + + #define foo(...) alloc_hooks(foo_noprof(__VA_ARGS__)) + +It's also possible to stash a pointer to an alloc tag in your own data structures. + +Do this when you're implementing a generic data structure that does allocations +"on behalf of" some other code - for example, the rhashtable code. This way, +instead of seeing a large line in /proc/allocinfo for rhashtable.c, we can +break it out by rhashtable type. + +To do so: +- Hook your data structure's init function, like any other allocation function. + +- Within your init function, use the convenience macro alloc_tag_record() to + record alloc tag in your data structure. + +- Then, use the following form for your allocations: + alloc_hooks_tag(ht->your_saved_tag, kmalloc_noprof(...)) diff --git a/Documentation/mm/index.rst b/Documentation/mm/index.rst index 31d2ac306438..48b9b559ca7b 100644 --- a/Documentation/mm/index.rst +++ b/Documentation/mm/index.rst @@ -26,6 +26,7 @@ see the :doc:`admin guide <../admin-guide/mm/index>`. page_cache shmfs oom + allocation-profiling Legacy Documentation ==================== diff --git a/MAINTAINERS b/MAINTAINERS index c0091a206fd2..796ea3fe982d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5255,6 +5255,13 @@ S: Supported F: Documentation/process/code-of-conduct-interpretation.rst F: Documentation/process/code-of-conduct.rst +CODE TAGGING +M: Suren Baghdasaryan <surenb@google.com> +M: Kent Overstreet <kent.overstreet@linux.dev> +S: Maintained +F: include/linux/codetag.h +F: lib/codetag.c + COMEDI DRIVERS M: Ian Abbott <abbotti@mev.co.uk> M: H Hartley Sweeten <hsweeten@visionengravers.com> @@ -14156,6 +14163,16 @@ F: mm/memblock.c F: mm/mm_init.c F: tools/testing/memblock/ +MEMORY ALLOCATION PROFILING +M: Suren Baghdasaryan <surenb@google.com> +M: Kent Overstreet <kent.overstreet@linux.dev> +L: linux-mm@kvack.org +S: Maintained +F: include/linux/alloc_tag.h +F: include/linux/codetag_ctx.h +F: lib/alloc_tag.c +F: lib/pgalloc_tag.c + MEMORY CONTROLLER DRIVERS M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> L: linux-kernel@vger.kernel.org diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index c81183935e97..7fcf3e9b7103 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -929,7 +929,7 @@ const struct dma_map_ops alpha_pci_ops = { .dma_supported = alpha_pci_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/alpha/lib/checksum.c b/arch/alpha/lib/checksum.c index 3f35c3ed6948..c29b98ef9c82 100644 --- a/arch/alpha/lib/checksum.c +++ b/arch/alpha/lib/checksum.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <asm/byteorder.h> +#include <asm/checksum.h> static inline unsigned short from64to16(unsigned long x) { diff --git a/arch/alpha/lib/fpreg.c b/arch/alpha/lib/fpreg.c index 7c08b225261c..3d32165043f8 100644 --- a/arch/alpha/lib/fpreg.c +++ b/arch/alpha/lib/fpreg.c @@ -8,6 +8,7 @@ #include <linux/compiler.h> #include <linux/export.h> #include <linux/preempt.h> +#include <asm/fpu.h> #include <asm/thread_info.h> #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) diff --git a/arch/alpha/lib/memcpy.c b/arch/alpha/lib/memcpy.c index cbac3dc6d963..0e536a1a39ff 100644 --- a/arch/alpha/lib/memcpy.c +++ b/arch/alpha/lib/memcpy.c @@ -18,6 +18,7 @@ #include <linux/types.h> #include <linux/export.h> +#include <linux/string.h> /* * This should be done in one go with ldq_u*2/mask/stq_u. Do it diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index fe28fc1f759d..dab42d066d06 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -32,6 +32,7 @@ #include <linux/kallsyms.h> #include <linux/proc_fs.h> #include <linux/export.h> +#include <linux/vmalloc.h> #include <asm/hardware/cache-l2x0.h> #include <asm/hardware/cache-uniphier.h> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 72c82a4d63ac..480e307501bb 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/irq.h> +#include <linux/vmalloc.h> #include <linux/atomic.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9afcc690fe73..4a92096db34e 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -10,6 +10,7 @@ #include <linux/efi.h> #include <linux/init.h> #include <linux/screen_info.h> +#include <linux/vmalloc.h> #include <asm/efi.h> #include <asm/stacktrace.h> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 6c82aea1c993..54062656dc7b 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -10,6 +10,7 @@ #define _ASM_LOONGARCH_KFENCE_H #include <linux/kfence.h> +#include <linux/vmalloc.h> #include <asm/pgtable.h> #include <asm/tlb.h> diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index eabddb89d221..c97b089b9902 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -617,7 +617,7 @@ const struct dma_map_ops jazz_dma_ops = { .sync_sg_for_device = jazz_dma_sync_sg_for_device, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(jazz_dma_ops); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 8920862ffd79..f0ae39e77e37 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1185efebf032..65468d0829e1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -26,6 +26,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3a440004b97d..a197d4c2244b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -16,6 +16,7 @@ #include <linux/highmem.h> #include <linux/suspend.h> #include <linux/dma-direct.h> +#include <linux/vmalloc.h> #include <asm/swiotlb.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index d6b5f5ecd515..56dc6b29a3e7 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -695,7 +695,7 @@ static const struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; @@ -709,7 +709,7 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 90ff85c879bf..477c1d5e1737 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -611,7 +611,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 54260c16f991..11c0d2e0becf 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -19,6 +19,7 @@ #include <linux/libfdt.h> #include <linux/types.h> #include <linux/memblock.h> +#include <linux/vmalloc.h> #include <asm/setup.h> int arch_kimage_file_post_load_cleanup(struct kimage *image) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d..71a8b8945b26 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -6,6 +6,7 @@ #include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> +#include <linux/vmalloc.h> #include <asm/ptrace.h> #include <linux/uaccess.h> #include <asm/sections.h> diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index 554447768bdd..bf983513dd33 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -21,6 +21,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/sysfs.h> +#include <linux/vmalloc.h> #include <crypto/sha2.h> #include <keys/user-type.h> #include <asm/debug.h> diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 1486350a4177..5d6d381aa0be 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -20,6 +20,7 @@ #include <linux/gfp.h> #include <linux/crash_dump.h> #include <linux/debug_locks.h> +#include <linux/vmalloc.h> #include <asm/asm-extable.h> #include <asm/diag.h> #include <asm/ipl.h> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 294cd2a40818..7452fc193b4f 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -42,6 +42,7 @@ #include <asm/early_ioremap.h> #include <asm/pgtable_types.h> #include <asm/shared/io.h> +#include <asm/special_insns.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 2ae98f754e59..c884deca839b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -676,7 +676,7 @@ static const struct dma_map_ops gart_dma_ops = { .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, .get_required_mask = dma_direct_get_required_mask, - .alloc_pages = dma_direct_alloc_pages, + .alloc_pages_op = dma_direct_alloc_pages, .free_pages = dma_direct_free_pages, }; diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 166692f2d501..27892e57c4ef 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -13,6 +13,7 @@ #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/sysfs.h> +#include <linux/vmalloc.h> #include <asm/sgx.h> #include "driver.h" #include "encl.h" diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index fe0c859873d1..ade0043ce56e 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,7 @@ #include <linux/uaccess.h> #include <linux/smp.h> #include <linux/sched/task_stack.h> +#include <linux/vmalloc.h> #include <asm/cpu_entry_area.h> #include <asm/softirq_stack.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 622d12ec7f08..a4cc20d0036d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -20,6 +20,7 @@ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <linux/mm.h> /* find_and_lock_vma() */ +#include <linux/vmalloc.h> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index fe6161299236..128aef8e5a19 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -6,6 +6,7 @@ #include <linux/bitfield.h> #include <linux/highmem.h> #include <linux/set_memory.h> +#include <linux/vmalloc.h> #include <drm/drm_cache.h> diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c index a70b01ccdf70..4d78b33eaa82 100644 --- a/drivers/gpu/drm/gma500/mmu.c +++ b/drivers/gpu/drm/gma500/mmu.c @@ -5,6 +5,7 @@ **************************************************************************/ #include <linux/highmem.h> +#include <linux/vmalloc.h> #include "mmu.h" #include "psb_drv.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 0ba955611dfb..8780aa243105 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -5,6 +5,7 @@ */ #include <drm/drm_cache.h> +#include <linux/vmalloc.h> #include "gt/intel_gt.h" #include "gt/intel_tlb.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index b2a5882b8f81..075657018739 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -4,6 +4,7 @@ * Copyright © 2016 Intel Corporation */ +#include <linux/vmalloc.h> #include "mock_dmabuf.h" static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index bccc3a1200bc..1fb6ff77fd89 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/shmem_fs.h> +#include <linux/vmalloc.h> #include "i915_drv.h" #include "gem/i915_gem_object.h" diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 4dd52ac2043e..d800d267f0e9 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -30,6 +30,7 @@ #include <linux/firmware.h> #include <linux/crc32.h> +#include <linux/vmalloc.h> #include "i915_drv.h" #include "gvt.h" diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 094fca9b0e73..58cca4906f41 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -39,6 +39,7 @@ #include "trace.h" #include "gt/intel_gt_regs.h" +#include <linux/vmalloc.h> #if defined(VERBOSE_DEBUG) #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index efcb00472be2..ea9c30092767 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -52,6 +52,7 @@ #include "display/skl_watermark_regs.h" #include "display/vlv_dsi_pll_regs.h" #include "gt/intel_gt_regs.h" +#include <linux/vmalloc.h> /* XXX FIXME i915 has changed PP_XXX definition */ #define PCH_PP_STATUS _MMIO(0xc7200) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 5b5def6ddef7..780762f28aa4 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -33,6 +33,7 @@ * */ +#include <linux/vmalloc.h> #include "i915_drv.h" #include "i915_reg.h" #include "gvt.h" diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 08ad1bd651f1..63c751ca4119 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -34,6 +34,7 @@ #include "i915_drv.h" #include "gvt.h" #include "i915_pvinfo.h" +#include <linux/vmalloc.h> void populate_pvinfo_page(struct intel_vgpu *vgpu) { diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 9b6d87c8b583..5a01d60e5186 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -28,6 +28,7 @@ #include "gt/intel_context.h" #include "gt/intel_ring.h" #include "gt/shmem_utils.h" +#include <linux/vmalloc.h> /** * DOC: Intel GVT-g host support diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c index b7fef3c797e6..6563dcde109c 100644 --- a/drivers/gpu/drm/imagination/pvr_vm_mips.c +++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/types.h> +#include <linux/vmalloc.h> /** * pvr_vm_mips_init() - Initialise MIPS FW pagetable diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 4f2e3feabc0f..3e519869b632 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -4,6 +4,7 @@ */ #include <linux/dma-buf.h> +#include <linux/vmalloc.h> #include <drm/drm.h> #include <drm/drm_device.h> diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 3421e8389222..9ea0c64c26b5 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -9,6 +9,7 @@ #include <linux/shmem_fs.h> #include <linux/spinlock.h> #include <linux/pfn_t.h> +#include <linux/vmalloc.h> #include <drm/drm_prime.h> #include <drm/drm_vma_manager.h> diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index a07ede668cc1..a165cbcdd27b 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -21,6 +21,7 @@ #include <linux/dma-buf.h> #include <linux/pfn_t.h> +#include <linux/vmalloc.h> #include "v3d_drv.h" #include "uapi/drm/v3d_drm.h" diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c index ae2de914eb89..2731f6ded1c2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c @@ -54,6 +54,7 @@ #include "vmwgfx_drv.h" #include "vmwgfx_binding.h" #include "device_include/svga3d_reg.h" +#include <linux/vmalloc.h> #define VMW_BINDING_RT_BIT 0 #define VMW_BINDING_PS_BIT 1 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index 195ff8792e5a..dd4ca6a9c690 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -31,6 +31,7 @@ #include <drm/ttm/ttm_placement.h> #include <linux/sched/signal.h> +#include <linux/vmalloc.h> bool vmw_supports_3d(struct vmw_private *dev_priv) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c index 829df395c2ed..6e6beff9e262 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <linux/vmalloc.h> #include "vmwgfx_devcaps.h" #include "vmwgfx_drv.h" diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index c7d90f96d16a..a7e209ca926b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -53,6 +53,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/version.h> +#include <linux/vmalloc.h> #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index cc3086e649eb..2e52d73eba48 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -35,6 +35,7 @@ #include <linux/sync_file.h> #include <linux/hashtable.h> +#include <linux/vmalloc.h> /* * Helper macro to get dx_ctx_node if available otherwise print an error diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index a1da5678c731..835d1eed8dd9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -31,6 +31,7 @@ #include <drm/vmwgfx_drm.h> #include <linux/pci.h> +#include <linux/vmalloc.h> int vmw_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 3ad2b4cfd1f0..63112ed975c4 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -11,6 +11,7 @@ #include <linux/dma-buf.h> #include <linux/scatterlist.h> #include <linux/shmem_fs.h> +#include <linux/vmalloc.h> #include <drm/drm_gem.h> #include <drm/drm_prime.h> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 6136776482e6..96a32b213669 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -17,6 +17,7 @@ #include <asm/barrier.h> #include <asm/cpufeature.h> +#include <linux/vmalloc.h> #include "coresight-self-hosted-trace.h" #include "coresight-trbe.h" diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e4cb26f6a943..07d087eecc17 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1723,7 +1723,7 @@ static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, .free_noncontiguous = iommu_dma_free_noncontiguous, diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c index 2e2c3be8a0b4..e6eb98d70f3c 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/pci.h> #include <linux/etherdevice.h> +#include <linux/vmalloc.h> #include "octep_config.h" #include "octep_main.h" diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 2729a2c5acf9..11021c34e47e 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -3,6 +3,7 @@ #include <net/mana/gdma.h> #include <net/mana/hw_channel.h> +#include <linux/vmalloc.h> static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) { diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 9ce0d20a6c58..feef537257d0 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1022,7 +1022,7 @@ static const struct dma_map_ops ccio_ops = { .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 784037837f65..fc3863c09f83 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1090,7 +1090,7 @@ static const struct dma_map_ops sba_ops = { .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c index 38d1b692d3c0..40e010877189 100644 --- a/drivers/platform/x86/uv_sysfs.c +++ b/drivers/platform/x86/uv_sysfs.c @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/kobject.h> +#include <linux/vmalloc.h> #include <asm/uv/bios.h> #include <asm/uv/uv.h> #include <asm/uv/uv_hub.h> diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index d32ad46318cb..dabb91f0f75d 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -7,6 +7,8 @@ * */ +#include <linux/vmalloc.h> + #include "mpi3mr.h" /** diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c index bb12644fd033..3e2899ad8517 100644 --- a/drivers/staging/media/atomisp/pci/hmm/hmm.c +++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c @@ -205,7 +205,7 @@ static ia_css_ptr __hmm_alloc(size_t bytes, enum hmm_bo_type type, } dev_dbg(atomisp_dev, "pages: 0x%08x (%zu bytes), type: %d, vmalloc %p\n", - bo->start, bytes, type, vmalloc); + bo->start, bytes, type, vmalloc_noprof); return bo->start; diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 68e8f006dfdb..c51f5e4c3dd6 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -3,6 +3,7 @@ #include <linux/interval_tree.h> #include <linux/vfio.h> +#include <linux/vmalloc.h> #include <linux/pds/pds_common.h> #include <linux/pds/pds_core_if.h> diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c index fa5d9ca6be57..c088ee1f1180 100644 --- a/drivers/virt/acrn/mm.c +++ b/drivers/virt/acrn/mm.c @@ -12,6 +12,7 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include "acrn_drv.h" diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 8e3223294442..e8355f55a8f7 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -21,6 +21,7 @@ #include <linux/bitmap.h> #include <linux/lockdep.h> #include <linux/log2.h> +#include <linux/vmalloc.h> #include <acpi/acpi_numa.h> diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index 76f6f26265a3..29257d2639db 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -282,7 +282,7 @@ static int xen_grant_dma_supported(struct device *dev, u64 mask) static const struct dma_map_ops xen_grant_dma_ops = { .alloc = xen_grant_dma_alloc, .free = xen_grant_dma_free, - .alloc_pages = xen_grant_dma_alloc_pages, + .alloc_pages_op = xen_grant_dma_alloc_pages, .free_pages = xen_grant_dma_free_pages, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0e6c6c25d154..1c4ef5111651 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -403,7 +403,7 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, .max_mapping_size = swiotlb_max_mapping_size, }; diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h new file mode 100644 index 000000000000..64f536b80380 --- /dev/null +++ b/include/asm-generic/codetag.lds.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GENERIC_CODETAG_LDS_H +#define __ASM_GENERIC_CODETAG_LDS_H + +#define SECTION_WITH_BOUNDARIES(_name) \ + . = ALIGN(8); \ + __start_##_name = .; \ + KEEP(*(_name)) \ + __stop_##_name = .; + +#define CODETAG_SECTIONS() \ + SECTION_WITH_BOUNDARIES(alloc_tags) + +#endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index bac63e874c7b..c27313414a82 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -991,7 +991,6 @@ static inline void iowrite64_rep(volatile void __iomem *addr, #ifdef __KERNEL__ -#include <linux/vmalloc.h> #define __io_virt(x) ((void __force *)(x)) /* diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f7749d0f2562..3e4497b5135a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -50,6 +50,8 @@ * [__nosave_begin, __nosave_end] for the nosave data */ +#include <asm-generic/codetag.lds.h> + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -366,6 +368,7 @@ . = ALIGN(8); \ BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \ BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \ + CODETAG_SECTIONS() \ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h new file mode 100644 index 000000000000..100ddf66eb8e --- /dev/null +++ b/include/linux/alloc_tag.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * allocation tagging + */ +#ifndef _LINUX_ALLOC_TAG_H +#define _LINUX_ALLOC_TAG_H + +#include <linux/bug.h> +#include <linux/codetag.h> +#include <linux/container_of.h> +#include <linux/preempt.h> +#include <asm/percpu.h> +#include <linux/cpumask.h> +#include <linux/static_key.h> + +struct alloc_tag_counters { + u64 bytes; + u64 calls; +}; + +/* + * An instance of this structure is created in a special ELF section at every + * allocation callsite. At runtime, the special section is treated as + * an array of these. Embedded codetag utilizes codetag framework. + */ +struct alloc_tag { + struct codetag ct; + struct alloc_tag_counters __percpu *counters; +} __aligned(8); + +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + +#define CODETAG_EMPTY ((void *)1) + +static inline bool is_codetag_empty(union codetag_ref *ref) +{ + return ref->ct == CODETAG_EMPTY; +} + +static inline void set_codetag_empty(union codetag_ref *ref) +{ + if (ref) + ref->ct = CODETAG_EMPTY; +} + +#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + +static inline bool is_codetag_empty(union codetag_ref *ref) { return false; } +static inline void set_codetag_empty(union codetag_ref *ref) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + +#ifdef CONFIG_MEM_ALLOC_PROFILING + +struct codetag_bytes { + struct codetag *ct; + s64 bytes; +}; + +size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep); + +static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) +{ + return container_of(ct, struct alloc_tag, ct); +} + +#ifdef ARCH_NEEDS_WEAK_PER_CPU +/* + * When percpu variables are required to be defined as weak, static percpu + * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). + */ +#error "Memory allocation profiling is incompatible with ARCH_NEEDS_WEAK_PER_CPU" +#endif + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section("alloc_tags") = { \ + .ct = CODE_TAG_INIT, \ + .counters = &_alloc_tag_cntr }; + +DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + mem_alloc_profiling_key); + +static inline bool mem_alloc_profiling_enabled(void) +{ + return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + &mem_alloc_profiling_key); +} + +static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag) +{ + struct alloc_tag_counters v = { 0, 0 }; + struct alloc_tag_counters *counter; + int cpu; + + for_each_possible_cpu(cpu) { + counter = per_cpu_ptr(tag->counters, cpu); + v.bytes += counter->bytes; + v.calls += counter->calls; + } + + return v; +} + +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG +static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) +{ + WARN_ONCE(ref && ref->ct, + "alloc_tag was not cleared (got tag for %s:%u)\n", + ref->ct->filename, ref->ct->lineno); + + WARN_ONCE(!tag, "current->alloc_tag not set"); +} + +static inline void alloc_tag_sub_check(union codetag_ref *ref) +{ + WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n"); +} +#else +static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {} +static inline void alloc_tag_sub_check(union codetag_ref *ref) {} +#endif + +/* Caller should verify both ref and tag to be valid */ +static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + ref->ct = &tag->ct; + /* + * We need in increment the call counter every time we have a new + * allocation or when we split a large allocation into smaller ones. + * Each new reference for every sub-allocation needs to increment call + * counter because when we free each part the counter will be decremented. + */ + this_cpu_inc(tag->counters->calls); +} + +static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return; + + __alloc_tag_ref_set(ref, tag); +} + +static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) +{ + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return; + + __alloc_tag_ref_set(ref, tag); + this_cpu_add(tag->counters->bytes, bytes); +} + +static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) +{ + struct alloc_tag *tag; + + alloc_tag_sub_check(ref); + if (!ref || !ref->ct) + return; + + if (is_codetag_empty(ref)) { + ref->ct = NULL; + return; + } + + tag = ct_to_alloc_tag(ref->ct); + + this_cpu_sub(tag->counters->bytes, bytes); + this_cpu_dec(tag->counters->calls); + + ref->ct = NULL; +} + +#define alloc_tag_record(p) ((p) = current->alloc_tag) + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +#define DEFINE_ALLOC_TAG(_alloc_tag) +static inline bool mem_alloc_profiling_enabled(void) { return false; } +static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, + size_t bytes) {} +static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {} +#define alloc_tag_record(p) do {} while (0) + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + +#define alloc_hooks_tag(_tag, _do_alloc) \ +({ \ + struct alloc_tag * __maybe_unused _old = alloc_tag_save(_tag); \ + typeof(_do_alloc) _res = _do_alloc; \ + alloc_tag_restore(_tag, _old); \ + _res; \ +}) + +#define alloc_hooks(_do_alloc) \ +({ \ + DEFINE_ALLOC_TAG(_alloc_tag); \ + alloc_hooks_tag(&_alloc_tag, _do_alloc); \ +}) + +#endif /* _LINUX_ALLOC_TAG_H */ diff --git a/include/linux/codetag.h b/include/linux/codetag.h new file mode 100644 index 000000000000..c2a579ccd455 --- /dev/null +++ b/include/linux/codetag.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * code tagging framework + */ +#ifndef _LINUX_CODETAG_H +#define _LINUX_CODETAG_H + +#include <linux/types.h> + +struct codetag_iterator; +struct codetag_type; +struct codetag_module; +struct seq_buf; +struct module; + +/* + * An instance of this structure is created in a special ELF section at every + * code location being tagged. At runtime, the special section is treated as + * an array of these. + */ +struct codetag { + unsigned int flags; /* used in later patches */ + unsigned int lineno; + const char *modname; + const char *function; + const char *filename; +} __aligned(8); + +union codetag_ref { + struct codetag *ct; +}; + +struct codetag_type_desc { + const char *section; + size_t tag_size; + void (*module_load)(struct codetag_type *cttype, + struct codetag_module *cmod); + bool (*module_unload)(struct codetag_type *cttype, + struct codetag_module *cmod); +}; + +struct codetag_iterator { + struct codetag_type *cttype; + struct codetag_module *cmod; + unsigned long mod_id; + struct codetag *ct; +}; + +#ifdef MODULE +#define CT_MODULE_NAME KBUILD_MODNAME +#else +#define CT_MODULE_NAME NULL +#endif + +#define CODE_TAG_INIT { \ + .modname = CT_MODULE_NAME, \ + .function = __func__, \ + .filename = __FILE__, \ + .lineno = __LINE__, \ + .flags = 0, \ +} + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock); +bool codetag_trylock_module_list(struct codetag_type *cttype); +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype); +struct codetag *codetag_next_ct(struct codetag_iterator *iter); + +void codetag_to_text(struct seq_buf *out, struct codetag *ct); + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc); + +#if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) +void codetag_load_module(struct module *mod); +bool codetag_unload_module(struct module *mod); +#else +static inline void codetag_load_module(struct module *mod) {} +static inline bool codetag_unload_module(struct module *mod) { return true; } +#endif + +#endif /* _LINUX_CODETAG_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4abc60f04209..9ee319851b5f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -29,7 +29,7 @@ struct dma_map_ops { unsigned long attrs); void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, + struct page *(*alloc_pages_op)(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 6aeebe0a6777..b24d62bad0b3 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -725,9 +725,9 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) return __real_memchr_inv(p, c, size); } -extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup) +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup_noprof) __realloc_size(2); -__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) +__FORTIFY_INLINE void *kmemdup_noprof(const void * const POS0 p, size_t size, gfp_t gfp) { const size_t p_size = __struct_size(p); @@ -737,6 +737,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL); return __real_kmemdup(p, size, gfp); } +#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__)) /** * strcpy - Copy a string into another string buffer diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fc429b0af0..034f0c918eea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3083,11 +3083,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap, * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ -static inline void * -alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) -{ - return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); -} +#define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp) extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c775ea3c6015..450c2cbcf04b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -6,6 +6,8 @@ #include <linux/mmzone.h> #include <linux/topology.h> +#include <linux/alloc_tag.h> +#include <linux/sched.h> struct vm_area_struct; struct mempolicy; @@ -175,42 +177,46 @@ static inline void arch_free_page(struct page *page, int order) { } static inline void arch_alloc_page(struct page *page, int order) { } #endif -struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, +struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); -struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, +#define __alloc_pages(...) alloc_hooks(__alloc_pages_noprof(__VA_ARGS__)) + +struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +#define __folio_alloc(...) alloc_hooks(__folio_alloc_noprof(__VA_ARGS__)) -unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, +unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array); +#define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) -unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, +unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); +#define alloc_pages_bulk_array_mempolicy(...) \ + alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ -static inline unsigned long -alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) -{ - return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL); -} +#define alloc_pages_bulk_list(_gfp, _nr_pages, _list) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL) -static inline unsigned long -alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array) -{ - return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); -} +#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array) static inline unsigned long -alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) +alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, + struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); + return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array); } +#define alloc_pages_bulk_array_node(...) \ + alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__)) + static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); @@ -230,82 +236,104 @@ static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) * online. For more general interface, see alloc_pages_node(). */ static inline struct page * -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) +__alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp_mask); - return __alloc_pages(gfp_mask, order, nid, NULL); + return __alloc_pages_noprof(gfp_mask, order, nid, NULL); } +#define __alloc_pages_node(...) alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__)) + static inline -struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +struct folio *__folio_alloc_node_noprof(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp); - return __folio_alloc(gfp, order, nid, NULL); + return __folio_alloc_noprof(gfp, order, nid, NULL); } +#define __folio_alloc_node(...) alloc_hooks(__folio_alloc_node_noprof(__VA_ARGS__)) + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and * online. */ -static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order) +static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask, + unsigned int order) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return __alloc_pages_node(nid, gfp_mask, order); + return __alloc_pages_node_noprof(nid, gfp_mask, order); } +#define alloc_pages_node(...) alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__)) + #ifdef CONFIG_NUMA -struct page *alloc_pages(gfp_t gfp, unsigned int order); -struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, +struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); +struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); -struct folio *folio_alloc(gfp_t gfp, unsigned int order); -struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, +struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); +struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else -static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) +static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { - return alloc_pages_node(numa_node_id(), gfp_mask, order); + return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order); } -static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, +static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) { - return alloc_pages(gfp, order); + return alloc_pages_noprof(gfp, order); } -static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ - folio_alloc(gfp, order) +#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \ + folio_alloc_noprof(gfp, order) #endif + +#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) +#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__)) +#define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) +#define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) + #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -static inline struct page *alloc_page_vma(gfp_t gfp, + +static inline struct page *alloc_page_vma_noprof(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { - struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false); + struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false); return &folio->page; } +#define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) + +extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); +#define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) -extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); -extern unsigned long get_zeroed_page(gfp_t gfp_mask); +extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask); +#define get_zeroed_page(...) alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__)) + +void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1); +#define alloc_pages_exact(...) alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__)) -void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); -#define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask), 0) +__meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); +#define alloc_pages_exact_nid(...) \ + alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__)) + +#define __get_free_page(gfp_mask) \ + __get_free_pages((gfp_mask), 0) -#define __get_dma_pages(gfp_mask, order) \ - __get_free_pages((gfp_mask) | GFP_DMA, (order)) +#define __get_dma_pages(gfp_mask, order) \ + __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); @@ -374,10 +402,14 @@ extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC /* The below functions must be run on a range from a single zone. */ -extern int alloc_contig_range(unsigned long start, unsigned long end, +extern int alloc_contig_range_noprof(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); -extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, - int nid, nodemask_t *nodemask); +#define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__)) + +extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, + int nid, nodemask_t *nodemask); +#define alloc_contig_pages(...) alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__)) + #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 868c8fb1bbc1..e36e168d8cfd 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -53,6 +53,9 @@ enum { #ifdef CONFIG_LOCKDEP ___GFP_NOLOCKDEP_BIT, #endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif ___GFP_LAST_BIT }; @@ -93,6 +96,11 @@ enum { #else #define ___GFP_NOLOCKDEP 0 #endif +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif /* * Physical address zone modifiers (see linux/mmzone.h - low four bits) @@ -133,12 +141,15 @@ enum { * node with no fallbacks or placement policy enforcements. * * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) /** * DOC: Watermark modifiers diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 394fd0a887ae..3dfb69f97c67 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -349,15 +349,32 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { - /* page->memcg_data is a pointer to an objcgs vector */ - MEMCG_DATA_OBJCGS = (1UL << 0), + /* page->memcg_data is a pointer to an slabobj_ext vector */ + MEMCG_DATA_OBJEXTS = (1UL << 0), /* page has been accounted as a non-slab kernel page */ MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; -#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS + +#else /* CONFIG_MEMCG */ + +#define __FIRST_OBJEXT_FLAG (1UL << 0) + +#endif /* CONFIG_MEMCG */ + +enum objext_flags { + /* slabobj_ext vector failed to allocate */ + OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG, + /* the next bit after the last actual flag */ + __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), +}; + +#define OBJEXTS_FLAGS_MASK (__NR_OBJEXTS_FLAGS - 1) + +#ifdef CONFIG_MEMCG static inline bool folio_memcg_kmem(struct folio *folio); @@ -388,10 +405,10 @@ static inline struct mem_cgroup *__folio_memcg(struct folio *folio) unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -409,10 +426,10 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); - return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct obj_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -469,11 +486,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -506,17 +523,17 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) */ unsigned long memcg_data = READ_ONCE(folio->memcg_data); - if (memcg_data & MEMCG_DATA_OBJCGS) + if (memcg_data & MEMCG_DATA_OBJEXTS) return NULL; if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } static inline struct mem_cgroup *page_memcg_check(struct page *page) @@ -552,7 +569,7 @@ retry: static inline bool folio_memcg_kmem(struct folio *folio) { VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); - VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJEXTS, folio); return folio->memcg_data & MEMCG_DATA_KMEM; } @@ -1632,6 +1649,19 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, } #endif /* CONFIG_MEMCG */ +/* + * Extended information for slab objects stored as an array in page->memcg_data + * if MEMCG_DATA_OBJEXTS is set. + */ +struct slabobj_ext { +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *objcg; +#endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + union codetag_ref ref; +#endif +} __aligned(8); + static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 16c5cc807ff6..7b151441341b 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -5,6 +5,8 @@ #ifndef _LINUX_MEMPOOL_H #define _LINUX_MEMPOOL_H +#include <linux/sched.h> +#include <linux/alloc_tag.h> #include <linux/wait.h> #include <linux/compiler.h> @@ -39,18 +41,32 @@ void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id); -int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + +int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); +#define mempool_init(...) \ + alloc_hooks(mempool_init_noprof(__VA_ARGS__)) extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); -extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, + +extern mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid); +#define mempool_create_node(...) \ + alloc_hooks(mempool_create_node_noprof(__VA_ARGS__)) + +#define mempool_create(_min_nr, _alloc_fn, _free_fn, _pool_data) \ + mempool_create_node(_min_nr, _alloc_fn, _free_fn, _pool_data, \ + GFP_KERNEL, NUMA_NO_NODE) extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); -extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; + +extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc; +#define mempool_alloc(...) \ + alloc_hooks(mempool_alloc_noprof(__VA_ARGS__)) + extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc; extern void mempool_free(void *element, mempool_t *pool); @@ -62,19 +78,10 @@ extern void mempool_free(void *element, mempool_t *pool); void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); -static inline int -mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) -{ - return mempool_init(pool, min_nr, mempool_alloc_slab, - mempool_free_slab, (void *) kc); -} - -static inline mempool_t * -mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) -{ - return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, - (void *) kc); -} +#define mempool_init_slab_pool(_pool, _min_nr, _kc) \ + mempool_init(_pool, (_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc)) +#define mempool_create_slab_pool(_min_nr, _kc) \ + mempool_create((_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc)) /* * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the @@ -83,17 +90,12 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); -static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) -{ - return mempool_init(pool, min_nr, mempool_kmalloc, - mempool_kfree, (void *) size); -} - -static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) -{ - return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, - (void *) size); -} +#define mempool_init_kmalloc_pool(_pool, _min_nr, _size) \ + mempool_init(_pool, (_min_nr), mempool_kmalloc, mempool_kfree, \ + (void *)(unsigned long)(_size)) +#define mempool_create_kmalloc_pool(_min_nr, _size) \ + mempool_create((_min_nr), mempool_kmalloc, mempool_kfree, \ + (void *)(unsigned long)(_size)) void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kvfree(void *element, void *pool_data); @@ -115,16 +117,11 @@ static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size) void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); void mempool_free_pages(void *element, void *pool_data); -static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) -{ - return mempool_init(pool, min_nr, mempool_alloc_pages, - mempool_free_pages, (void *)(long)order); -} - -static inline mempool_t *mempool_create_page_pool(int min_nr, int order) -{ - return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, - (void *)(long)order); -} +#define mempool_init_page_pool(_pool, _min_nr, _order) \ + mempool_init(_pool, (_min_nr), mempool_alloc_pages, \ + mempool_free_pages, (void *)(long)(_order)) +#define mempool_create_page_pool(_min_nr, _order) \ + mempool_create((_min_nr), mempool_alloc_pages, \ + mempool_free_pages, (void *)(long)(_order)) #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 0436b919f1c7..150de10e0825 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #include <linux/errno.h> #include <linux/mmdebug.h> #include <linux/gfp.h> +#include <linux/pgalloc_tag.h> #include <linux/bug.h> #include <linux/list.h> #include <linux/mmzone.h> @@ -3132,6 +3133,14 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ static inline void free_reserved_page(struct page *page) { + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } ClearPageReserved(page); init_page_count(page); __free_page(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5240bd7bca33..4ae4684d1add 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -169,7 +169,7 @@ struct page { /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; -#ifdef CONFIG_MEMCG +#ifdef CONFIG_SLAB_OBJ_EXT unsigned long memcg_data; #endif @@ -331,7 +331,7 @@ struct folio { }; atomic_t _mapcount; atomic_t _refcount; -#ifdef CONFIG_MEMCG +#ifdef CONFIG_SLAB_OBJ_EXT unsigned long memcg_data; #endif #if defined(WANT_PAGE_VIRTUAL) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index be98564191e6..07e0656898f9 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -4,7 +4,6 @@ #include <linux/types.h> #include <linux/stacktrace.h> -#include <linux/stackdepot.h> struct pglist_data; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..35636e67e2e1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -542,14 +542,17 @@ static inline void *detach_page_private(struct page *page) #endif #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order); #else -static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) +static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) { - return folio_alloc(gfp, order); + return folio_alloc_noprof(gfp, order); } #endif +#define filemap_alloc_folio(...) \ + alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__)) + static inline struct page *__page_cache_alloc(gfp_t gfp) { return &filemap_alloc_folio(gfp, 0)->page; diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 30581e2e04cc..5802e1deef24 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -4,6 +4,8 @@ #ifndef _PDS_COMMON_H_ #define _PDS_COMMON_H_ +#include <linux/notifier.h> + #define PDS_CORE_DRV_NAME "pds_core" /* the device's internal addressing uses up to 52 bits */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8c677f185901..e54921c79c9a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -2,6 +2,7 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H +#include <linux/alloc_tag.h> #include <linux/mmdebug.h> #include <linux/preempt.h> #include <linux/smp.h> @@ -9,12 +10,17 @@ #include <linux/pfn.h> #include <linux/init.h> #include <linux/cleanup.h> +#include <linux/sched.h> #include <asm/percpu.h> /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES +#ifdef CONFIG_MEM_ALLOC_PROFILING +#define PERCPU_MODULE_RESERVE (8 << 12) +#else #define PERCPU_MODULE_RESERVE (8 << 10) +#endif #else #define PERCPU_MODULE_RESERVE 0 #endif @@ -121,7 +127,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); #endif -extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1); extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); @@ -129,14 +134,16 @@ extern bool is_kernel_percpu_address(unsigned long addr); extern void __init setup_per_cpu_areas(void); #endif -extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); -extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); -extern void free_percpu(void __percpu *__pdata); +extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, + gfp_t gfp) __alloc_size(1); extern size_t pcpu_alloc_size(void __percpu *__pdata); -DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) - -extern phys_addr_t per_cpu_ptr_to_phys(void *addr); +#define __alloc_percpu_gfp(_size, _align, _gfp) \ + alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp)) +#define __alloc_percpu(_size, _align) \ + alloc_hooks(pcpu_alloc_noprof(_size, _align, false, GFP_KERNEL)) +#define __alloc_reserved_percpu(_size, _align) \ + alloc_hooks(pcpu_alloc_noprof(_size, _align, true, GFP_KERNEL)) #define alloc_percpu_gfp(type, gfp) \ (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ @@ -145,6 +152,12 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) +extern void free_percpu(void __percpu *__pdata); + +DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) + +extern phys_addr_t per_cpu_ptr_to_phys(void *addr); + extern unsigned long pcpu_nr_pages(void); #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h new file mode 100644 index 000000000000..62d8dad74b37 --- /dev/null +++ b/include/linux/pgalloc_tag.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * page allocation tagging + */ +#ifndef _LINUX_PGALLOC_TAG_H +#define _LINUX_PGALLOC_TAG_H + +#include <linux/alloc_tag.h> + +#ifdef CONFIG_MEM_ALLOC_PROFILING + +#include <linux/page_ext.h> + +extern struct page_ext_operations page_alloc_tagging_ops; +extern struct page_ext *page_ext_get(struct page *page); +extern void page_ext_put(struct page_ext *page_ext); + +static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext) +{ + return (void *)page_ext + page_alloc_tagging_ops.offset; +} + +static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref) +{ + return (void *)ref - page_alloc_tagging_ops.offset; +} + +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline union codetag_ref *get_page_tag_ref(struct page *page) +{ + if (page) { + struct page_ext *page_ext = page_ext_get(page); + + if (page_ext) + return codetag_ref_from_page_ext(page_ext); + } + return NULL; +} + +static inline void put_page_tag_ref(union codetag_ref *ref) +{ + page_ext_put(page_ext_from_codetag_ref(ref)); +} + +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr); + put_page_tag_ref(ref); + } + } +} + +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + alloc_tag_sub(ref, PAGE_SIZE * nr); + put_page_tag_ref(ref); + } + } +} + +static inline void pgalloc_tag_split(struct page *page, unsigned int nr) +{ + int i; + struct page_ext *page_ext; + union codetag_ref *ref; + struct alloc_tag *tag; + + if (!mem_alloc_profiling_enabled()) + return; + + page_ext = page_ext_get(page); + if (unlikely(!page_ext)) + return; + + ref = codetag_ref_from_page_ext(page_ext); + if (!ref->ct) + goto out; + + tag = ct_to_alloc_tag(ref->ct); + page_ext = page_ext_next(page_ext); + for (i = 1; i < nr; i++) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); + page_ext = page_ext_next(page_ext); + } +out: + page_ext_put(page_ext); +} + +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + struct alloc_tag *tag = NULL; + + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + alloc_tag_sub_check(ref); + if (ref && ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } + + return tag; +} + +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) +{ + if (mem_alloc_profiling_enabled() && tag) + this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); +} + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } +static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) {} +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} +static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + +#endif /* _LINUX_PGALLOC_TAG_H */ diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index b6f3797277ff..015c8298bebc 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -9,6 +9,7 @@ #ifndef _LINUX_RHASHTABLE_TYPES_H #define _LINUX_RHASHTABLE_TYPES_H +#include <linux/alloc_tag.h> #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/mutex.h> @@ -88,6 +89,9 @@ struct rhashtable { struct mutex mutex; spinlock_t lock; atomic_t nelems; +#ifdef CONFIG_MEM_ALLOC_PROFILING + struct alloc_tag *alloc_tag; +#endif }; /** @@ -127,9 +131,12 @@ struct rhashtable_iter { bool end_of_table; }; -int rhashtable_init(struct rhashtable *ht, +int rhashtable_init_noprof(struct rhashtable *ht, const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, +#define rhashtable_init(...) alloc_hooks(rhashtable_init_noprof(__VA_ARGS__)) + +int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params); +#define rhltable_init(...) alloc_hooks(rhltable_init_noprof(__VA_ARGS__)) #endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3c2abbc587b4..4118b3f959c3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -770,6 +770,10 @@ struct task_struct { unsigned int flags; unsigned int ptrace; +#ifdef CONFIG_MEM_ALLOC_PROFILING + struct alloc_tag *alloc_tag; +#endif + #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; @@ -810,6 +814,7 @@ struct task_struct { struct task_group *sched_task_group; #endif + #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. @@ -2187,4 +2192,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); +#ifdef CONFIG_MEM_ALLOC_PROFILING +static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +{ + swap(current->alloc_tag, tag); + return tag; +} + +static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +{ +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); +#endif + current->alloc_tag = old; +} +#else +#define alloc_tag_save(_tag) NULL +#define alloc_tag_restore(_tag, _old) do {} while (0) +#endif + #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index e53cbfa18325..373862f17694 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -56,6 +56,9 @@ enum _slab_flag_bits { #endif _SLAB_OBJECT_POISON, _SLAB_CMPXCHG_DOUBLE, +#ifdef CONFIG_SLAB_OBJ_EXT + _SLAB_NO_OBJ_EXT, +#endif _SLAB_FLAGS_LAST_BIT }; @@ -202,6 +205,13 @@ enum _slab_flag_bits { #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ +/* Slab created using create_boot_cache */ +#ifdef CONFIG_SLAB_OBJ_EXT +#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT) +#else +#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED +#endif + /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -261,7 +271,10 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); +void * __must_check krealloc_noprof(const void *objp, size_t new_size, + gfp_t flags) __realloc_size(2); +#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) + void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); @@ -513,7 +526,10 @@ static __always_inline unsigned int __kmalloc_index(size_t size, static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); +#include <linux/alloc_tag.h> + +void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); +#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__)) /** * kmem_cache_alloc - Allocate an object @@ -525,9 +541,14 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_siz * * Return: pointer to the new object or %NULL in case of error */ -void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; -void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, - gfp_t gfpflags) __assume_slab_alignment __malloc; +void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, + gfp_t flags) __assume_slab_alignment __malloc; +#define kmem_cache_alloc(...) alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__)) + +void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) + void kmem_cache_free(struct kmem_cache *s, void *objp); /* @@ -538,29 +559,40 @@ void kmem_cache_free(struct kmem_cache *s, void *objp); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); + +int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p); +#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) static __always_inline void kfree_bulk(size_t size, void **p) { kmem_cache_free_bulk(NULL, size, p); } -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment +void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); -void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment - __malloc; +#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__)) -void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) +void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, + int node) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) + +void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_kmalloc_alignment +void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); -void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment +#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__)) + +#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__)) + +void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); +#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__)) -void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment +void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment __alloc_size(1); +#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__)) /** * kmalloc - allocate kernel memory @@ -616,37 +648,39 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * Try really hard to succeed the allocation but fail * eventually. */ -static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large(size, flags); + return kmalloc_large_noprof(size, flags); index = kmalloc_index(size); - return kmalloc_trace( + return kmalloc_trace_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } - return __kmalloc(size, flags); + return __kmalloc_noprof(size, flags); } +#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) -static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_node(size, flags, node); + return kmalloc_large_node_noprof(size, flags, node); index = kmalloc_index(size); - return kmalloc_node_trace( + return kmalloc_node_trace_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } - return __kmalloc_node(size, flags, node); + return __kmalloc_node_noprof(size, flags, node); } +#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) /** * kmalloc_array - allocate memory for an array. @@ -654,16 +688,17 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc(bytes, flags); - return __kmalloc(bytes, flags); + return kmalloc_noprof(bytes, flags); + return kmalloc_noprof(bytes, flags); } +#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) /** * krealloc_array - reallocate memory for an array. @@ -672,18 +707,19 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_ * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, - size_t new_n, - size_t new_size, - gfp_t flags) +static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; - return krealloc(p, bytes, flags); + return krealloc_noprof(p, bytes, flags); } +#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__)) /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -691,16 +727,12 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags) -{ - return kmalloc_array(n, size, flags | __GFP_ZERO); -} +#define kcalloc(_n, _size, _flags) kmalloc_array(_n, _size, (_flags) | __GFP_ZERO) -void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, +void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, unsigned long caller) __alloc_size(1); -#define kmalloc_node_track_caller(size, flags, node) \ - __kmalloc_node_track_caller(size, flags, node, \ - _RET_IP_) +#define kmalloc_node_track_caller(...) \ + alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) /* * kmalloc_track_caller is a special version of kmalloc that records the @@ -710,11 +742,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, * allocator where we care about the real place the memory allocation * request comes from. */ -#define kmalloc_track_caller(size, flags) \ - __kmalloc_node_track_caller(size, flags, \ - NUMA_NO_NODE, _RET_IP_) +#define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE) -static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, +static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; @@ -722,75 +752,56 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc_node(bytes, flags, node); - return __kmalloc_node(bytes, flags, node); + return kmalloc_node_noprof(bytes, flags, node); + return __kmalloc_node_noprof(bytes, flags, node); } +#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) -static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) -{ - return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); -} +#define kcalloc_node(_n, _size, _flags, _node) \ + kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node) /* * Shortcuts */ -static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) -{ - return kmem_cache_alloc(k, flags | __GFP_ZERO); -} +#define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO) /** * kzalloc - allocate memory. The memory is set to zero. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags) +static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) { - return kmalloc(size, flags | __GFP_ZERO); + return kmalloc_noprof(size, flags | __GFP_ZERO); } +#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) +#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -/** - * kzalloc_node - allocate zeroed memory from a particular memory node. - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kmalloc). - * @node: memory node from which to allocate - */ -static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node) -{ - return kmalloc_node(size, flags | __GFP_ZERO, node); -} +extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) -extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1); -static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags) -{ - return kvmalloc_node(size, flags, NUMA_NO_NODE); -} -static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node) -{ - return kvmalloc_node(size, flags | __GFP_ZERO, node); -} -static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags) -{ - return kvmalloc(size, flags | __GFP_ZERO); -} +#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) +#define kvzalloc(_size, _flags) kvmalloc(_size, _flags|__GFP_ZERO) -static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, _flags|__GFP_ZERO, _node) + +static inline __alloc_size(1, 2) void *kvmalloc_array_noprof(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc(bytes, flags); + return kvmalloc_node_noprof(bytes, flags, NUMA_NO_NODE); } -static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags) -{ - return kvmalloc_array(n, size, flags | __GFP_ZERO); -} +#define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__)) +#define kvcalloc(_n, _size, _flags) kvmalloc_array(_n, _size, _flags|__GFP_ZERO) -extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) +extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); +#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) + extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) diff --git a/include/linux/string.h b/include/linux/string.h index 9ba8b4597009..793c27ad7c0d 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -282,7 +282,9 @@ extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__)) + extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 98ea90e90439..e4a631ec430b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -2,6 +2,8 @@ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H +#include <linux/alloc_tag.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> @@ -138,26 +140,54 @@ extern unsigned long vmalloc_nr_pages(void); static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size) __alloc_size(1); -extern void *vzalloc(unsigned long size) __alloc_size(1); -extern void *vmalloc_user(unsigned long size) __alloc_size(1); -extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1); -extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1); -extern void *vmalloc_32(unsigned long size) __alloc_size(1); -extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -extern void *__vmalloc_node_range(unsigned long size, unsigned long align, +extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); +#define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) + +extern void *vzalloc_noprof(unsigned long size) __alloc_size(1); +#define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__)) + +extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__)) + +extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1); +#define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__)) + +extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1); +#define vzalloc_node(...) alloc_hooks(vzalloc_node_noprof(__VA_ARGS__)) + +extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__)) + +extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__)) + +extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__)) + +extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) __alloc_size(1); -void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, +#define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__)) + +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) + +void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) + +extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) + +extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2); +#define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__)) + +extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +#define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__)) -extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); -extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); +extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); +#define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 2e58d5e6ac0e..d67892944193 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -11,6 +11,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdmavt_cq.h> #include <rdma/rvt-abi.h> +#include <linux/vmalloc.h> /* * Atomic bit definitions for r_aflags. */ diff --git a/init/Kconfig b/init/Kconfig index aa02aec6aa7d..10d4a638d9ae 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -929,6 +929,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED If set, automatic NUMA balancing will be enabled if running on a NUMA machine. +config SLAB_OBJ_EXT + bool + menuconfig CGROUPS bool "Control Group support" select KERNFS @@ -962,6 +965,7 @@ config MEMCG bool "Memory controller" select PAGE_COUNTER select EVENTFD + select SLAB_OBJ_EXT help Provides control over the memory footprint of tasks in a cgroup. diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 58db8fd70471..5e2d51e1cdf6 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -570,9 +570,9 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!ops->alloc_pages) + if (!ops->alloc_pages_op) return NULL; - return ops->alloc_pages(dev, size, dma_handle, dir, gfp); + return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); } struct page *dma_alloc_pages(struct device *dev, size_t size, diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 8a689b4ff4f9..2f84896a7bcb 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -82,7 +82,7 @@ static struct test_item test_items[] = { ITEM_FUNC(kallsyms_test_func_static), ITEM_FUNC(kallsyms_test_func), ITEM_FUNC(kallsyms_test_func_weak), - ITEM_FUNC(vmalloc), + ITEM_FUNC(vmalloc_noprof), ITEM_FUNC(vfree), #ifdef CONFIG_KALLSYMS_ALL ITEM_DATA(kallsyms_test_var_bss_static), diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..2d25eebc549d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -56,6 +56,7 @@ #include <linux/dynamic_debug.h> #include <linux/audit.h> #include <linux/cfi.h> +#include <linux/codetag.h> #include <linux/debugfs.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1210,15 +1211,19 @@ static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) return module_alloc(size); } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(void *ptr, enum mod_mem_type type, + bool unload_codetags) { + if (!unload_codetags && mod_mem_type_is_core_data(type)) + return; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else module_memfree(ptr); } -static void free_mod_mem(struct module *mod) +static void free_mod_mem(struct module *mod, bool unload_codetags) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1229,19 +1234,27 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod_mem->base, type, + unload_codetags); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA, unload_codetags); } /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { + bool unload_codetags; + trace_module_free(mod); + unload_codetags = codetag_unload_module(mod); + if (!unload_codetags) + pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", + mod->name); + mod_sysfs_teardown(mod); /* @@ -1283,7 +1296,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod); + free_mod_mem(mod, unload_codetags); } void *__symbol_get(const char *symbol) @@ -2296,7 +2309,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod->mem[t].base, t, true); return ret; } @@ -2426,7 +2439,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod); + free_mod_mem(mod, true); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -2995,6 +3008,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Get rid of temporary copy. */ free_copy(info, flags); + codetag_load_module(mod); + /* Done! */ trace_module_load(mod); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..9d2685ec4592 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -968,6 +968,37 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". +config CODE_TAGGING + bool + select KALLSYMS + +config MEM_ALLOC_PROFILING + bool "Enable memory allocation profiling" + default n + depends on PROC_FS + depends on !DEBUG_FORCE_WEAK_PER_CPU + select CODE_TAGGING + select PAGE_EXTENSION + select SLAB_OBJ_EXT + help + Track allocation source code and record total allocation size + initiated at that code location. The mechanism can be used to track + memory leaks with a low performance and memory impact. + +config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + bool "Enable memory allocation profiling by default" + default y + depends on MEM_ALLOC_PROFILING + +config MEM_ALLOC_PROFILING_DEBUG + bool "Memory allocation profiler debugging" + default n + depends on MEM_ALLOC_PROFILING + select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + help + Adds warnings with helpful error messages for memory allocation + profiling. + source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" source "lib/Kconfig.kmsan" diff --git a/lib/Makefile b/lib/Makefile index ffc6b2341b45..2f4e17bfb299 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -233,6 +233,9 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +obj-$(CONFIG_CODE_TAGGING) += codetag.o +obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o + lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c new file mode 100644 index 000000000000..e24830c44783 --- /dev/null +++ b/lib/alloc_tag.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/alloc_tag.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/page_ext.h> +#include <linux/proc_fs.h> +#include <linux/seq_buf.h> +#include <linux/seq_file.h> + +static struct codetag_type *alloc_tag_cttype; + +DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + mem_alloc_profiling_key); + +static void *allocinfo_start(struct seq_file *m, loff_t *pos) +{ + struct codetag_iterator *iter; + struct codetag *ct; + loff_t node = *pos; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + m->private = iter; + if (!iter) + return NULL; + + codetag_lock_module_list(alloc_tag_cttype, true); + *iter = codetag_get_ct_iter(alloc_tag_cttype); + while ((ct = codetag_next_ct(iter)) != NULL && node) + node--; + + return ct ? iter : NULL; +} + +static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)arg; + struct codetag *ct = codetag_next_ct(iter); + + (*pos)++; + if (!ct) + return NULL; + + return iter; +} + +static void allocinfo_stop(struct seq_file *m, void *arg) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)m->private; + + if (iter) { + codetag_lock_module_list(alloc_tag_cttype, false); + kfree(iter); + } +} + +static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct) +{ + struct alloc_tag *tag = ct_to_alloc_tag(ct); + struct alloc_tag_counters counter = alloc_tag_read(tag); + s64 bytes = counter.bytes; + + seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls); + codetag_to_text(out, ct); + seq_buf_putc(out, ' '); + seq_buf_putc(out, '\n'); +} + +static int allocinfo_show(struct seq_file *m, void *arg) +{ + struct codetag_iterator *iter = (struct codetag_iterator *)arg; + char *bufp; + size_t n = seq_get_buf(m, &bufp); + struct seq_buf buf; + + seq_buf_init(&buf, bufp, n); + alloc_tag_to_text(&buf, iter->ct); + seq_commit(m, seq_buf_used(&buf)); + return 0; +} + +static const struct seq_operations allocinfo_seq_op = { + .start = allocinfo_start, + .next = allocinfo_next, + .stop = allocinfo_stop, + .show = allocinfo_show, +}; + +size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep) +{ + struct codetag_iterator iter; + struct codetag *ct; + struct codetag_bytes n; + unsigned int i, nr = 0; + + if (can_sleep) + codetag_lock_module_list(alloc_tag_cttype, true); + else if (!codetag_trylock_module_list(alloc_tag_cttype)) + return 0; + + iter = codetag_get_ct_iter(alloc_tag_cttype); + while ((ct = codetag_next_ct(&iter))) { + struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct)); + + n.ct = ct; + n.bytes = counter.bytes; + + for (i = 0; i < nr; i++) + if (n.bytes > tags[i].bytes) + break; + + if (i < count) { + nr -= nr == count; + memmove(&tags[i + 1], + &tags[i], + sizeof(tags[0]) * (nr - i)); + nr++; + tags[i] = n; + } + } + + codetag_lock_module_list(alloc_tag_cttype, false); + + return nr; +} + +static void __init procfs_init(void) +{ + proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op); +} + +static bool alloc_tag_module_unload(struct codetag_type *cttype, + struct codetag_module *cmod) +{ + struct codetag_iterator iter = codetag_get_ct_iter(cttype); + struct alloc_tag_counters counter; + bool module_unused = true; + struct alloc_tag *tag; + struct codetag *ct; + + for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { + if (iter.cmod != cmod) + continue; + + tag = ct_to_alloc_tag(ct); + counter = alloc_tag_read(tag); + + if (WARN(counter.bytes, + "%s:%u module %s func:%s has %llu allocated at module unload", + ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) + module_unused = false; + } + + return module_unused; +} + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support __meminitdata = true; +#else +static bool mem_profiling_support __meminitdata; +#endif + +static int __init setup_early_mem_profiling(char *str) +{ + bool enable; + + if (!str || !str[0]) + return -EINVAL; + + if (!strncmp(str, "never", 5)) { + enable = false; + mem_profiling_support = false; + } else { + int res; + + res = kstrtobool(str, &enable); + if (res) + return res; + + mem_profiling_support = true; + } + + if (enable != static_key_enabled(&mem_alloc_profiling_key)) { + if (enable) + static_branch_enable(&mem_alloc_profiling_key); + else + static_branch_disable(&mem_alloc_profiling_key); + } + + return 0; +} +early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling); + +static __init bool need_page_alloc_tagging(void) +{ + return mem_profiling_support; +} + +static __init void init_page_alloc_tagging(void) +{ +} + +struct page_ext_operations page_alloc_tagging_ops = { + .size = sizeof(union codetag_ref), + .need = need_page_alloc_tagging, + .init = init_page_alloc_tagging, +}; +EXPORT_SYMBOL(page_alloc_tagging_ops); + +static struct ctl_table memory_allocation_profiling_sysctls[] = { + { + .procname = "mem_profiling", + .data = &mem_alloc_profiling_key, +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + .mode = 0444, +#else + .mode = 0644, +#endif + .proc_handler = proc_do_static_key, + }, + { } +}; + +static int __init alloc_tag_init(void) +{ + const struct codetag_type_desc desc = { + .section = "alloc_tags", + .tag_size = sizeof(struct alloc_tag), + .module_unload = alloc_tag_module_unload, + }; + + alloc_tag_cttype = codetag_register_type(&desc); + if (IS_ERR_OR_NULL(alloc_tag_cttype)) + return PTR_ERR(alloc_tag_cttype); + + if (!mem_profiling_support) + memory_allocation_profiling_sysctls[0].mode = 0444; + register_sysctl_init("vm", memory_allocation_profiling_sysctls); + procfs_init(); + + return 0; +} +module_init(alloc_tag_init); diff --git a/lib/codetag.c b/lib/codetag.c new file mode 100644 index 000000000000..5ace625f2328 --- /dev/null +++ b/lib/codetag.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/codetag.h> +#include <linux/idr.h> +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/seq_buf.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +struct codetag_type { + struct list_head link; + unsigned int count; + struct idr mod_idr; + struct rw_semaphore mod_lock; /* protects mod_idr */ + struct codetag_type_desc desc; +}; + +struct codetag_range { + struct codetag *start; + struct codetag *stop; +}; + +struct codetag_module { + struct module *mod; + struct codetag_range range; +}; + +static DEFINE_MUTEX(codetag_lock); +static LIST_HEAD(codetag_types); + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock) +{ + if (lock) + down_read(&cttype->mod_lock); + else + up_read(&cttype->mod_lock); +} + +bool codetag_trylock_module_list(struct codetag_type *cttype) +{ + return down_read_trylock(&cttype->mod_lock) != 0; +} + +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype) +{ + struct codetag_iterator iter = { + .cttype = cttype, + .cmod = NULL, + .mod_id = 0, + .ct = NULL, + }; + + return iter; +} + +static inline struct codetag *get_first_module_ct(struct codetag_module *cmod) +{ + return cmod->range.start < cmod->range.stop ? cmod->range.start : NULL; +} + +static inline +struct codetag *get_next_module_ct(struct codetag_iterator *iter) +{ + struct codetag *res = (struct codetag *) + ((char *)iter->ct + iter->cttype->desc.tag_size); + + return res < iter->cmod->range.stop ? res : NULL; +} + +struct codetag *codetag_next_ct(struct codetag_iterator *iter) +{ + struct codetag_type *cttype = iter->cttype; + struct codetag_module *cmod; + struct codetag *ct; + + lockdep_assert_held(&cttype->mod_lock); + + if (unlikely(idr_is_empty(&cttype->mod_idr))) + return NULL; + + ct = NULL; + while (true) { + cmod = idr_find(&cttype->mod_idr, iter->mod_id); + + /* If module was removed move to the next one */ + if (!cmod) + cmod = idr_get_next_ul(&cttype->mod_idr, + &iter->mod_id); + + /* Exit if no more modules */ + if (!cmod) + break; + + if (cmod != iter->cmod) { + iter->cmod = cmod; + ct = get_first_module_ct(cmod); + } else + ct = get_next_module_ct(iter); + + if (ct) + break; + + iter->mod_id++; + } + + iter->ct = ct; + return ct; +} + +void codetag_to_text(struct seq_buf *out, struct codetag *ct) +{ + if (ct->modname) + seq_buf_printf(out, "%s:%u [%s] func:%s", + ct->filename, ct->lineno, + ct->modname, ct->function); + else + seq_buf_printf(out, "%s:%u func:%s", + ct->filename, ct->lineno, ct->function); +} + +static inline size_t range_size(const struct codetag_type *cttype, + const struct codetag_range *range) +{ + return ((char *)range->stop - (char *)range->start) / + cttype->desc.tag_size; +} + +#ifdef CONFIG_MODULES +static void *get_symbol(struct module *mod, const char *prefix, const char *name) +{ + DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN); + const char *buf; + void *ret; + + seq_buf_printf(&sb, "%s%s", prefix, name); + if (seq_buf_has_overflowed(&sb)) + return NULL; + + buf = seq_buf_str(&sb); + preempt_disable(); + ret = mod ? + (void *)find_kallsyms_symbol_value(mod, buf) : + (void *)kallsyms_lookup_name(buf); + preempt_enable(); + + return ret; +} + +static struct codetag_range get_section_range(struct module *mod, + const char *section) +{ + return (struct codetag_range) { + get_symbol(mod, "__start_", section), + get_symbol(mod, "__stop_", section), + }; +} + +static int codetag_module_init(struct codetag_type *cttype, struct module *mod) +{ + struct codetag_range range; + struct codetag_module *cmod; + int err; + + range = get_section_range(mod, cttype->desc.section); + if (!range.start || !range.stop) { + pr_warn("Failed to load code tags of type %s from the module %s\n", + cttype->desc.section, + mod ? mod->name : "(built-in)"); + return -EINVAL; + } + + /* Ignore empty ranges */ + if (range.start == range.stop) + return 0; + + BUG_ON(range.start > range.stop); + + cmod = kmalloc(sizeof(*cmod), GFP_KERNEL); + if (unlikely(!cmod)) + return -ENOMEM; + + cmod->mod = mod; + cmod->range = range; + + down_write(&cttype->mod_lock); + err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); + if (err >= 0) { + cttype->count += range_size(cttype, &range); + if (cttype->desc.module_load) + cttype->desc.module_load(cttype, cmod); + } + up_write(&cttype->mod_lock); + + if (err < 0) { + kfree(cmod); + return err; + } + + return 0; +} + +void codetag_load_module(struct module *mod) +{ + struct codetag_type *cttype; + + if (!mod) + return; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) + codetag_module_init(cttype, mod); + mutex_unlock(&codetag_lock); +} + +bool codetag_unload_module(struct module *mod) +{ + struct codetag_type *cttype; + bool unload_ok = true; + + if (!mod) + return true; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + struct codetag_module *found = NULL; + struct codetag_module *cmod; + unsigned long mod_id, tmp; + + down_write(&cttype->mod_lock); + idr_for_each_entry_ul(&cttype->mod_idr, cmod, tmp, mod_id) { + if (cmod->mod && cmod->mod == mod) { + found = cmod; + break; + } + } + if (found) { + if (cttype->desc.module_unload) + if (!cttype->desc.module_unload(cttype, cmod)) + unload_ok = false; + + cttype->count -= range_size(cttype, &cmod->range); + idr_remove(&cttype->mod_idr, mod_id); + kfree(cmod); + } + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); + + return unload_ok; +} + +#else /* CONFIG_MODULES */ +static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; } +#endif /* CONFIG_MODULES */ + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc) +{ + struct codetag_type *cttype; + int err; + + BUG_ON(desc->tag_size <= 0); + + cttype = kzalloc(sizeof(*cttype), GFP_KERNEL); + if (unlikely(!cttype)) + return ERR_PTR(-ENOMEM); + + cttype->desc = *desc; + idr_init(&cttype->mod_idr); + init_rwsem(&cttype->mod_lock); + + err = codetag_module_init(cttype, NULL); + if (unlikely(err)) { + kfree(cttype); + return ERR_PTR(err); + } + + mutex_lock(&codetag_lock); + list_add_tail(&cttype->link, &codetag_types); + mutex_unlock(&codetag_lock); + + return cttype; +} diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6ae2ba8e06a2..35d841cf2b43 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -130,7 +130,8 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, if (ntbl) return ntbl; - ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); + ntbl = alloc_hooks_tag(ht->alloc_tag, + kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO)); if (ntbl && leaf) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) @@ -157,7 +158,8 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, size = sizeof(*tbl) + sizeof(tbl->buckets[0]); - tbl = kzalloc(size, gfp); + tbl = alloc_hooks_tag(ht->alloc_tag, + kmalloc_noprof(size, gfp|__GFP_ZERO)); if (!tbl) return NULL; @@ -181,7 +183,9 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, int i; static struct lock_class_key __key; - tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); + tbl = alloc_hooks_tag(ht->alloc_tag, + kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), + gfp|__GFP_ZERO, NUMA_NO_NODE)); size = nbuckets; @@ -975,7 +979,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) } /** - * rhashtable_init - initialize a new hash table + * rhashtable_init_noprof - initialize a new hash table * @ht: hash table to be initialized * @params: configuration parameters * @@ -1016,7 +1020,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * .obj_hashfn = my_hash_fn, * }; */ -int rhashtable_init(struct rhashtable *ht, +int rhashtable_init_noprof(struct rhashtable *ht, const struct rhashtable_params *params) { struct bucket_table *tbl; @@ -1031,6 +1035,8 @@ int rhashtable_init(struct rhashtable *ht, spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); + alloc_tag_record(ht->alloc_tag); + if (params->min_size) ht->p.min_size = roundup_pow_of_two(params->min_size); @@ -1076,26 +1082,26 @@ int rhashtable_init(struct rhashtable *ht, return 0; } -EXPORT_SYMBOL_GPL(rhashtable_init); +EXPORT_SYMBOL_GPL(rhashtable_init_noprof); /** - * rhltable_init - initialize a new hash list table + * rhltable_init_noprof - initialize a new hash list table * @hlt: hash list table to be initialized * @params: configuration parameters * * Initializes a new hash list table. * - * See documentation for rhashtable_init. + * See documentation for rhashtable_init_noprof. */ -int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params) { int err; - err = rhashtable_init(&hlt->ht, params); + err = rhashtable_init_noprof(&hlt->ht, params); hlt->ht.rhlist = true; return err; } -EXPORT_SYMBOL_GPL(rhltable_init); +EXPORT_SYMBOL_GPL(rhltable_init_noprof); static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, void (*free_fn)(void *ptr, void *arg), diff --git a/mm/compaction.c b/mm/compaction.c index 807b58e6eb68..70b01190d2f3 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1851,7 +1851,7 @@ static void isolate_freepages(struct compact_control *cc) * This is a migrate-callback that "allocates" freepages by taking pages * from the isolated freelists in the block we are migrating to. */ -static struct folio *compaction_alloc(struct folio *src, unsigned long data) +static struct folio *compaction_alloc_noprof(struct folio *src, unsigned long data) { struct compact_control *cc = (struct compact_control *)data; struct folio *dst; @@ -1898,6 +1898,11 @@ again: return page_rmappable_folio(&dst->page); } +static struct folio *compaction_alloc(struct folio *src, unsigned long data) +{ + return alloc_hooks(compaction_alloc_noprof(src, data)); +} + /* * This is a migrate-callback that "frees" freepages back to the isolated * freelist. All pages on the freelist are from the same zone, so there is no diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 65c19025da3d..f1c9a2c5abc0 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -30,6 +30,7 @@ #include <linux/start_kernel.h> #include <linux/sched/mm.h> #include <linux/io.h> +#include <linux/vmalloc.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> diff --git a/mm/filemap.c b/mm/filemap.c index 30de18c4fd28..60890758d660 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -966,7 +966,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) { int n; struct folio *folio; @@ -981,9 +981,9 @@ struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) return folio; } - return folio_alloc(gfp, order); + return folio_alloc_noprof(gfp, order); } -EXPORT_SYMBOL(filemap_alloc_folio); +EXPORT_SYMBOL(filemap_alloc_folio_noprof); #endif /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9859aa4f7553..673c46ebbc4f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -38,6 +38,7 @@ #include <linux/sched/sysctl.h> #include <linux/memory-tiers.h> #include <linux/compat.h> +#include <linux/pgalloc_tag.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -2946,6 +2947,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* Caller disabled irqs, so they are still disabled here */ split_page_owner(head, order, new_order); + pgalloc_tag_split(head, 1 << order); /* See comment in __split_huge_page_tail() */ if (folio_test_anon(folio)) { diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 8350f5c06f2e..964b8482275b 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -595,9 +595,9 @@ static unsigned long kfence_init_pool(void) continue; __folio_set_slab(slab_folio(slab)); -#ifdef CONFIG_MEMCG - slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg | - MEMCG_DATA_OBJCGS; +#ifdef CONFIG_MEMCG_KMEM + slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts | + MEMCG_DATA_OBJEXTS; #endif } @@ -645,8 +645,8 @@ reset_slab: if (!i || (i % 2)) continue; -#ifdef CONFIG_MEMCG - slab->memcg_data = 0; +#ifdef CONFIG_MEMCG_KMEM + slab->obj_exts = 0; #endif __folio_clear_slab(slab_folio(slab)); } @@ -1139,8 +1139,8 @@ void __kfence_free(void *addr) { struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); -#ifdef CONFIG_MEMCG - KFENCE_WARN_ON(meta->objcg); +#ifdef CONFIG_MEMCG_KMEM + KFENCE_WARN_ON(meta->obj_exts.objcg); #endif /* * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index f46fbb03062b..084f5f36e8e7 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -97,8 +97,8 @@ struct kfence_metadata { struct kfence_track free_track; /* For updating alloc_covered on frees. */ u32 alloc_stack_hash; -#ifdef CONFIG_MEMCG - struct obj_cgroup *objcg; +#ifdef CONFIG_MEMCG_KMEM + struct slabobj_ext obj_exts; #endif }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fabce2b50c69..6c2fd0bd61e6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2978,13 +2978,6 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg) } #ifdef CONFIG_MEMCG_KMEM -/* - * The allocated objcg pointers array is not accounted directly. - * Moreover, it should not come from DMA buffer and is not readily - * reclaimable. So those GFP bits should be masked off. - */ -#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ - __GFP_ACCOUNT | __GFP_NOFAIL) /* * mod_objcg_mlstate() may be called with irq enabled, so @@ -3004,62 +2997,27 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, rcu_read_unlock(); } -int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab) -{ - unsigned int objects = objs_per_slab(s, slab); - unsigned long memcg_data; - void *vec; - - gfp &= ~OBJCGS_CLEAR_MASK; - vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, - slab_nid(slab)); - if (!vec) - return -ENOMEM; - - memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS; - if (new_slab) { - /* - * If the slab is brand new and nobody can yet access its - * memcg_data, no synchronization is required and memcg_data can - * be simply assigned. - */ - slab->memcg_data = memcg_data; - } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) { - /* - * If the slab is already in use, somebody can allocate and - * assign obj_cgroups in parallel. In this case the existing - * objcg vector should be reused. - */ - kfree(vec); - return 0; - } - - kmemleak_not_leak(vec); - return 0; -} - static __always_inline struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) { /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in - * slab->memcg_data. + * slab->obj_exts. */ if (folio_test_slab(folio)) { - struct obj_cgroup **objcgs; + struct slabobj_ext *obj_exts; struct slab *slab; unsigned int off; slab = folio_slab(folio); - objcgs = slab_objcgs(slab); - if (!objcgs) + obj_exts = slab_obj_exts(slab); + if (!obj_exts) return NULL; off = obj_to_index(slab->slab_cache, slab, p); - if (objcgs[off]) - return obj_cgroup_memcg(objcgs[off]); + if (obj_exts[off].objcg) + return obj_cgroup_memcg(obj_exts[off].objcg); return NULL; } @@ -3067,7 +3025,7 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) /* * folio_memcg_check() is used here, because in theory we can encounter * a folio where the slab flag has been cleared already, but - * slab->memcg_data has not been freed yet + * slab->obj_exts has not been freed yet * folio_memcg_check() will guarantee that a proper memory * cgroup pointer or NULL will be returned. */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0fe77738d971..736f77746b02 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2200,15 +2200,15 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, */ preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); - page = __alloc_pages(preferred_gfp, order, nid, nodemask); + page = __alloc_pages_noprof(preferred_gfp, order, nid, nodemask); if (!page) - page = __alloc_pages(gfp, order, nid, NULL); + page = __alloc_pages_noprof(gfp, order, nid, NULL); return page; } /** - * alloc_pages_mpol - Allocate pages according to NUMA mempolicy. + * alloc_pages_mpol_noprof - Allocate pages according to NUMA mempolicy. * @gfp: GFP flags. * @order: Order of the page allocation. * @pol: Pointer to the NUMA mempolicy. @@ -2217,7 +2217,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * * Return: The page on success or NULL if allocation fails. */ -struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, +struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *pol, pgoff_t ilx, int nid) { nodemask_t *nodemask; @@ -2248,7 +2248,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, * First, try to allocate THP only on local node, but * don't reclaim unnecessarily, just compact. */ - page = __alloc_pages_node(nid, + page = __alloc_pages_node_noprof(nid, gfp | __GFP_THISNODE | __GFP_NORETRY, order); if (page || !(gfp & __GFP_DIRECT_RECLAIM)) return page; @@ -2261,7 +2261,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, } } - page = __alloc_pages(gfp, order, nid, nodemask); + page = __alloc_pages_noprof(gfp, order, nid, nodemask); if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) { /* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */ @@ -2277,7 +2277,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, } /** - * vma_alloc_folio - Allocate a folio for a VMA. + * vma_alloc_folio_noprof - Allocate a folio for a VMA. * @gfp: GFP flags. * @order: Order of the folio. * @vma: Pointer to VMA. @@ -2292,7 +2292,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, * * Return: The folio on success or NULL if allocation fails. */ -struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, +struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage) { struct mempolicy *pol; @@ -2300,15 +2300,15 @@ struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, struct page *page; pol = get_vma_policy(vma, addr, order, &ilx); - page = alloc_pages_mpol(gfp | __GFP_COMP, order, - pol, ilx, numa_node_id()); + page = alloc_pages_mpol_noprof(gfp | __GFP_COMP, order, + pol, ilx, numa_node_id()); mpol_cond_put(pol); return page_rmappable_folio(page); } -EXPORT_SYMBOL(vma_alloc_folio); +EXPORT_SYMBOL(vma_alloc_folio_noprof); /** - * alloc_pages - Allocate pages. + * alloc_pages_noprof - Allocate pages. * @gfp: GFP flags. * @order: Power of two of number of pages to allocate. * @@ -2321,7 +2321,7 @@ EXPORT_SYMBOL(vma_alloc_folio); * flags are used. * Return: The page on success or NULL if allocation fails. */ -struct page *alloc_pages(gfp_t gfp, unsigned int order) +struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order) { struct mempolicy *pol = &default_policy; @@ -2332,16 +2332,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order) if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); - return alloc_pages_mpol(gfp, order, - pol, NO_INTERLEAVE_INDEX, numa_node_id()); + return alloc_pages_mpol_noprof(gfp, order, pol, NO_INTERLEAVE_INDEX, + numa_node_id()); } -EXPORT_SYMBOL(alloc_pages); +EXPORT_SYMBOL(alloc_pages_noprof); -struct folio *folio_alloc(gfp_t gfp, unsigned int order) +struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { - return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order)); + return page_rmappable_folio(alloc_pages_noprof(gfp | __GFP_COMP, order)); } -EXPORT_SYMBOL(folio_alloc); +EXPORT_SYMBOL(folio_alloc_noprof); static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, @@ -2360,13 +2360,13 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, for (i = 0; i < nodes; i++) { if (delta) { - nr_allocated = __alloc_pages_bulk(gfp, + nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node + 1, NULL, page_array); delta--; } else { - nr_allocated = __alloc_pages_bulk(gfp, + nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node, NULL, page_array); } @@ -2503,11 +2503,11 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); - nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes, + nr_allocated = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes, nr_pages, NULL, page_array); if (nr_allocated < nr_pages) - nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL, + nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL, nr_pages - nr_allocated, NULL, page_array + nr_allocated); return nr_allocated; @@ -2519,7 +2519,7 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, * It can accelerate memory allocation especially interleaving * allocate memory. */ -unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, +unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; @@ -2543,8 +2543,8 @@ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, nid = numa_node_id(); nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid); - return __alloc_pages_bulk(gfp, nid, nodemask, - nr_pages, NULL, page_array); + return alloc_pages_bulk_noprof(gfp, nid, nodemask, + nr_pages, NULL, page_array); } int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) diff --git a/mm/mempool.c b/mm/mempool.c index 076c736f5f1f..602e6eba68d3 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -240,17 +240,17 @@ EXPORT_SYMBOL(mempool_init_node); * * Return: %0 on success, negative error code otherwise. */ -int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data) +int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data) { return mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, GFP_KERNEL, NUMA_NO_NODE); } -EXPORT_SYMBOL(mempool_init); +EXPORT_SYMBOL(mempool_init_noprof); /** - * mempool_create - create a memory pool + * mempool_create_node - create a memory pool * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. @@ -265,17 +265,9 @@ EXPORT_SYMBOL(mempool_init); * * Return: pointer to the created memory pool object or %NULL on error. */ -mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data) -{ - return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data, - GFP_KERNEL, NUMA_NO_NODE); -} -EXPORT_SYMBOL(mempool_create); - -mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data, - gfp_t gfp_mask, int node_id) +mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int node_id) { mempool_t *pool; @@ -291,7 +283,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, return pool; } -EXPORT_SYMBOL(mempool_create_node); +EXPORT_SYMBOL(mempool_create_node_noprof); /** * mempool_resize - resize an existing memory pool @@ -374,7 +366,7 @@ out: EXPORT_SYMBOL(mempool_resize); /** - * mempool_alloc - allocate an element from a specific memory pool + * mempool_alloc_noprof - allocate an element from a specific memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @gfp_mask: the usual allocation bitmask. @@ -387,7 +379,7 @@ EXPORT_SYMBOL(mempool_resize); * * Return: pointer to the allocated element or %NULL on error. */ -void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) +void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; @@ -454,7 +446,7 @@ repeat_alloc: finish_wait(&pool->wait, &wait); goto repeat_alloc; } -EXPORT_SYMBOL(mempool_alloc); +EXPORT_SYMBOL(mempool_alloc_noprof); /** * mempool_alloc_preallocated - allocate an element from preallocated elements @@ -562,7 +554,7 @@ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; VM_BUG_ON(mem->ctor); - return kmem_cache_alloc(mem, gfp_mask); + return kmem_cache_alloc_noprof(mem, gfp_mask); } EXPORT_SYMBOL(mempool_alloc_slab); @@ -580,7 +572,7 @@ EXPORT_SYMBOL(mempool_free_slab); void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { size_t size = (size_t)pool_data; - return kmalloc(size, gfp_mask); + return kmalloc_noprof(size, gfp_mask); } EXPORT_SYMBOL(mempool_kmalloc); @@ -610,7 +602,7 @@ EXPORT_SYMBOL(mempool_kvfree); void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) { int order = (int)(long)pool_data; - return alloc_pages(gfp_mask, order); + return alloc_pages_noprof(gfp_mask, order); } EXPORT_SYMBOL(mempool_alloc_pages); diff --git a/mm/mm_init.c b/mm/mm_init.c index 549e76af8f82..f45c2b32ba82 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -24,6 +24,7 @@ #include <linux/page_ext.h> #include <linux/pti.h> #include <linux/pgtable.h> +#include <linux/stackdepot.h> #include <linux/swap.h> #include <linux/cma.h> #include <linux/crash_dump.h> @@ -2566,7 +2567,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) void __init memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { - if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) { int nid = early_pfn_to_nid(pfn); @@ -2578,6 +2578,17 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, /* KMSAN will take care of these pages. */ return; } + + /* pages were reserved and not allocated */ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } + __free_pages_core(page, order); } diff --git a/mm/nommu.c b/mm/nommu.c index 5ec8f44e7ce9..69a6f3b4d156 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -137,28 +137,28 @@ void vfree(const void *addr) } EXPORT_SYMBOL(vfree); -void *__vmalloc(unsigned long size, gfp_t gfp_mask) +void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) { /* * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() * returns only a logical address. */ - return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); + return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); } -EXPORT_SYMBOL(__vmalloc); +EXPORT_SYMBOL(__vmalloc_noprof); -void *__vmalloc_node_range(unsigned long size, unsigned long align, +void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { - return __vmalloc(size, gfp_mask); + return __vmalloc_noprof(size, gfp_mask); } -void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { - return __vmalloc(size, gfp_mask); + return __vmalloc_noprof(size, gfp_mask); } static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) @@ -179,11 +179,11 @@ static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) return ret; } -void *vmalloc_user(unsigned long size) +void *vmalloc_user_noprof(unsigned long size) { return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO); } -EXPORT_SYMBOL(vmalloc_user); +EXPORT_SYMBOL(vmalloc_user_noprof); struct page *vmalloc_to_page(const void *addr) { @@ -217,13 +217,13 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count) * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vmalloc(unsigned long size) +void *vmalloc_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL); + return __vmalloc_noprof(size, GFP_KERNEL); } -EXPORT_SYMBOL(vmalloc); +EXPORT_SYMBOL(vmalloc_noprof); -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc); +void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof); /* * vzalloc - allocate virtually contiguous memory with zero fill @@ -237,14 +237,14 @@ void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc) * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vzalloc(unsigned long size) +void *vzalloc_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_ZERO); + return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO); } -EXPORT_SYMBOL(vzalloc); +EXPORT_SYMBOL(vzalloc_noprof); /** - * vmalloc_node - allocate memory on a specific node + * vmalloc_node_noprof - allocate memory on a specific node * @size: allocation size * @node: numa node * @@ -254,14 +254,14 @@ EXPORT_SYMBOL(vzalloc); * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vmalloc_node(unsigned long size, int node) +void *vmalloc_node_noprof(unsigned long size, int node) { - return vmalloc(size); + return vmalloc_noprof(size); } -EXPORT_SYMBOL(vmalloc_node); +EXPORT_SYMBOL(vmalloc_node_noprof); /** - * vzalloc_node - allocate memory on a specific node with zero fill + * vzalloc_node_noprof - allocate memory on a specific node with zero fill * @size: allocation size * @node: numa node * @@ -272,27 +272,27 @@ EXPORT_SYMBOL(vmalloc_node); * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vzalloc_node(unsigned long size, int node) +void *vzalloc_node_noprof(unsigned long size, int node) { - return vzalloc(size); + return vzalloc_noprof(size); } -EXPORT_SYMBOL(vzalloc_node); +EXPORT_SYMBOL(vzalloc_node_noprof); /** - * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) + * vmalloc_32_noprof - allocate virtually contiguous memory (32bit addressable) * @size: allocation size * * Allocate enough 32bit PA addressable pages to cover @size from the * page level allocator and map them into contiguous kernel virtual space. */ -void *vmalloc_32(unsigned long size) +void *vmalloc_32_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL); + return __vmalloc_noprof(size, GFP_KERNEL); } -EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmalloc_32_noprof); /** - * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory + * vmalloc_32_user_noprof - allocate zeroed virtually contiguous 32bit memory * @size: allocation size * * The resulting memory area is 32bit addressable and zeroed so it can be @@ -301,15 +301,15 @@ EXPORT_SYMBOL(vmalloc_32); * VM_USERMAP is set on the corresponding VMA so that subsequent calls to * remap_vmalloc_range() are permissible. */ -void *vmalloc_32_user(unsigned long size) +void *vmalloc_32_user_noprof(unsigned long size) { /* * We'll have to sort out the ZONE_DMA bits for 64-bit, * but for now this can simply use vmalloc_user() directly. */ - return vmalloc_user(size); + return vmalloc_user_noprof(size); } -EXPORT_SYMBOL(vmalloc_32_user); +EXPORT_SYMBOL(vmalloc_32_user_noprof); void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 14d39f34d336..993352a5baeb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -54,6 +54,7 @@ #include <linux/khugepaged.h> #include <linux/delayacct.h> #include <linux/cacheinfo.h> +#include <linux/pgalloc_tag.h> #include <asm/div64.h> #include "internal.h" #include "shuffle.h" @@ -1101,6 +1102,7 @@ __always_inline bool free_pages_prepare(struct page *page, /* Do not let hwpoison pages hit pcplists/buddy */ reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); return false; } @@ -1140,6 +1142,7 @@ __always_inline bool free_pages_prepare(struct page *page, page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page), @@ -1533,6 +1536,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_owner(page, order, gfp_flags); page_table_check_alloc(page, order); + pgalloc_tag_add(page, current, 1 << order); } static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, @@ -2623,6 +2627,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, order, 0); + pgalloc_tag_split(page, 1 << order); split_page_memcg(page, order, 0); } EXPORT_SYMBOL_GPL(split_page); @@ -4384,7 +4389,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, * * Returns the number of pages on the list or array. */ -unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, +unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array) @@ -4520,7 +4525,7 @@ failed_irq: pcp_trylock_finish(UP_flags); failed: - page = __alloc_pages(gfp, 0, preferred_nid, nodemask); + page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask); if (page) { if (page_list) list_add(&page->lru, page_list); @@ -4531,13 +4536,13 @@ failed: goto out; } -EXPORT_SYMBOL_GPL(__alloc_pages_bulk); +EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof); /* * This is the 'heart' of the zoned buddy allocator. */ -struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, - nodemask_t *nodemask) +struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, + int preferred_nid, nodemask_t *nodemask) { struct page *page; unsigned int alloc_flags = ALLOC_WMARK_LOW; @@ -4599,38 +4604,38 @@ out: return page; } -EXPORT_SYMBOL(__alloc_pages); +EXPORT_SYMBOL(__alloc_pages_noprof); -struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, +struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask) { - struct page *page = __alloc_pages(gfp | __GFP_COMP, order, + struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order, preferred_nid, nodemask); return page_rmappable_folio(page); } -EXPORT_SYMBOL(__folio_alloc); +EXPORT_SYMBOL(__folio_alloc_noprof); /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. Use alloc_pages and then kmap if * you need to access high mem. */ -unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) +unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order) { struct page *page; - page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order); + page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order); if (!page) return 0; return (unsigned long) page_address(page); } -EXPORT_SYMBOL(__get_free_pages); +EXPORT_SYMBOL(get_free_pages_noprof); -unsigned long get_zeroed_page(gfp_t gfp_mask) +unsigned long get_zeroed_page_noprof(gfp_t gfp_mask) { - return __get_free_page(gfp_mask | __GFP_ZERO); + return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0); } -EXPORT_SYMBOL(get_zeroed_page); +EXPORT_SYMBOL(get_zeroed_page_noprof); /** * __free_pages - Free pages allocated with alloc_pages(). @@ -4656,12 +4661,15 @@ void __free_pages(struct page *page, unsigned int order) { /* get PageHead before we drop reference */ int head = PageHead(page); + struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) free_the_page(page, order); - else if (!head) + else if (!head) { + pgalloc_tag_sub_pages(tag, (1 << order) - 1); while (order-- > 0) free_the_page(page + (1 << order), order); + } } EXPORT_SYMBOL(__free_pages); @@ -4820,6 +4828,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, struct page *last = page + nr; split_page_owner(page, order, 0); + pgalloc_tag_split(page, 1 << order); split_page_memcg(page, order, 0); while (page < --last) set_page_refcounted(last); @@ -4832,7 +4841,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, } /** - * alloc_pages_exact - allocate an exact number physically-contiguous pages. + * alloc_pages_exact_noprof - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate * @gfp_mask: GFP flags for the allocation, must not contain __GFP_COMP * @@ -4846,7 +4855,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, * * Return: pointer to the allocated area or %NULL in case of error. */ -void *alloc_pages_exact(size_t size, gfp_t gfp_mask) +void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) { unsigned int order = get_order(size); unsigned long addr; @@ -4854,13 +4863,13 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); - addr = __get_free_pages(gfp_mask, order); + addr = get_free_pages_noprof(gfp_mask, order); return make_alloc_exact(addr, order, size); } -EXPORT_SYMBOL(alloc_pages_exact); +EXPORT_SYMBOL(alloc_pages_exact_noprof); /** - * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * alloc_pages_exact_nid_noprof - allocate an exact number of physically-contiguous * pages on a node. * @nid: the preferred node ID where memory should be allocated * @size: the number of bytes to allocate @@ -4871,7 +4880,7 @@ EXPORT_SYMBOL(alloc_pages_exact); * * Return: pointer to the allocated area or %NULL in case of error. */ -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) { unsigned int order = get_order(size); struct page *p; @@ -4879,7 +4888,7 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); - p = alloc_pages_node(nid, gfp_mask, order); + p = alloc_pages_node_noprof(nid, gfp_mask, order); if (!p) return NULL; return make_alloc_exact((unsigned long)page_address(p), order, size); @@ -6316,7 +6325,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc, } /** - * alloc_contig_range() -- tries to allocate given range of pages + * alloc_contig_range_noprof() -- tries to allocate given range of pages * @start: start PFN to allocate * @end: one-past-the-last PFN to allocate * @migratetype: migratetype of the underlying pageblocks (either @@ -6336,7 +6345,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc, * pages which PFN is in [start, end) are allocated for the caller and * need to be freed with free_contig_range(). */ -int alloc_contig_range(unsigned long start, unsigned long end, +int alloc_contig_range_noprof(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask) { unsigned long outer_start, outer_end; @@ -6460,15 +6469,15 @@ done: undo_isolate_page_range(start, end, migratetype); return ret; } -EXPORT_SYMBOL(alloc_contig_range); +EXPORT_SYMBOL(alloc_contig_range_noprof); static int __alloc_contig_pages(unsigned long start_pfn, unsigned long nr_pages, gfp_t gfp_mask) { unsigned long end_pfn = start_pfn + nr_pages; - return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE, - gfp_mask); + return alloc_contig_range_noprof(start_pfn, end_pfn, MIGRATE_MOVABLE, + gfp_mask); } static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn, @@ -6503,7 +6512,7 @@ static bool zone_spans_last_pfn(const struct zone *zone, } /** - * alloc_contig_pages() -- tries to find and allocate contiguous range of pages + * alloc_contig_pages_noprof() -- tries to find and allocate contiguous range of pages * @nr_pages: Number of contiguous pages to allocate * @gfp_mask: GFP mask to limit search and used during compaction * @nid: Target node @@ -6523,8 +6532,8 @@ static bool zone_spans_last_pfn(const struct zone *zone, * * Return: pointer to contiguous pages on success, or NULL if not successful. */ -struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, - int nid, nodemask_t *nodemask) +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, + int nid, nodemask_t *nodemask) { unsigned long ret, pfn, flags; struct zonelist *zonelist; diff --git a/mm/page_ext.c b/mm/page_ext.c index 4548fcc66d74..e7d8f1a5589e 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -10,6 +10,7 @@ #include <linux/page_idle.h> #include <linux/page_table_check.h> #include <linux/rcupdate.h> +#include <linux/pgalloc_tag.h> /* * struct page extension @@ -82,6 +83,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = { #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + &page_alloc_tagging_ops, +#endif #ifdef CONFIG_PAGE_TABLE_CHECK &page_table_check_ops, #endif @@ -91,7 +95,16 @@ unsigned long page_ext_size; static unsigned long total_usage; +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG +/* + * To ensure correct allocation tagging for pages, page_ext should be available + * before the first page allocation. Otherwise early task stacks will be + * allocated before page_ext initialization and missing tags will be flagged. + */ +bool early_page_ext __meminitdata = true; +#else bool early_page_ext __meminitdata; +#endif static int __init setup_early_page_ext(char *str) { early_page_ext = true; diff --git a/mm/page_owner.c b/mm/page_owner.c index d17d1351ec84..5a77d792112a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -484,7 +484,7 @@ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret, if (!memcg_data) goto out_unlock; - if (memcg_data & MEMCG_DATA_OBJCGS) + if (memcg_data & MEMCG_DATA_OBJEXTS) ret += scnprintf(kbuf + ret, count - ret, "Slab cache page\n"); diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index cdd0aa597a81..7e42f0ca3b7b 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -32,6 +32,19 @@ struct pcpu_block_md { int nr_bits; /* total bits responsible for */ }; +struct pcpuobj_ext { +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *cgroup; +#endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + union codetag_ref tag; +#endif +}; + +#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MEM_ALLOC_PROFILING) +#define NEED_PCPUOBJ_EXT +#endif + struct pcpu_chunk { #ifdef CONFIG_PERCPU_STATS int nr_alloc; /* # of allocations */ @@ -64,8 +77,8 @@ struct pcpu_chunk { int end_offset; /* additional area required to have the region end page aligned */ -#ifdef CONFIG_MEMCG_KMEM - struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ +#ifdef NEED_PCPUOBJ_EXT + struct pcpuobj_ext *obj_exts; /* vector of object cgroups */ #endif int nr_pages; /* # of pages served by this chunk */ @@ -74,6 +87,15 @@ struct pcpu_chunk { unsigned long populated[]; /* populated bitmap */ }; +static inline bool need_pcpuobj_ext(void) +{ + if (IS_ENABLED(CONFIG_MEM_ALLOC_PROFILING)) + return true; + if (!mem_cgroup_kmem_disabled()) + return true; + return false; +} + extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; diff --git a/mm/percpu.c b/mm/percpu.c index 4e11fc1e6def..dd7eeb370134 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1392,9 +1392,9 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); -#ifdef CONFIG_MEMCG_KMEM +#ifdef NEED_PCPUOBJ_EXT /* first chunk is free to use */ - chunk->obj_cgroups = NULL; + chunk->obj_exts = NULL; #endif pcpu_init_md_blocks(chunk); @@ -1463,12 +1463,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) if (!chunk->md_blocks) goto md_blocks_fail; -#ifdef CONFIG_MEMCG_KMEM - if (!mem_cgroup_kmem_disabled()) { - chunk->obj_cgroups = +#ifdef NEED_PCPUOBJ_EXT + if (need_pcpuobj_ext()) { + chunk->obj_exts = pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * - sizeof(struct obj_cgroup *), gfp); - if (!chunk->obj_cgroups) + sizeof(struct pcpuobj_ext), gfp); + if (!chunk->obj_exts) goto objcg_fail; } #endif @@ -1480,7 +1480,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) return chunk; -#ifdef CONFIG_MEMCG_KMEM +#ifdef NEED_PCPUOBJ_EXT objcg_fail: pcpu_mem_free(chunk->md_blocks); #endif @@ -1498,8 +1498,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; -#ifdef CONFIG_MEMCG_KMEM - pcpu_mem_free(chunk->obj_cgroups); +#ifdef NEED_PCPUOBJ_EXT + pcpu_mem_free(chunk->obj_exts); #endif pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); @@ -1646,9 +1646,9 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, if (!objcg) return; - if (likely(chunk && chunk->obj_cgroups)) { + if (likely(chunk && chunk->obj_exts)) { obj_cgroup_get(objcg); - chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; + chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg; rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, @@ -1663,13 +1663,13 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) { struct obj_cgroup *objcg; - if (unlikely(!chunk->obj_cgroups)) + if (unlikely(!chunk->obj_exts)) return; - objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; + objcg = chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup; if (!objcg) return; - chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; + chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL; obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); @@ -1699,8 +1699,34 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) } #endif /* CONFIG_MEMCG_KMEM */ +#ifdef CONFIG_MEM_ALLOC_PROFILING +static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off, + size_t size) +{ + if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) { + alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, + current->alloc_tag, size); + } +} + +static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ + if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) + alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size); +} +#else +static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off, + size_t size) +{ +} + +static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ +} +#endif + /** - * pcpu_alloc - the percpu allocator + * pcpu_alloc_noprof - the percpu allocator * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available @@ -1714,7 +1740,7 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, +void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, gfp_t gfp) { gfp_t pcpu_gfp; @@ -1881,6 +1907,8 @@ area_found: pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); + pcpu_alloc_tag_alloc_hook(chunk, off, size); + return ptr; fail_unlock: @@ -1909,61 +1937,7 @@ fail: return NULL; } - -/** - * __alloc_percpu_gfp - allocate dynamic percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * @gfp: allocation flags - * - * Allocate zero-filled percpu area of @size bytes aligned at @align. If - * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can - * be called from any context but is a lot more likely to fail. If @gfp - * has __GFP_NOWARN then no warning will be triggered on invalid or failed - * allocation requests. - * - * RETURNS: - * Percpu pointer to the allocated area on success, NULL on failure. - */ -void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) -{ - return pcpu_alloc(size, align, false, gfp); -} -EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); - -/** - * __alloc_percpu - allocate dynamic percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * - * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). - */ -void __percpu *__alloc_percpu(size_t size, size_t align) -{ - return pcpu_alloc(size, align, false, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -/** - * __alloc_reserved_percpu - allocate reserved percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * - * Allocate zero-filled percpu area of @size bytes aligned at @align - * from reserved percpu area if arch has set it up; otherwise, - * allocation is served from the same dynamic area. Might sleep. - * Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. - * - * RETURNS: - * Percpu pointer to the allocated area on success, NULL on failure. - */ -void __percpu *__alloc_reserved_percpu(size_t size, size_t align) -{ - return pcpu_alloc(size, align, true, GFP_KERNEL); -} +EXPORT_SYMBOL_GPL(pcpu_alloc_noprof); /** * pcpu_balance_free - manage the amount of free chunks @@ -2302,6 +2276,8 @@ void free_percpu(void __percpu *ptr) spin_lock_irqsave(&pcpu_lock, flags); size = pcpu_free_area(chunk, off); + pcpu_alloc_tag_free_hook(chunk, off, size); + pcpu_memcg_free_hook(chunk, off, size); /* diff --git a/mm/show_mem.c b/mm/show_mem.c index 8dcfafbd283c..bdb439551eef 100644 --- a/mm/show_mem.c +++ b/mm/show_mem.c @@ -423,4 +423,30 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + { + struct codetag_bytes tags[10]; + size_t i, nr; + + nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false); + if (nr) { + pr_notice("Memory allocations:\n"); + for (i = 0; i < nr; i++) { + struct codetag *ct = tags[i].ct; + struct alloc_tag *tag = ct_to_alloc_tag(ct); + struct alloc_tag_counters counter = alloc_tag_read(tag); + + /* Same as alloc_tag_to_text() but w/o intermediate buffer */ + if (ct->modname) + pr_notice("%12lli %8llu %s:%u [%s] func:%s\n", + counter.bytes, counter.calls, ct->filename, + ct->lineno, ct->modname, ct->function); + else + pr_notice("%12lli %8llu %s:%u func:%s\n", + counter.bytes, counter.calls, ct->filename, + ct->lineno, ct->function); + } + } + } +#endif } diff --git a/mm/slab.h b/mm/slab.h index d2bc9b191222..65db525e93af 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -87,8 +87,8 @@ struct slab { unsigned int __unused; atomic_t __page_refcount; -#ifdef CONFIG_MEMCG - unsigned long memcg_data; +#ifdef CONFIG_SLAB_OBJ_EXT + unsigned long obj_exts; #endif }; @@ -97,8 +97,8 @@ struct slab { SLAB_MATCH(flags, __page_flags); SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */ SLAB_MATCH(_refcount, __page_refcount); -#ifdef CONFIG_MEMCG -SLAB_MATCH(memcg_data, memcg_data); +#ifdef CONFIG_SLAB_OBJ_EXT +SLAB_MATCH(memcg_data, obj_exts); #endif #undef SLAB_MATCH static_assert(sizeof(struct slab) <= sizeof(struct page)); @@ -536,42 +536,41 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla return false; } -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_SLAB_OBJ_EXT + /* - * slab_objcgs - get the object cgroups vector associated with a slab + * slab_obj_exts - get the pointer to the slab object extension vector + * associated with a slab. * @slab: a pointer to the slab struct * - * Returns a pointer to the object cgroups vector associated with the slab, + * Returns a pointer to the object extension vector associated with the slab, * or NULL if no such vector has been associated yet. */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) { - unsigned long memcg_data = READ_ONCE(slab->memcg_data); + unsigned long obj_exts = READ_ONCE(slab->obj_exts); - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), +#ifdef CONFIG_MEMCG + VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS), slab_page(slab)); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab)); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab)); +#endif + return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK); } -int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab); -void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, - enum node_stat_item idx, int nr); -#else /* CONFIG_MEMCG_KMEM */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +#else /* CONFIG_SLAB_OBJ_EXT */ + +static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) { return NULL; } -static inline int memcg_alloc_slab_cgroups(struct slab *slab, - struct kmem_cache *s, gfp_t gfp, - bool new_slab) -{ - return 0; -} -#endif /* CONFIG_MEMCG_KMEM */ +#endif /* CONFIG_SLAB_OBJ_EXT */ + +#ifdef CONFIG_MEMCG_KMEM +void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, + enum node_stat_item idx, int nr); +#endif size_t __ksize(const void *objp); diff --git a/mm/slab_common.c b/mm/slab_common.c index f5234672f03c..3179a6aeffc5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1189,7 +1189,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) return (void *)p; } - ret = kmalloc_track_caller(new_size, flags); + ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_); if (ret && p) { /* Disable KASAN checks as the object's redzone is accessed. */ kasan_disable_current(); @@ -1213,7 +1213,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) * * Return: pointer to the allocated memory or %NULL in case of error */ -void *krealloc(const void *p, size_t new_size, gfp_t flags) +void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags) { void *ret; @@ -1228,7 +1228,7 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) return ret; } -EXPORT_SYMBOL(krealloc); +EXPORT_SYMBOL(krealloc_noprof); /** * kfree_sensitive - Clear sensitive information in memory before freeing diff --git a/mm/slub.c b/mm/slub.c index 1bb2a93cf7b6..ffa64a0a55fc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1871,13 +1871,222 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; } -#ifdef CONFIG_MEMCG_KMEM -static inline void memcg_free_slab_cgroups(struct slab *slab) +#ifdef CONFIG_SLAB_OBJ_EXT + +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + +static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) +{ + struct slabobj_ext *slab_exts; + struct slab *obj_exts_slab; + + obj_exts_slab = virt_to_slab(obj_exts); + slab_exts = slab_obj_exts(obj_exts_slab); + if (slab_exts) { + unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, + obj_exts_slab, obj_exts); + /* codetag should be NULL */ + WARN_ON(slab_exts[offs].ref.ct); + set_codetag_empty(&slab_exts[offs].ref); + } +} + +static inline void mark_failed_objexts_alloc(struct slab *slab) +{ + slab->obj_exts = OBJEXTS_ALLOC_FAIL; +} + +static inline void handle_failed_objexts_alloc(unsigned long obj_exts, + struct slabobj_ext *vec, unsigned int objects) +{ + /* + * If vector previously failed to allocate then we have live + * objects with no tag reference. Mark all references in this + * vector as empty to avoid warnings later on. + */ + if (obj_exts & OBJEXTS_ALLOC_FAIL) { + unsigned int i; + + for (i = 0; i < objects; i++) + set_codetag_empty(&vec[i].ref); + } +} + +#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + +static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} +static inline void mark_failed_objexts_alloc(struct slab *slab) {} +static inline void handle_failed_objexts_alloc(unsigned long obj_exts, + struct slabobj_ext *vec, unsigned int objects) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + +/* + * The allocated objcg pointers array is not accounted directly. + * Moreover, it should not come from DMA buffer and is not readily + * reclaimable. So those GFP bits should be masked off. + */ +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ + __GFP_ACCOUNT | __GFP_NOFAIL) + +static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab) +{ + unsigned int objects = objs_per_slab(s, slab); + unsigned long new_exts; + unsigned long old_exts; + struct slabobj_ext *vec; + + gfp &= ~OBJCGS_CLEAR_MASK; + /* Prevent recursive extension vector allocation */ + gfp |= __GFP_NO_OBJ_EXT; + vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp, + slab_nid(slab)); + if (!vec) { + /* Mark vectors which failed to allocate */ + if (new_slab) + mark_failed_objexts_alloc(slab); + + return -ENOMEM; + } + + new_exts = (unsigned long)vec; +#ifdef CONFIG_MEMCG + new_exts |= MEMCG_DATA_OBJEXTS; +#endif + old_exts = slab->obj_exts; + handle_failed_objexts_alloc(old_exts, vec, objects); + if (new_slab) { + /* + * If the slab is brand new and nobody can yet access its + * obj_exts, no synchronization is required and obj_exts can + * be simply assigned. + */ + slab->obj_exts = new_exts; + } else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + /* + * If the slab is already in use, somebody can allocate and + * assign slabobj_exts in parallel. In this case the existing + * objcg vector should be reused. + */ + mark_objexts_empty(vec); + kfree(vec); + return 0; + } + + kmemleak_not_leak(vec); + return 0; +} + +static inline void free_slab_obj_exts(struct slab *slab) +{ + struct slabobj_ext *obj_exts; + + obj_exts = slab_obj_exts(slab); + if (!obj_exts) + return; + + /* + * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its + * corresponding extension will be NULL. alloc_tag_sub() will throw a + * warning if slab has extensions but the extension of an object is + * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that + * the extension for obj_exts is expected to be NULL. + */ + mark_objexts_empty(obj_exts); + kfree(obj_exts); + slab->obj_exts = 0; +} + +static inline bool need_slab_obj_ext(void) +{ + if (mem_alloc_profiling_enabled()) + return true; + + /* + * CONFIG_MEMCG_KMEM creates vector of obj_cgroup objects conditionally + * inside memcg_slab_post_alloc_hook. No other users for now. + */ + return false; +} + +static inline struct slabobj_ext * +prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p) +{ + struct slab *slab; + + if (!p) + return NULL; + + if (s->flags & SLAB_NO_OBJ_EXT) + return NULL; + + if (flags & __GFP_NO_OBJ_EXT) + return NULL; + + slab = virt_to_slab(p); + if (!slab_obj_exts(slab) && + WARN(alloc_slab_obj_exts(slab, s, flags, false), + "%s, %s: Failed to create slab extension vector!\n", + __func__, s->name)) + return NULL; + + return slab_obj_exts(slab) + obj_to_index(s, slab, p); +} + +static inline void +alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, + int objects) +{ + struct slabobj_ext *obj_exts; + int i; + + if (!mem_alloc_profiling_enabled()) + return; + + obj_exts = slab_obj_exts(slab); + if (!obj_exts) + return; + + for (i = 0; i < objects; i++) { + unsigned int off = obj_to_index(s, slab, p[i]); + + alloc_tag_sub(&obj_exts[off].ref, s->size); + } +} + +#else /* CONFIG_SLAB_OBJ_EXT */ + +static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab) { - kfree(slab_objcgs(slab)); - slab->memcg_data = 0; + return 0; +} + +static inline void free_slab_obj_exts(struct slab *slab) +{ +} + +static inline bool need_slab_obj_ext(void) +{ + return false; } +static inline struct slabobj_ext * +prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p) +{ + return NULL; +} + +static inline void +alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, + int objects) +{ +} + +#endif /* CONFIG_SLAB_OBJ_EXT */ + +#ifdef CONFIG_MEMCG_KMEM static inline size_t obj_full_size(struct kmem_cache *s) { /* @@ -1956,15 +2165,15 @@ static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, if (likely(p[i])) { slab = virt_to_slab(p[i]); - if (!slab_objcgs(slab) && - memcg_alloc_slab_cgroups(slab, s, flags, false)) { + if (!slab_obj_exts(slab) && + alloc_slab_obj_exts(slab, s, flags, false)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; } off = obj_to_index(s, slab, p[i]); obj_cgroup_get(objcg); - slab_objcgs(slab)[off] = objcg; + slab_obj_exts(slab)[off].objcg = objcg; mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s)); } else { @@ -1985,18 +2194,18 @@ void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects, - struct obj_cgroup **objcgs) + struct slabobj_ext *obj_exts) { for (int i = 0; i < objects; i++) { struct obj_cgroup *objcg; unsigned int off; off = obj_to_index(s, slab, p[i]); - objcg = objcgs[off]; + objcg = obj_exts[off].objcg; if (!objcg) continue; - objcgs[off] = NULL; + obj_exts[off].objcg = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), -obj_full_size(s)); @@ -2008,16 +2217,16 @@ static __fastpath_inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects) { - struct obj_cgroup **objcgs; + struct slabobj_ext *obj_exts; if (!memcg_kmem_online()) return; - objcgs = slab_objcgs(slab); - if (likely(!objcgs)) + obj_exts = slab_obj_exts(slab); + if (likely(!obj_exts)) return; - __memcg_slab_free_hook(s, slab, p, objects, objcgs); + __memcg_slab_free_hook(s, slab, p, objects, obj_exts); } static inline @@ -2028,10 +2237,6 @@ void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, obj_cgroup_uncharge(objcg, objects * obj_full_size(s)); } #else /* CONFIG_MEMCG_KMEM */ -static inline void memcg_free_slab_cgroups(struct slab *slab) -{ -} - static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, struct obj_cgroup **objcgp, @@ -2106,9 +2311,9 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init) return !kasan_slab_free(s, x, init); } -static inline bool slab_free_freelist_hook(struct kmem_cache *s, - void **head, void **tail, - int *cnt) +static __fastpath_inline +bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, + int *cnt) { void *object; @@ -2298,7 +2503,7 @@ static __always_inline void account_slab(struct slab *slab, int order, struct kmem_cache *s, gfp_t gfp) { if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT)) - memcg_alloc_slab_cgroups(slab, s, gfp, true); + alloc_slab_obj_exts(slab, s, gfp, true); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), PAGE_SIZE << order); @@ -2307,8 +2512,8 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online()) - memcg_free_slab_cgroups(slab); + if (memcg_kmem_online() || need_slab_obj_ext()) + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); @@ -3760,6 +3965,7 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, unsigned int orig_size) { unsigned int zero_size = s->object_size; + struct slabobj_ext *obj_exts; bool kasan_init = init; size_t i; gfp_t init_flags = flags & gfp_allowed_mask; @@ -3802,6 +4008,18 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, init_flags); kmsan_slab_alloc(s, p[i], init_flags); + if (need_slab_obj_ext()) { + obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]); +#ifdef CONFIG_MEM_ALLOC_PROFILING + /* + * Currently obj_exts is used only for allocation profiling. + * If other users appear then mem_alloc_profiling_enabled() + * check should be added before alloc_tag_add(). + */ + if (likely(obj_exts)) + alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size); +#endif + } } memcg_slab_post_alloc_hook(s, objcg, flags, size, p); @@ -3847,7 +4065,7 @@ out: return object; } -void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) +void *kmem_cache_alloc_noprof(struct kmem_cache *s, gfp_t gfpflags) { void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, s->object_size); @@ -3856,9 +4074,9 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) return ret; } -EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmem_cache_alloc_noprof); -void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, +void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) { void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_, @@ -3868,10 +4086,10 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_lru); +EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof); /** - * kmem_cache_alloc_node - Allocate an object on the specified node + * kmem_cache_alloc_node_noprof - Allocate an object on the specified node * @s: The cache to allocate from. * @gfpflags: See kmalloc(). * @node: node number of the target node. @@ -3883,7 +4101,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru); * * Return: pointer to the new object or %NULL in case of error */ -void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) +void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); @@ -3891,7 +4109,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_node); +EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); /* * To avoid unnecessary overhead, we pass through large allocation requests @@ -3908,7 +4126,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) flags = kmalloc_fix_flags(flags); flags |= __GFP_COMP; - folio = (struct folio *)alloc_pages_node(node, flags, order); + folio = (struct folio *)alloc_pages_node_noprof(node, flags, order); if (folio) { ptr = folio_address(folio); lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, @@ -3923,7 +4141,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) return ptr; } -void *kmalloc_large(size_t size, gfp_t flags) +void *kmalloc_large_noprof(size_t size, gfp_t flags) { void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); @@ -3931,9 +4149,9 @@ void *kmalloc_large(size_t size, gfp_t flags) flags, NUMA_NO_NODE); return ret; } -EXPORT_SYMBOL(kmalloc_large); +EXPORT_SYMBOL(kmalloc_large_noprof); -void *kmalloc_large_node(size_t size, gfp_t flags, int node) +void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) { void *ret = __kmalloc_large_node(size, flags, node); @@ -3941,7 +4159,7 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) flags, node); return ret; } -EXPORT_SYMBOL(kmalloc_large_node); +EXPORT_SYMBOL(kmalloc_large_node_noprof); static __always_inline void *__do_kmalloc_node(size_t size, gfp_t flags, int node, @@ -3968,26 +4186,26 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, return ret; } -void *__kmalloc_node(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) { return __do_kmalloc_node(size, flags, node, _RET_IP_); } -EXPORT_SYMBOL(__kmalloc_node); +EXPORT_SYMBOL(__kmalloc_node_noprof); -void *__kmalloc(size_t size, gfp_t flags) +void *__kmalloc_noprof(size_t size, gfp_t flags) { return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); } -EXPORT_SYMBOL(__kmalloc); +EXPORT_SYMBOL(__kmalloc_noprof); -void *__kmalloc_node_track_caller(size_t size, gfp_t flags, - int node, unsigned long caller) +void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, + int node, unsigned long caller) { return __do_kmalloc_node(size, flags, node, caller); } -EXPORT_SYMBOL(__kmalloc_node_track_caller); +EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); -void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) +void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, size); @@ -3997,9 +4215,9 @@ void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_trace); +EXPORT_SYMBOL(kmalloc_trace_noprof); -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, +void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); @@ -4009,7 +4227,7 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_node_trace); +EXPORT_SYMBOL(kmalloc_node_trace_noprof); static noinline void free_to_partial_list( struct kmem_cache *s, struct slab *slab, @@ -4276,6 +4494,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, unsigned long addr) { memcg_slab_free_hook(s, slab, &object, 1); + alloc_tagging_slab_free_hook(s, slab, &object, 1); if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) do_slab_free(s, slab, object, object, 1, addr); @@ -4286,6 +4505,7 @@ void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head, void *tail, void **p, int cnt, unsigned long addr) { memcg_slab_free_hook(s, slab, p, cnt); + alloc_tagging_slab_free_hook(s, slab, p, cnt); /* * With KASAN enabled slab_free_freelist_hook modifies the freelist * to remove objects, whose reuse must be delayed. @@ -4612,8 +4832,8 @@ error: #endif /* CONFIG_SLUB_TINY */ /* Note that interrupts must be enabled when calling this function. */ -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { int i; struct obj_cgroup *objcg = NULL; @@ -4641,7 +4861,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, return i; } -EXPORT_SYMBOL(kmem_cache_alloc_bulk); +EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof); /* @@ -5630,7 +5850,8 @@ void __init kmem_cache_init(void) node_set(node, slab_nodes); create_boot_cache(kmem_cache_node, "kmem_cache_node", - sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); + sizeof(struct kmem_cache_node), + SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); @@ -5640,7 +5861,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN, 0, 0); + SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0); kmem_cache = bootstrap(&boot_kmem_cache); kmem_cache_node = bootstrap(&boot_kmem_cache_node); diff --git a/mm/util.c b/mm/util.c index 669397235787..157b5edcba75 100644 --- a/mm/util.c +++ b/mm/util.c @@ -115,7 +115,7 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp) EXPORT_SYMBOL(kstrndup); /** - * kmemdup - duplicate region of memory + * kmemdup_noprof - duplicate region of memory * * @src: memory region to duplicate * @len: memory region length @@ -124,16 +124,16 @@ EXPORT_SYMBOL(kstrndup); * Return: newly allocated copy of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ -void *kmemdup(const void *src, size_t len, gfp_t gfp) +void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) { void *p; - p = kmalloc_track_caller(len, gfp); + p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_); if (p) memcpy(p, src, len); return p; } -EXPORT_SYMBOL(kmemdup); +EXPORT_SYMBOL(kmemdup_noprof); /** * kmemdup_array - duplicate a given array. @@ -594,7 +594,7 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, EXPORT_SYMBOL(vm_mmap); /** - * kvmalloc_node - attempt to allocate physically contiguous memory, but upon + * kvmalloc_node_noprof - attempt to allocate physically contiguous memory, but upon * failure, fall back to non-contiguous (vmalloc) allocation. * @size: size of the request. * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. @@ -609,7 +609,7 @@ EXPORT_SYMBOL(vm_mmap); * * Return: pointer to the allocated memory of %NULL in case of failure */ -void *kvmalloc_node(size_t size, gfp_t flags, int node) +void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) { gfp_t kmalloc_flags = flags; void *ret; @@ -631,7 +631,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) kmalloc_flags &= ~__GFP_NOFAIL; } - ret = kmalloc_node(size, kmalloc_flags, node); + ret = kmalloc_node_noprof(size, kmalloc_flags, node); /* * It doesn't really make sense to fallback to vmalloc for sub page @@ -656,11 +656,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) * about the resulting pointer, and cannot play * protection games. */ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END, flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(kvmalloc_node); +EXPORT_SYMBOL(kvmalloc_node_noprof); /** * kvfree() - Free memory. @@ -699,7 +699,7 @@ void kvfree_sensitive(const void *addr, size_t len) } EXPORT_SYMBOL(kvfree_sensitive); -void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) +void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) { void *newp; @@ -712,15 +712,15 @@ void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) kvfree(p); return newp; } -EXPORT_SYMBOL(kvrealloc); +EXPORT_SYMBOL(kvrealloc_noprof); /** - * __vmalloc_array - allocate memory for a virtually contiguous array. + * __vmalloc_array_noprof - allocate memory for a virtually contiguous array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -void *__vmalloc_array(size_t n, size_t size, gfp_t flags) +void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) { size_t bytes; @@ -728,18 +728,18 @@ void *__vmalloc_array(size_t n, size_t size, gfp_t flags) return NULL; return __vmalloc(bytes, flags); } -EXPORT_SYMBOL(__vmalloc_array); +EXPORT_SYMBOL(__vmalloc_array_noprof); /** - * vmalloc_array - allocate memory for a virtually contiguous array. + * vmalloc_array_noprof - allocate memory for a virtually contiguous array. * @n: number of elements. * @size: element size. */ -void *vmalloc_array(size_t n, size_t size) +void *vmalloc_array_noprof(size_t n, size_t size) { return __vmalloc_array(n, size, GFP_KERNEL); } -EXPORT_SYMBOL(vmalloc_array); +EXPORT_SYMBOL(vmalloc_array_noprof); /** * __vcalloc - allocate and zero memory for a virtually contiguous array. @@ -747,22 +747,22 @@ EXPORT_SYMBOL(vmalloc_array); * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -void *__vcalloc(size_t n, size_t size, gfp_t flags) +void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) { return __vmalloc_array(n, size, flags | __GFP_ZERO); } -EXPORT_SYMBOL(__vcalloc); +EXPORT_SYMBOL(__vcalloc_noprof); /** - * vcalloc - allocate and zero memory for a virtually contiguous array. + * vcalloc_noprof - allocate and zero memory for a virtually contiguous array. * @n: number of elements. * @size: element size. */ -void *vcalloc(size_t n, size_t size) +void *vcalloc_noprof(size_t n, size_t size) { return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO); } -EXPORT_SYMBOL(vcalloc); +EXPORT_SYMBOL(vcalloc_noprof); struct anon_vma *folio_anon_vma(struct folio *folio) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 22aa63f4ef63..b2f2248d85a9 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3507,12 +3507,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * but mempolicy wants to alloc memory by interleaving. */ if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) - nr = alloc_pages_bulk_array_mempolicy(bulk_gfp, + nr = alloc_pages_bulk_array_mempolicy_noprof(bulk_gfp, nr_pages_request, pages + nr_allocated); else - nr = alloc_pages_bulk_array_node(bulk_gfp, nid, + nr = alloc_pages_bulk_array_node_noprof(bulk_gfp, nid, nr_pages_request, pages + nr_allocated); @@ -3542,9 +3542,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid, break; if (nid == NUMA_NO_NODE) - page = alloc_pages(alloc_gfp, order); + page = alloc_pages_noprof(alloc_gfp, order); else - page = alloc_pages_node(nid, alloc_gfp, order); + page = alloc_pages_node_noprof(nid, alloc_gfp, order); if (unlikely(!page)) { if (!nofail) break; @@ -3601,10 +3601,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, + area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node, area->caller); } else { - area->pages = kmalloc_node(array_size, nested_gfp, node); + area->pages = kmalloc_node_noprof(array_size, nested_gfp, node); } if (!area->pages) { @@ -3687,7 +3687,7 @@ fail: } /** - * __vmalloc_node_range - allocate virtually contiguous memory + * __vmalloc_node_range_noprof - allocate virtually contiguous memory * @size: allocation size * @align: desired alignment * @start: vm area range start @@ -3714,7 +3714,7 @@ fail: * * Return: the address of the area or %NULL on failure */ -void *__vmalloc_node_range(unsigned long size, unsigned long align, +void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) @@ -3843,7 +3843,7 @@ fail: } /** - * __vmalloc_node - allocate virtually contiguous memory + * __vmalloc_node_noprof - allocate virtually contiguous memory * @size: allocation size * @align: desired alignment * @gfp_mask: flags for the page level allocator @@ -3861,10 +3861,10 @@ fail: * * Return: pointer to the allocated memory or %NULL on error */ -void *__vmalloc_node(unsigned long size, unsigned long align, +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { - return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, 0, node, caller); } /* @@ -3873,15 +3873,15 @@ void *__vmalloc_node(unsigned long size, unsigned long align, * than that. */ #ifdef CONFIG_TEST_VMALLOC_MODULE -EXPORT_SYMBOL_GPL(__vmalloc_node); +EXPORT_SYMBOL_GPL(__vmalloc_node_noprof); #endif -void *__vmalloc(unsigned long size, gfp_t gfp_mask) +void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, gfp_mask, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(__vmalloc); +EXPORT_SYMBOL(__vmalloc_noprof); /** * vmalloc - allocate virtually contiguous memory @@ -3895,12 +3895,12 @@ EXPORT_SYMBOL(__vmalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc(unsigned long size) +void *vmalloc_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc); +EXPORT_SYMBOL(vmalloc_noprof); /** * vmalloc_huge - allocate virtually contiguous memory, allow huge pages @@ -3914,16 +3914,16 @@ EXPORT_SYMBOL(vmalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL_GPL(vmalloc_huge); +EXPORT_SYMBOL_GPL(vmalloc_huge_noprof); /** - * vzalloc - allocate virtually contiguous memory with zero fill + * vzalloc_noprof - allocate virtually contiguous memory with zero fill * @size: allocation size * * Allocate enough pages to cover @size from the page level @@ -3935,12 +3935,12 @@ EXPORT_SYMBOL_GPL(vmalloc_huge); * * Return: pointer to the allocated memory or %NULL on error */ -void *vzalloc(unsigned long size) +void *vzalloc_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vzalloc); +EXPORT_SYMBOL(vzalloc_noprof); /** * vmalloc_user - allocate zeroed virtually contiguous memory for userspace @@ -3951,17 +3951,17 @@ EXPORT_SYMBOL(vzalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_user(unsigned long size) +void *vmalloc_user_noprof(unsigned long size) { - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_user); +EXPORT_SYMBOL(vmalloc_user_noprof); /** - * vmalloc_node - allocate memory on a specific node + * vmalloc_node_noprof - allocate memory on a specific node * @size: allocation size * @node: numa node * @@ -3973,15 +3973,15 @@ EXPORT_SYMBOL(vmalloc_user); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_node(unsigned long size, int node) +void *vmalloc_node_noprof(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL, node, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_node); +EXPORT_SYMBOL(vmalloc_node_noprof); /** - * vzalloc_node - allocate memory on a specific node with zero fill + * vzalloc_node_noprof - allocate memory on a specific node with zero fill * @size: allocation size * @node: numa node * @@ -3991,12 +3991,12 @@ EXPORT_SYMBOL(vmalloc_node); * * Return: pointer to the allocated memory or %NULL on error */ -void *vzalloc_node(unsigned long size, int node) +void *vzalloc_node_noprof(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(vzalloc_node); +EXPORT_SYMBOL(vzalloc_node_noprof); #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) @@ -4011,7 +4011,7 @@ EXPORT_SYMBOL(vzalloc_node); #endif /** - * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) + * vmalloc_32_noprof - allocate virtually contiguous memory (32bit addressable) * @size: allocation size * * Allocate enough 32bit PA addressable pages to cover @size from the @@ -4019,15 +4019,15 @@ EXPORT_SYMBOL(vzalloc_node); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_32(unsigned long size) +void *vmalloc_32_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmalloc_32_noprof); /** - * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory + * vmalloc_32_user_noprof - allocate zeroed virtually contiguous 32bit memory * @size: allocation size * * The resulting memory area is 32bit addressable and zeroed so it can be @@ -4035,14 +4035,14 @@ EXPORT_SYMBOL(vmalloc_32); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_32_user(unsigned long size) +void *vmalloc_32_user_noprof(unsigned long size) { - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_32_user); +EXPORT_SYMBOL(vmalloc_32_user_noprof); /* * Atomically zero bytes in the iterator. diff --git a/rust/helpers.c b/rust/helpers.c index 70e59efd92bc..858d802abd11 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -28,6 +28,7 @@ #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/sched/signal.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/workqueue.h> @@ -157,6 +158,13 @@ void rust_helper_init_work_with_key(struct work_struct *work, work_func_t func, } EXPORT_SYMBOL_GPL(rust_helper_init_work_with_key); +void * __must_check __realloc_size(2) +rust_helper_krealloc(const void *objp, size_t new_size, gfp_t flags) +{ + return krealloc(objp, new_size, flags); +} +EXPORT_SYMBOL_GPL(rust_helper_krealloc); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 653b92f6d4c8..47978efe4797 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -204,6 +204,11 @@ static int symbol_in_range(const struct sym_entry *s, return 0; } +static bool string_starts_with(const char *s, const char *prefix) +{ + return strncmp(s, prefix, strlen(prefix)) == 0; +} + static int symbol_valid(const struct sym_entry *s) { const char *name = sym_name(s); @@ -211,6 +216,14 @@ static int symbol_valid(const struct sym_entry *s) /* if --all-symbols is not specified, then symbols outside the text * and inittext sections are discarded */ if (!all_symbols) { + /* + * Symbols starting with __start and __stop are used to denote + * section boundaries, and should always be included: + */ + if (string_starts_with(name, "__start_") || + string_starts_with(name, "__stop_")) + return 1; + if (symbol_in_range(s, text_ranges, ARRAY_SIZE(text_ranges)) == 0) return 0; diff --git a/scripts/module.lds.S b/scripts/module.lds.S index bf5bcf2836d8..45c67a0994f3 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -9,6 +9,8 @@ #define DISCARD_EH_FRAME *(.eh_frame) #endif +#include <asm-generic/codetag.lds.h> + SECTIONS { /DISCARD/ : { *(.discard) @@ -47,12 +49,17 @@ SECTIONS { .data : { *(.data .data.[0-9a-zA-Z_]*) *(.data..L*) + CODETAG_SECTIONS() } .rodata : { *(.rodata .rodata.[0-9a-zA-Z_]*) *(.rodata..L*) } +#else + .data : { + CODETAG_SECTIONS() + } #endif } diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index d3fa6e136744..990b5bd717a1 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -13,6 +13,7 @@ #include <sound/soc.h> #include <linux/pm_runtime.h> #include <linux/spi/spi.h> +#include <linux/vmalloc.h> #include "hda_local.h" #include "hda_auto_parser.h" #include "hda_jack.h" |