diff options
51 files changed, 566 insertions, 292 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 176298f2f4de..ad9ba3ec90a5 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1433,6 +1433,24 @@ PAGE_SIZE multiple when read back. workingset_nodereclaim Number of times a shadow node has been reclaimed + pgscan (npn) + Amount of scanned pages (in an inactive LRU list) + + pgsteal (npn) + Amount of reclaimed pages + + pgscan_kswapd (npn) + Amount of scanned pages by kswapd (in an inactive LRU list) + + pgscan_direct (npn) + Amount of scanned pages directly (in an inactive LRU list) + + pgsteal_kswapd (npn) + Amount of reclaimed pages by kswapd + + pgsteal_direct (npn) + Amount of reclaimed pages directly + pgfault (npn) Total number of page faults incurred @@ -1442,12 +1460,6 @@ PAGE_SIZE multiple when read back. pgrefill (npn) Amount of scanned pages (in an active LRU list) - pgscan (npn) - Amount of scanned pages (in an inactive LRU list) - - pgsteal (npn) - Amount of reclaimed pages - pgactivate (npn) Amount of pages moved to the active LRU list diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst index 1c935f41cd3a..5483fd39ef29 100644 --- a/Documentation/dev-tools/kmemleak.rst +++ b/Documentation/dev-tools/kmemleak.rst @@ -174,7 +174,6 @@ mapping: - ``kmemleak_alloc_phys`` - ``kmemleak_free_part_phys`` -- ``kmemleak_not_leak_phys`` - ``kmemleak_ignore_phys`` Dealing with false positives/negatives diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index ec20c1004abf..ef427a6bdd1a 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -155,6 +155,10 @@ retry: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index dad27e4d69ff..5ca59a482632 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -146,6 +146,10 @@ retry: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + /* * Fault retry nuances, mmap_lock already relinquished by core mm */ diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a062e07516dd..46cccd6bf705 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -322,6 +322,10 @@ retry: return 0; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + if (!(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c5e11768e5c1..de166cdeb89a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -608,6 +608,10 @@ retry: return 0; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + if (fault & VM_FAULT_RETRY) { mm_flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 7215a46b6b8e..e15f736cca4b 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -285,6 +285,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 4fac4b9eb316..f73c7cbfe326 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -96,6 +96,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + /* The most common case -- we are done. */ if (likely(!(fault & VM_FAULT_ERROR))) { if (fault & VM_FAULT_RETRY) { diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 07379d1a227f..ef78c2d66cdd 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -139,6 +139,10 @@ retry: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { /* * We ran out of memory, or some other thing happened diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 71aa9f6315dc..4d2837eb3e2a 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -141,6 +141,10 @@ good_area: if (fault_signal_pending(fault, regs)) return 0; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index a9626e6a68af..5c40c3ebe52f 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -222,6 +222,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index b08bc556d30d..a27045f5a556 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -162,6 +162,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index a32f14cd72f2..edaca0a6c1c1 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -139,6 +139,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 53b760af3bb7..b4762d66e9ef 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -165,6 +165,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 84bc437be5cd..9ad80d4d3389 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -311,6 +311,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { /* * We hit a shared mapping outside of the file, or some diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index c1cb21a00884..7c507fb48182 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -65,6 +65,11 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, ret = 0; *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL); + + /* The fault is fully completed (including releasing mmap lock) */ + if (*flt & VM_FAULT_COMPLETED) + return 0; + if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d53fed4eccbd..014005428687 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,6 +511,10 @@ retry: if (fault_signal_pending(fault, regs)) return user_mode(regs) ? 0 : SIGBUS; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + goto out; + /* * Handle the retry right now, the mmap_lock has been released in that * case. @@ -525,6 +529,7 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) return mm_fault_error(regs, address, fault); +out: /* * Major/minor page fault accounting. */ diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 40694f0cab9e..f2fbd1400b7c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -326,6 +326,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e173b6187ad5..973dcd05c293 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -433,6 +433,17 @@ retry: goto out_up; goto out; } + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) { + if (gmap) { + mmap_read_lock(mm); + goto out_gmap; + } + fault = 0; + goto out; + } + if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; @@ -452,6 +463,7 @@ retry: mmap_read_lock(mm); goto retry; } +out_gmap: if (IS_ENABLED(CONFIG_PGSTE) && gmap) { address = __gmap_link(gmap, current->thread.gmap_addr, address); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index e175667b1363..acd2f5e50bfc 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -485,6 +485,10 @@ good_area: if (mm_fault_error(regs, error_code, address, fault)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index ad569d9bd124..91259f291c54 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -190,6 +190,10 @@ good_area: if (fault_signal_pending(fault, regs)) return; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 253e07043298..4acc12eafbf5 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -427,6 +427,10 @@ good_area: if (fault_signal_pending(fault, regs)) goto exit_exception; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + goto lock_released; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -449,6 +453,7 @@ good_area: } mmap_read_unlock(mm); +lock_released: mm_rss = get_mm_rss(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index d1d5d0be0308..d3ce21c4ca32 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -76,6 +76,10 @@ good_area: if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { goto out_of_memory; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fad8faa29d04..fe10c6d76bac 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1408,6 +1408,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + /* * If we need to retry the mmap_lock has already been released, * and if there is a fatal signal pending there is no guarantee diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index a0d023cb4292..509408da0da1 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -19,44 +19,6 @@ #include <asm/tlbflush.h> #include <asm/elf.h> -#if 0 /* This is just for testing */ -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - unsigned long start = address; - int length = 1; - int nr; - struct page *page; - struct vm_area_struct *vma; - - vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma)) - return ERR_PTR(-EINVAL); - - pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); - - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); - - page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; - - WARN_ON(!PageHead(page)); - - return page; -} - -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - -#else - /* * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry. @@ -72,7 +34,6 @@ int pud_huge(pud_t pud) { return !!(pud_val(pud) & _PAGE_PSE); } -#endif #ifdef CONFIG_HUGETLB_PAGE static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 16f0a5ff5799..8c781b05c0bd 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -172,6 +172,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index a8f5b6532165..2c677e84c3f5 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -529,7 +529,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); if (!nomap) - kmemleak_alloc_phys(base, size, 0, 0); + kmemleak_alloc_phys(base, size, 0); } else pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", diff --git a/include/linux/damon.h b/include/linux/damon.h index 7c62da31ce4b..2765c7d99beb 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -397,7 +397,6 @@ struct damon_callback { * detail. * * @kdamond: Kernel thread who does the monitoring. - * @kdamond_stop: Notifies whether kdamond should stop. * @kdamond_lock: Mutex for the synchronizations with @kdamond. * * For each monitoring context, one kernel thread for the monitoring is @@ -406,14 +405,14 @@ struct damon_callback { * Once started, the monitoring thread runs until explicitly required to be * terminated or every monitoring target is invalid. The validity of the * targets is checked via the &damon_operations.target_valid of @ops. The - * termination can also be explicitly requested by writing non-zero to - * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. - * Therefore, users can know whether the monitoring is ongoing or terminated by - * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from - * outside of the monitoring thread must be protected by @kdamond_lock. - * - * Note that the monitoring thread protects only @kdamond and @kdamond_stop via - * @kdamond_lock. Accesses to other fields must be protected by themselves. + * termination can also be explicitly requested by calling damon_stop(). + * The thread sets @kdamond to NULL when it terminates. Therefore, users can + * know whether the monitoring is ongoing or terminated by reading @kdamond. + * Reads and writes to @kdamond from outside of the monitoring thread must + * be protected by @kdamond_lock. + * + * Note that the monitoring thread protects only @kdamond via @kdamond_lock. + * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3af34de54330..fee9835e3793 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -336,19 +336,6 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off, kunmap_local(dst); } -static inline void memmove_page(struct page *dst_page, size_t dst_off, - struct page *src_page, size_t src_off, - size_t len) -{ - char *dst = kmap_local_page(dst_page); - char *src = kmap_local_page(src_page); - - VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); - memmove(dst + dst_off, src + src_off, len); - kunmap_local(src); - kunmap_local(dst); -} - static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 34684b2026ab..6a3cd1bf4680 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -29,10 +29,9 @@ extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; -extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, +extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, gfp_t gfp) __ref; extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref; -extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref; extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, @@ -107,15 +106,12 @@ static inline void kmemleak_no_scan(const void *ptr) { } static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size, - int min_count, gfp_t gfp) + gfp_t gfp) { } static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) { } -static inline void kmemleak_not_leak_phys(phys_addr_t phys) -{ -} static inline void kmemleak_ignore_phys(phys_addr_t phys) { } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9ecead1042b9..04f2f33607e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1740,6 +1740,7 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_obj(void *p); +struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); static inline void count_objcg_event(struct obj_cgroup *objcg, enum vm_event_item idx) @@ -1755,6 +1756,42 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, rcu_read_unlock(); } +/** + * get_mem_cgroup_from_obj - get a memcg associated with passed kernel object. + * @p: pointer to object from which memcg should be extracted. It can be NULL. + * + * Retrieves the memory group into which the memory of the pointed kernel + * object is accounted. If memcg is found, its reference is taken. + * If a passed kernel object is uncharged, or if proper memcg cannot be found, + * as well as if mem_cgroup is disabled, NULL is returned. + * + * Return: valid memcg pointer with taken reference or NULL. + */ +static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); + do { + memcg = mem_cgroup_from_obj(p); + } while (memcg && !css_tryget(&memcg->css)); + rcu_read_unlock(); + return memcg; +} + +/** + * mem_cgroup_or_root - always returns a pointer to a valid memory cgroup. + * @memcg: pointer to a valid memory cgroup or NULL. + * + * If passed argument is not NULL, returns it without any additional checks + * and changes. Otherwise, root_mem_cgroup is returned. + * + * NOTE: root_mem_cgroup can be NULL during early boot. + */ +static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) +{ + return memcg ? memcg : root_mem_cgroup; +} #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1798,7 +1835,12 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) { - return NULL; + return NULL; +} + +static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) +{ + return NULL; } static inline void count_objcg_event(struct obj_cgroup *objcg, @@ -1806,6 +1848,15 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { } +static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) +{ + return NULL; +} + +static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) +{ + return NULL; +} #endif /* CONFIG_MEMCG_KMEM */ #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c29ab4c0cd5c..6b961a29bf26 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -729,6 +729,7 @@ typedef __bitwise unsigned int vm_fault_t; * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs * fsync() to complete (for synchronous page faults * in DAX) + * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ @@ -746,6 +747,7 @@ enum vm_fault_reason { VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, + VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; diff --git a/lib/test_hmm.c b/lib/test_hmm.c index cfe632047839..f2c3015c5c82 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -732,7 +732,7 @@ static int dmirror_exclusive(struct dmirror *dmirror, mmap_read_lock(mm); for (addr = start; addr < end; addr = next) { - unsigned long mapped; + unsigned long mapped = 0; int i; if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)) @@ -741,7 +741,13 @@ static int dmirror_exclusive(struct dmirror *dmirror, next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT); ret = make_device_exclusive_range(mm, addr, next, pages, NULL); - mapped = dmirror_atomic_map(addr, next, pages, dmirror); + /* + * Do dmirror_atomic_map() iff all pages are marked for + * exclusive access to avoid accessing uninitialized + * fields of pages. + */ + if (ret == (next - addr) >> PAGE_SHIFT) + mapped = dmirror_atomic_map(addr, next, pages, dmirror); for (i = 0; i < ret; i++) { if (pages[i]) { unlock_page(pages[i]); @@ -951,6 +951,25 @@ static int faultin_page(struct vm_area_struct *vma, } ret = handle_mm_fault(vma, address, fault_flags, NULL); + + if (ret & VM_FAULT_COMPLETED) { + /* + * With FAULT_FLAG_RETRY_NOWAIT we'll never release the + * mmap lock in the page fault handler. Sanity check this. + */ + WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT); + if (locked) + *locked = 0; + /* + * We should do the same as VM_FAULT_RETRY, but let's not + * return -EBUSY since that's not reflecting the reality of + * what has happened - we've just fully completed a page + * fault, with the mmap lock released. Use -EAGAIN to show + * that we want to take the mmap lock _again_. + */ + return -EAGAIN; + } + if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); @@ -1177,6 +1196,7 @@ retry: case 0: goto retry; case -EBUSY: + case -EAGAIN: ret = 0; fallthrough; case -EFAULT: @@ -1303,6 +1323,18 @@ retry: return -EINTR; ret = handle_mm_fault(vma, address, fault_flags, NULL); + + if (ret & VM_FAULT_COMPLETED) { + /* + * NOTE: it's a pity that we need to retake the lock here + * to pair with the unlock() in the callers. Ideally we + * could tell the callers so they do not need to unlock. + */ + mmap_read_lock(mm); + *unlocked = true; + return 0; + } + if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); @@ -1368,7 +1400,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, /* VM_FAULT_RETRY couldn't trigger, bypass */ return ret; - /* VM_FAULT_RETRY cannot return errors */ + /* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */ if (!*locked) { BUG_ON(ret < 0); BUG_ON(ret >= nr_pages); diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a182f5ddaf68..1eddc0132f7f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -14,14 +14,16 @@ * The following locks and mutexes are used by kmemleak: * * - kmemleak_lock (raw_spinlock_t): protects the object_list modifications and - * accesses to the object_tree_root. The object_list is the main list - * holding the metadata (struct kmemleak_object) for the allocated memory - * blocks. The object_tree_root is a red black tree used to look-up - * metadata based on a pointer to the corresponding memory block. The - * kmemleak_object structures are added to the object_list and - * object_tree_root in the create_object() function called from the - * kmemleak_alloc() callback and removed in delete_object() called from the - * kmemleak_free() callback + * accesses to the object_tree_root (or object_phys_tree_root). The + * object_list is the main list holding the metadata (struct kmemleak_object) + * for the allocated memory blocks. The object_tree_root and object_phys_tree_root + * are red black trees used to look-up metadata based on a pointer to the + * corresponding memory block. The object_phys_tree_root is for objects + * allocated with physical address. The kmemleak_object structures are + * added to the object_list and object_tree_root (or object_phys_tree_root) + * in the create_object() function called from the kmemleak_alloc() (or + * kmemleak_alloc_phys()) callback and removed in delete_object() called from + * the kmemleak_free() callback * - kmemleak_object.lock (raw_spinlock_t): protects a kmemleak_object. * Accesses to the metadata (e.g. count) are protected by this lock. Note * that some members of this structure may be protected by other means @@ -172,6 +174,8 @@ struct kmemleak_object { #define OBJECT_NO_SCAN (1 << 2) /* flag set to fully scan the object when scan_area allocation failed */ #define OBJECT_FULL_SCAN (1 << 3) +/* flag set for object allocated with physical address */ +#define OBJECT_PHYS (1 << 4) #define HEX_PREFIX " " /* number of bytes to print per line; must be 16 or 32 */ @@ -193,7 +197,9 @@ static int mem_pool_free_count = ARRAY_SIZE(mem_pool); static LIST_HEAD(mem_pool_free_list); /* search tree for object boundaries */ static struct rb_root object_tree_root = RB_ROOT; -/* protecting the access to object_list and object_tree_root */ +/* search tree for object (with OBJECT_PHYS flag) boundaries */ +static struct rb_root object_phys_tree_root = RB_ROOT; +/* protecting the access to object_list, object_tree_root (or object_phys_tree_root) */ static DEFINE_RAW_SPINLOCK(kmemleak_lock); /* allocation caches for kmemleak internal data */ @@ -285,6 +291,9 @@ static void hex_dump_object(struct seq_file *seq, const u8 *ptr = (const u8 *)object->pointer; size_t len; + if (WARN_ON_ONCE(object->flags & OBJECT_PHYS)) + return; + /* limit the number of lines to HEX_MAX_LINES */ len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE); @@ -378,9 +387,11 @@ static void dump_object_info(struct kmemleak_object *object) * beginning of the memory block are allowed. The kmemleak_lock must be held * when calling this function. */ -static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) +static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, + bool is_phys) { - struct rb_node *rb = object_tree_root.rb_node; + struct rb_node *rb = is_phys ? object_phys_tree_root.rb_node : + object_tree_root.rb_node; unsigned long untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); while (rb) { @@ -406,6 +417,12 @@ static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) return NULL; } +/* Look-up a kmemleak object which allocated with virtual address. */ +static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) +{ + return __lookup_object(ptr, alias, false); +} + /* * Increment the object use_count. Return 1 if successful or 0 otherwise. Note * that once an object's use_count reached 0, the RCU freeing was already @@ -515,14 +532,15 @@ static void put_object(struct kmemleak_object *object) /* * Look up an object in the object search tree and increase its use_count. */ -static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) +static struct kmemleak_object *__find_and_get_object(unsigned long ptr, int alias, + bool is_phys) { unsigned long flags; struct kmemleak_object *object; rcu_read_lock(); raw_spin_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, alias); + object = __lookup_object(ptr, alias, is_phys); raw_spin_unlock_irqrestore(&kmemleak_lock, flags); /* check whether the object is still available */ @@ -533,28 +551,39 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) return object; } +/* Look up and get an object which allocated with virtual address. */ +static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) +{ + return __find_and_get_object(ptr, alias, false); +} + /* - * Remove an object from the object_tree_root and object_list. Must be called - * with the kmemleak_lock held _if_ kmemleak is still enabled. + * Remove an object from the object_tree_root (or object_phys_tree_root) + * and object_list. Must be called with the kmemleak_lock held _if_ kmemleak + * is still enabled. */ static void __remove_object(struct kmemleak_object *object) { - rb_erase(&object->rb_node, &object_tree_root); + rb_erase(&object->rb_node, object->flags & OBJECT_PHYS ? + &object_phys_tree_root : + &object_tree_root); list_del_rcu(&object->object_list); } /* * Look up an object in the object search tree and remove it from both - * object_tree_root and object_list. The returned object's use_count should be - * at least 1, as initially set by create_object(). + * object_tree_root (or object_phys_tree_root) and object_list. The + * returned object's use_count should be at least 1, as initially set + * by create_object(). */ -static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int alias) +static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int alias, + bool is_phys) { unsigned long flags; struct kmemleak_object *object; raw_spin_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, alias); + object = __lookup_object(ptr, alias, is_phys); if (object) __remove_object(object); raw_spin_unlock_irqrestore(&kmemleak_lock, flags); @@ -572,10 +601,12 @@ static int __save_stack_trace(unsigned long *trace) /* * Create the metadata (struct kmemleak_object) corresponding to an allocated - * memory block and add it to the object_list and object_tree_root. + * memory block and add it to the object_list and object_tree_root (or + * object_phys_tree_root). */ -static struct kmemleak_object *create_object(unsigned long ptr, size_t size, - int min_count, gfp_t gfp) +static struct kmemleak_object *__create_object(unsigned long ptr, size_t size, + int min_count, gfp_t gfp, + bool is_phys) { unsigned long flags; struct kmemleak_object *object, *parent; @@ -595,7 +626,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, INIT_HLIST_HEAD(&object->area_list); raw_spin_lock_init(&object->lock); atomic_set(&object->use_count, 1); - object->flags = OBJECT_ALLOCATED; + object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0); object->pointer = ptr; object->size = kfence_ksize((void *)ptr) ?: size; object->excess_ref = 0; @@ -628,9 +659,16 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, raw_spin_lock_irqsave(&kmemleak_lock, flags); untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); - min_addr = min(min_addr, untagged_ptr); - max_addr = max(max_addr, untagged_ptr + size); - link = &object_tree_root.rb_node; + /* + * Only update min_addr and max_addr with object + * storing virtual address. + */ + if (!is_phys) { + min_addr = min(min_addr, untagged_ptr); + max_addr = max(max_addr, untagged_ptr + size); + } + link = is_phys ? &object_phys_tree_root.rb_node : + &object_tree_root.rb_node; rb_parent = NULL; while (*link) { rb_parent = *link; @@ -654,7 +692,8 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, } } rb_link_node(&object->rb_node, rb_parent, link); - rb_insert_color(&object->rb_node, &object_tree_root); + rb_insert_color(&object->rb_node, is_phys ? &object_phys_tree_root : + &object_tree_root); list_add_tail_rcu(&object->object_list, &object_list); out: @@ -662,6 +701,20 @@ out: return object; } +/* Create kmemleak object which allocated with virtual address. */ +static struct kmemleak_object *create_object(unsigned long ptr, size_t size, + int min_count, gfp_t gfp) +{ + return __create_object(ptr, size, min_count, gfp, false); +} + +/* Create kmemleak object which allocated with physical address. */ +static struct kmemleak_object *create_object_phys(unsigned long ptr, size_t size, + int min_count, gfp_t gfp) +{ + return __create_object(ptr, size, min_count, gfp, true); +} + /* * Mark the object as not allocated and schedule RCU freeing via put_object(). */ @@ -690,7 +743,7 @@ static void delete_object_full(unsigned long ptr) { struct kmemleak_object *object; - object = find_and_remove_object(ptr, 0); + object = find_and_remove_object(ptr, 0, false); if (!object) { #ifdef DEBUG kmemleak_warn("Freeing unknown object at 0x%08lx\n", @@ -706,12 +759,12 @@ static void delete_object_full(unsigned long ptr) * delete it. If the memory block is partially freed, the function may create * additional metadata for the remaining parts of the block. */ -static void delete_object_part(unsigned long ptr, size_t size) +static void delete_object_part(unsigned long ptr, size_t size, bool is_phys) { struct kmemleak_object *object; unsigned long start, end; - object = find_and_remove_object(ptr, 1); + object = find_and_remove_object(ptr, 1, is_phys); if (!object) { #ifdef DEBUG kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", @@ -728,11 +781,11 @@ static void delete_object_part(unsigned long ptr, size_t size) start = object->pointer; end = object->pointer + object->size; if (ptr > start) - create_object(start, ptr - start, object->min_count, - GFP_KERNEL); + __create_object(start, ptr - start, object->min_count, + GFP_KERNEL, is_phys); if (ptr + size < end) - create_object(ptr + size, end - ptr - size, object->min_count, - GFP_KERNEL); + __create_object(ptr + size, end - ptr - size, object->min_count, + GFP_KERNEL, is_phys); __delete_object(object); } @@ -753,11 +806,11 @@ static void paint_it(struct kmemleak_object *object, int color) raw_spin_unlock_irqrestore(&object->lock, flags); } -static void paint_ptr(unsigned long ptr, int color) +static void paint_ptr(unsigned long ptr, int color, bool is_phys) { struct kmemleak_object *object; - object = find_and_get_object(ptr, 0); + object = __find_and_get_object(ptr, 0, is_phys); if (!object) { kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n", ptr, @@ -775,16 +828,16 @@ static void paint_ptr(unsigned long ptr, int color) */ static void make_gray_object(unsigned long ptr) { - paint_ptr(ptr, KMEMLEAK_GREY); + paint_ptr(ptr, KMEMLEAK_GREY, false); } /* * Mark the object as black-colored so that it is ignored from scans and * reporting. */ -static void make_black_object(unsigned long ptr) +static void make_black_object(unsigned long ptr, bool is_phys) { - paint_ptr(ptr, KMEMLEAK_BLACK); + paint_ptr(ptr, KMEMLEAK_BLACK, is_phys); } /* @@ -990,7 +1043,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size) pr_debug("%s(0x%p)\n", __func__, ptr); if (kmemleak_enabled && ptr && !IS_ERR(ptr)) - delete_object_part((unsigned long)ptr, size); + delete_object_part((unsigned long)ptr, size, false); } EXPORT_SYMBOL_GPL(kmemleak_free_part); @@ -1078,7 +1131,7 @@ void __ref kmemleak_ignore(const void *ptr) pr_debug("%s(0x%p)\n", __func__, ptr); if (kmemleak_enabled && ptr && !IS_ERR(ptr)) - make_black_object((unsigned long)ptr); + make_black_object((unsigned long)ptr, false); } EXPORT_SYMBOL(kmemleak_ignore); @@ -1125,15 +1178,18 @@ EXPORT_SYMBOL(kmemleak_no_scan); * address argument * @phys: physical address of the object * @size: size of the object - * @min_count: minimum number of references to this object. - * See kmemleak_alloc() * @gfp: kmalloc() flags used for kmemleak internal memory allocations */ -void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, - gfp_t gfp) +void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, gfp_t gfp) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) - kmemleak_alloc(__va(phys), size, min_count, gfp); + pr_debug("%s(0x%pa, %zu)\n", __func__, &phys, size); + + if (kmemleak_enabled) + /* + * Create object with OBJECT_PHYS flag and + * assume min_count 0. + */ + create_object_phys((unsigned long)phys, size, 0, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1146,22 +1202,12 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) - kmemleak_free_part(__va(phys), size); -} -EXPORT_SYMBOL(kmemleak_free_part_phys); + pr_debug("%s(0x%pa)\n", __func__, &phys); -/** - * kmemleak_not_leak_phys - similar to kmemleak_not_leak but taking a physical - * address argument - * @phys: physical address of the object - */ -void __ref kmemleak_not_leak_phys(phys_addr_t phys) -{ - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) - kmemleak_not_leak(__va(phys)); + if (kmemleak_enabled) + delete_object_part((unsigned long)phys, size, true); } -EXPORT_SYMBOL(kmemleak_not_leak_phys); +EXPORT_SYMBOL(kmemleak_free_part_phys); /** * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical @@ -1170,8 +1216,10 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) - kmemleak_ignore(__va(phys)); + pr_debug("%s(0x%pa)\n", __func__, &phys); + + if (kmemleak_enabled) + make_black_object((unsigned long)phys, true); } EXPORT_SYMBOL(kmemleak_ignore_phys); @@ -1182,6 +1230,9 @@ static bool update_checksum(struct kmemleak_object *object) { u32 old_csum = object->checksum; + if (WARN_ON_ONCE(object->flags & OBJECT_PHYS)) + return false; + kasan_disable_current(); kcsan_disable_current(); object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size); @@ -1335,6 +1386,7 @@ static void scan_object(struct kmemleak_object *object) { struct kmemleak_scan_area *area; unsigned long flags; + void *obj_ptr; /* * Once the object->lock is acquired, the corresponding memory block @@ -1346,10 +1398,15 @@ static void scan_object(struct kmemleak_object *object) if (!(object->flags & OBJECT_ALLOCATED)) /* already freed object */ goto out; + + obj_ptr = object->flags & OBJECT_PHYS ? + __va((phys_addr_t)object->pointer) : + (void *)object->pointer; + if (hlist_empty(&object->area_list) || object->flags & OBJECT_FULL_SCAN) { - void *start = (void *)object->pointer; - void *end = (void *)(object->pointer + object->size); + void *start = obj_ptr; + void *end = obj_ptr + object->size; void *next; do { @@ -1413,18 +1470,21 @@ static void scan_gray_list(void) */ static void kmemleak_scan(void) { - unsigned long flags; struct kmemleak_object *object; struct zone *zone; int __maybe_unused i; int new_leaks = 0; + int loop1_cnt = 0; jiffies_last_scan = jiffies; /* prepare the kmemleak_object's */ rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { - raw_spin_lock_irqsave(&object->lock, flags); + bool obj_pinned = false; + + loop1_cnt++; + raw_spin_lock_irq(&object->lock); #ifdef DEBUG /* * With a few exceptions there should be a maximum of @@ -1436,12 +1496,45 @@ static void kmemleak_scan(void) dump_object_info(object); } #endif + + /* ignore objects outside lowmem (paint them black) */ + if ((object->flags & OBJECT_PHYS) && + !(object->flags & OBJECT_NO_SCAN)) { + unsigned long phys = object->pointer; + + if (PHYS_PFN(phys) < min_low_pfn || + PHYS_PFN(phys + object->size) >= max_low_pfn) + __paint_it(object, KMEMLEAK_BLACK); + } + /* reset the reference count (whiten the object) */ object->count = 0; - if (color_gray(object) && get_object(object)) + if (color_gray(object) && get_object(object)) { list_add_tail(&object->gray_list, &gray_list); + obj_pinned = true; + } - raw_spin_unlock_irqrestore(&object->lock, flags); + raw_spin_unlock_irq(&object->lock); + + /* + * Do a cond_resched() to avoid soft lockup every 64k objects. + * Make sure a reference has been taken so that the object + * won't go away without RCU read lock. + */ + if (!(loop1_cnt & 0xffff)) { + if (!obj_pinned && !get_object(object)) { + /* Try the next object instead */ + loop1_cnt--; + continue; + } + + rcu_read_unlock(); + cond_resched(); + rcu_read_lock(); + + if (!obj_pinned) + put_object(object); + } } rcu_read_unlock(); @@ -1509,14 +1602,21 @@ static void kmemleak_scan(void) */ rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { - raw_spin_lock_irqsave(&object->lock, flags); + /* + * This is racy but we can save the overhead of lock/unlock + * calls. The missed objects, if any, should be caught in + * the next scan. + */ + if (!color_white(object)) + continue; + raw_spin_lock_irq(&object->lock); if (color_white(object) && (object->flags & OBJECT_ALLOCATED) && update_checksum(object) && get_object(object)) { /* color it gray temporarily */ object->count = object->min_count; list_add_tail(&object->gray_list, &gray_list); } - raw_spin_unlock_irqrestore(&object->lock, flags); + raw_spin_unlock_irq(&object->lock); } rcu_read_unlock(); @@ -1536,7 +1636,14 @@ static void kmemleak_scan(void) */ rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { - raw_spin_lock_irqsave(&object->lock, flags); + /* + * This is racy but we can save the overhead of lock/unlock + * calls. The missed objects, if any, should be caught in + * the next scan. + */ + if (!color_white(object)) + continue; + raw_spin_lock_irq(&object->lock); if (unreferenced_object(object) && !(object->flags & OBJECT_REPORTED)) { object->flags |= OBJECT_REPORTED; @@ -1546,7 +1653,7 @@ static void kmemleak_scan(void) new_leaks++; } - raw_spin_unlock_irqrestore(&object->lock, flags); + raw_spin_unlock_irq(&object->lock); } rcu_read_unlock(); @@ -1748,15 +1855,14 @@ static int dump_str_object_info(const char *str) static void kmemleak_clear(void) { struct kmemleak_object *object; - unsigned long flags; rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { - raw_spin_lock_irqsave(&object->lock, flags); + raw_spin_lock_irq(&object->lock); if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) __paint_it(object, KMEMLEAK_GREY); - raw_spin_unlock_irqrestore(&object->lock, flags); + raw_spin_unlock_irq(&object->lock); } rcu_read_unlock(); diff --git a/mm/list_lru.c b/mm/list_lru.c index ba76428ceece..a05e5bef3b40 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -71,7 +71,7 @@ list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr, if (!list_lru_memcg_aware(lru)) goto out; - memcg = mem_cgroup_from_obj(ptr); + memcg = mem_cgroup_from_slab_obj(ptr); if (!memcg) goto out; diff --git a/mm/memblock.c b/mm/memblock.c index e4f03a6e8e56..749abd2685c4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1345,8 +1345,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, * from the regions with mirroring enabled and then retried from any * memory region. * - * In addition, function sets the min_count to 0 using kmemleak_alloc_phys for - * allocated boot memory block, so that it is never reported as leaks. + * In addition, function using kmemleak_alloc_phys for allocated boot + * memory block, it is never reported as leaks. * * Return: * Physical address of allocated memory block on success, %0 on failure. @@ -1398,12 +1398,12 @@ done: */ if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) /* - * The min_count is set to 0 so that memblock allocated - * blocks are never reported as leaks. This is because many - * of these blocks are only referred via the physical - * address which is not looked up by kmemleak. + * Memblock allocated blocks are never reported as + * leaks. This is because many of these blocks are + * only referred via the physical address which is + * not looked up by kmemleak. */ - kmemleak_alloc_phys(found, size, 0, 0); + kmemleak_alloc_phys(found, size, 0); return found; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 618c366a2f07..655c09393ad5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -783,7 +783,7 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) struct lruvec *lruvec; rcu_read_lock(); - memcg = mem_cgroup_from_obj(p); + memcg = mem_cgroup_from_slab_obj(p); /* * Untracked pages have no memcg, no lruvec. Update only the @@ -1460,6 +1460,29 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, return memcg_page_state(memcg, item) * memcg_page_state_unit(item); } +/* Subset of vm_event_item to report for memcg event stats */ +static const unsigned int memcg_vm_event_stat[] = { + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGFAULT, + PGMAJFAULT, + PGREFILL, + PGACTIVATE, + PGDEACTIVATE, + PGLAZYFREE, + PGLAZYFREED, +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) + ZSWPIN, + ZSWPOUT, +#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_COLLAPSE_ALLOC, +#endif +}; + static char *memory_stat_format(struct mem_cgroup *memcg) { struct seq_buf s; @@ -1495,41 +1518,17 @@ static char *memory_stat_format(struct mem_cgroup *memcg) } /* Accumulated memory events */ - - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT), - memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), - memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL), - memcg_events(memcg, PGREFILL)); seq_buf_printf(&s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); seq_buf_printf(&s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE), - memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE), - memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE), - memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED), - memcg_events(memcg, PGLAZYFREED)); -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) - seq_buf_printf(&s, "%s %lu\n", vm_event_name(ZSWPIN), - memcg_events(memcg, ZSWPIN)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(ZSWPOUT), - memcg_events(memcg, ZSWPOUT)); -#endif - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), - memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), - memcg_events(memcg, THP_COLLAPSE_ALLOC)); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) + seq_buf_printf(&s, "%s %lu\n", + vm_event_name(memcg_vm_event_stat[i]), + memcg_events(memcg, memcg_vm_event_stat[i])); /* The above should easily fit into one page */ WARN_ON_ONCE(seq_buf_has_overflowed(&s)); @@ -2842,27 +2841,9 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, return 0; } -/* - * Returns a pointer to the memory cgroup to which the kernel object is charged. - * - * A passed kernel object can be a slab object or a generic kernel page, so - * different mechanisms for getting the memory cgroup pointer should be used. - * In certain cases (e.g. kernel stacks or large kmallocs with SLUB) the caller - * can not know for sure how the kernel object is implemented. - * mem_cgroup_from_obj() can be safely used in such cases. - * - * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), - * cgroup_mutex, etc. - */ -struct mem_cgroup *mem_cgroup_from_obj(void *p) +static __always_inline +struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) { - struct folio *folio; - - if (mem_cgroup_disabled()) - return NULL; - - folio = virt_to_folio(p); - /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in @@ -2895,6 +2876,53 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) return page_memcg_check(folio_page(folio, 0)); } +/* + * Returns a pointer to the memory cgroup to which the kernel object is charged. + * + * A passed kernel object can be a slab object, vmalloc object or a generic + * kernel page, so different mechanisms for getting the memory cgroup pointer + * should be used. + * + * In certain cases (e.g. kernel stacks or large kmallocs with SLUB) the caller + * can not know for sure how the kernel object is implemented. + * mem_cgroup_from_obj() can be safely used in such cases. + * + * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), + * cgroup_mutex, etc. + */ +struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + struct folio *folio; + + if (mem_cgroup_disabled()) + return NULL; + + if (unlikely(is_vmalloc_addr(p))) + folio = page_folio(vmalloc_to_page(p)); + else + folio = virt_to_folio(p); + + return mem_cgroup_from_obj_folio(folio, p); +} + +/* + * Returns a pointer to the memory cgroup to which the kernel object is charged. + * Similar to mem_cgroup_from_obj(), but faster and not suitable for objects, + * allocated using vmalloc(). + * + * A passed kernel object must be a slab object or a generic kernel page. + * + * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), + * cgroup_mutex, etc. + */ +struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) +{ + if (mem_cgroup_disabled()) + return NULL; + + return mem_cgroup_from_obj_folio(virt_to_folio(p), p); +} + static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) { struct obj_cgroup *objcg = NULL; diff --git a/mm/memory.c b/mm/memory.c index 7a089145cad4..580c62febe42 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3020,7 +3020,7 @@ static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf) balance_dirty_pages_ratelimited(mapping); if (fpin) { fput(fpin); - return VM_FAULT_RETRY; + return VM_FAULT_COMPLETED; } } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1213d0c67a53..1f1a730c4499 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -237,8 +237,7 @@ static void release_memory_resource(struct resource *res) kfree(res); } -static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, - const char *reason) +static int check_pfn_span(unsigned long pfn, unsigned long nr_pages) { /* * Disallow all operations smaller than a sub-section and only @@ -255,12 +254,8 @@ static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, min_align = PAGES_PER_SUBSECTION; else min_align = PAGES_PER_SECTION; - if (!IS_ALIGNED(pfn, min_align) - || !IS_ALIGNED(nr_pages, min_align)) { - WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n", - reason, pfn, pfn + nr_pages - 1); + if (!IS_ALIGNED(pfn | nr_pages, min_align)) return -EINVAL; - } return 0; } @@ -337,9 +332,10 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, altmap->alloc = 0; } - err = check_pfn_span(pfn, nr_pages, "add"); - if (err) - return err; + if (check_pfn_span(pfn, nr_pages)) { + WARN(1, "Misaligned %s start: %#lx end: #%lx\n", __func__, pfn, pfn + nr_pages - 1); + return -EINVAL; + } for (; pfn < end_pfn; pfn += cur_nr_pages) { /* Select all remaining pages up to the next section boundary */ @@ -536,8 +532,10 @@ void __remove_pages(unsigned long pfn, unsigned long nr_pages, map_offset = vmem_altmap_offset(altmap); - if (check_pfn_span(pfn, nr_pages, "remove")) + if (check_pfn_span(pfn, nr_pages)) { + WARN(1, "Misaligned %s start: %#lx end: #%lx\n", __func__, pfn, pfn + nr_pages - 1); return; + } for (; pfn < end_pfn; pfn += cur_nr_pages) { cond_resched(); diff --git a/mm/mempool.c b/mm/mempool.c index b933d0fc21b8..96488b13a1ef 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -379,7 +379,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); - might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); + might_alloc(gfp_mask); gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ diff --git a/mm/memremap.c b/mm/memremap.c index b870a659eee6..8b5c8fd4ea8e 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -141,10 +141,10 @@ void memunmap_pages(struct dev_pagemap *pgmap) for (i = 0; i < pgmap->nr_range; i++) percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i)); wait_for_completion(&pgmap->done); - percpu_ref_exit(&pgmap->ref); for (i = 0; i < pgmap->nr_range; i++) pageunmap_range(pgmap, i); + percpu_ref_exit(&pgmap->ref); WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n"); devmap_managed_enable_put(pgmap); @@ -279,8 +279,8 @@ err_pfn_remap: /* - * Not device managed version of dev_memremap_pages, undone by - * memunmap_pages(). Please use dev_memremap_pages if you have a struct + * Not device managed version of devm_memremap_pages, undone by + * memunmap_pages(). Please use devm_memremap_pages if you have a struct * device available. */ void *memremap_pages(struct dev_pagemap *pgmap, int nid) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e008a3df0485..81fadb266973 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5197,10 +5197,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, *alloc_flags |= ALLOC_CPUSET; } - fs_reclaim_acquire(gfp_mask); - fs_reclaim_release(gfp_mask); - - might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); + might_alloc(gfp_mask); if (should_fail_alloc_page(gfp_mask, order)) return false; diff --git a/mm/shmem.c b/mm/shmem.c index a6f565308133..12d45a03f7fc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1706,10 +1706,10 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, } /* - * Swap in the page pointed to by *pagep. - * Caller has to make sure that *pagep contains a valid swapped page. - * Returns 0 and the page in pagep if success. On failure, returns the - * error code and NULL in *pagep. + * Swap in the folio pointed to by *foliop. + * Caller has to make sure that *foliop contains a valid swapped folio. + * Returns 0 and the folio in foliop if success. On failure, returns the + * error code and NULL in *foliop. */ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, @@ -1749,7 +1749,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } folio = page_folio(page); - /* We have to do this with page locked to prevent races */ + /* We have to do this with folio locked to prevent races */ folio_lock(folio); if (!folio_test_swapcache(folio) || folio_swap_entry(folio).val != swap.val || diff --git a/mm/slab.c b/mm/slab.c index f8cd00f4ba13..47151fb2b2d2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2958,12 +2958,6 @@ direct_grow: return ac->entry[--ac->avail]; } -static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, - gfp_t flags) -{ - might_sleep_if(gfpflags_allow_blocking(flags)); -} - #if DEBUG static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, void *objp, unsigned long caller) @@ -3205,7 +3199,6 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_ if (unlikely(ptr)) goto out_hooks; - cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); if (nodeid == NUMA_NO_NODE) @@ -3290,7 +3283,6 @@ slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, if (unlikely(objp)) goto out; - cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); @@ -3527,8 +3519,6 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, if (!s) return 0; - cache_alloc_debugcheck_before(s, flags); - local_irq_disable(); for (i = 0; i < size; i++) { void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index f4fa61dbbee3..652f11a05749 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -200,8 +200,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, unsigned long next; pgd_t *pgd; - VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE)); - VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE)); + VM_BUG_ON(!PAGE_ALIGNED(start)); + VM_BUG_ON(!PAGE_ALIGNED(end)); pgd = pgd_offset_k(addr); do { @@ -737,7 +737,7 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page)); for (addr = start; addr < end; addr += size) { - unsigned long next = addr, last = addr + size; + unsigned long next, last = addr + size; /* Populate the head page vmemmap page */ pte = vmemmap_populate_address(addr, node, NULL, NULL); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index effd1ff6a4b4..5977b178694d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -790,6 +790,7 @@ unsigned long vmalloc_nr_pages(void) return atomic_long_read(&nr_vmalloc_pages); } +/* Look up the first VA which satisfies addr < va_end, NULL if none. */ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr) { struct vmap_area *va = NULL; @@ -874,11 +875,9 @@ find_va_links(struct vmap_area *va, * Trigger the BUG() if there are sides(left/right) * or full overlaps. */ - if (va->va_start < tmp_va->va_end && - va->va_end <= tmp_va->va_start) + if (va->va_end <= tmp_va->va_start) link = &(*link)->rb_left; - else if (va->va_end > tmp_va->va_start && - va->va_start >= tmp_va->va_end) + else if (va->va_start >= tmp_va->va_end) link = &(*link)->rb_right; else { WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", @@ -931,7 +930,7 @@ link_va(struct vmap_area *va, struct rb_root *root, * Some explanation here. Just perform simple insertion * to the tree. We do not set va->subtree_max_size to * its current size before calling rb_insert_augmented(). - * It is because of we populate the tree from the bottom + * It is because we populate the tree from the bottom * to parent levels when the node _is_ in the tree. * * Therefore we set subtree_max_size to zero after insertion, @@ -1335,10 +1334,10 @@ classify_va_fit_type(struct vmap_area *va, static __always_inline int adjust_va_to_fit_type(struct vmap_area *va, - unsigned long nva_start_addr, unsigned long size, - enum fit_type type) + unsigned long nva_start_addr, unsigned long size) { struct vmap_area *lva = NULL; + enum fit_type type = classify_va_fit_type(va, nva_start_addr, size); if (type == FL_FIT_TYPE) { /* @@ -1444,7 +1443,6 @@ __alloc_vmap_area(unsigned long size, unsigned long align, bool adjust_search_size = true; unsigned long nva_start_addr; struct vmap_area *va; - enum fit_type type; int ret; /* @@ -1472,14 +1470,9 @@ __alloc_vmap_area(unsigned long size, unsigned long align, if (nva_start_addr + size > vend) return vend; - /* Classify what we have found. */ - type = classify_va_fit_type(va, nva_start_addr, size); - if (WARN_ON_ONCE(type == NOTHING_FIT)) - return vend; - /* Update the free vmap_area. */ - ret = adjust_va_to_fit_type(va, nva_start_addr, size, type); - if (ret) + ret = adjust_va_to_fit_type(va, nva_start_addr, size); + if (WARN_ON_ONCE(ret)) return vend; #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK @@ -1663,7 +1656,7 @@ static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0); /* * Serialize vmap purging. There is no actual critical section protected - * by this look, but we want to avoid concurrent calls for performance + * by this lock, but we want to avoid concurrent calls for performance * reasons and to make the pcpu_get_vm_areas more deterministic. */ static DEFINE_MUTEX(vmap_purge_lock); @@ -1677,32 +1670,32 @@ static void purge_fragmented_blocks_allcpus(void); static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) { unsigned long resched_threshold; - struct list_head local_pure_list; + struct list_head local_purge_list; struct vmap_area *va, *n_va; lockdep_assert_held(&vmap_purge_lock); spin_lock(&purge_vmap_area_lock); purge_vmap_area_root = RB_ROOT; - list_replace_init(&purge_vmap_area_list, &local_pure_list); + list_replace_init(&purge_vmap_area_list, &local_purge_list); spin_unlock(&purge_vmap_area_lock); - if (unlikely(list_empty(&local_pure_list))) + if (unlikely(list_empty(&local_purge_list))) return false; start = min(start, - list_first_entry(&local_pure_list, + list_first_entry(&local_purge_list, struct vmap_area, list)->va_start); end = max(end, - list_last_entry(&local_pure_list, + list_last_entry(&local_purge_list, struct vmap_area, list)->va_end); flush_tlb_kernel_range(start, end); resched_threshold = lazy_max_pages() << 1; spin_lock(&free_vmap_area_lock); - list_for_each_entry_safe(va, n_va, &local_pure_list, list) { + list_for_each_entry_safe(va, n_va, &local_purge_list, list) { unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; unsigned long orig_start = va->va_start; unsigned long orig_end = va->va_end; @@ -3735,7 +3728,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, int area, area2, last_area, term_area; unsigned long base, start, size, end, last_end, orig_start, orig_end; bool purged = false; - enum fit_type type; /* verify parameters and allocate data structures */ BUG_ON(offset_in_page(align) || !is_power_of_2(align)); @@ -3846,15 +3838,11 @@ retry: /* It is a BUG(), but trigger recovery instead. */ goto recovery; - type = classify_va_fit_type(va, start, size); - if (WARN_ON_ONCE(type == NOTHING_FIT)) + ret = adjust_va_to_fit_type(va, start, size); + if (WARN_ON_ONCE(unlikely(ret))) /* It is a BUG(), but trigger recovery instead. */ goto recovery; - ret = adjust_va_to_fit_type(va, start, size, type); - if (unlikely(ret)) - goto recovery; - /* Allocated area. */ va = vas[area]; va->va_start = start; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0ec2f5906a27..6b9f19122ec1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/uidgid.h> #include <linux/cookie.h> @@ -1143,7 +1144,13 @@ static int __register_pernet_operations(struct list_head *list, * setup_net() and cleanup_net() are not possible. */ for_each_net(net) { + struct mem_cgroup *old, *memcg; + + memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net)); + old = set_active_memcg(memcg); error = ops_init(ops, net); + set_active_memcg(old); + mem_cgroup_put(memcg); if (error) goto out_undo; list_add_tail(&net->exit_list, &net_exit_list); diff --git a/tools/testing/memblock/linux/kmemleak.h b/tools/testing/memblock/linux/kmemleak.h index 462f8c5e8aa0..5fed13bb9ec4 100644 --- a/tools/testing/memblock/linux/kmemleak.h +++ b/tools/testing/memblock/linux/kmemleak.h @@ -7,7 +7,7 @@ static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) } static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size, - int min_count, gfp_t gfp) + gfp_t gfp) { } diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 0bdfc1955229..4bc24581760d 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -860,7 +860,7 @@ static int stress(struct uffd_stats *uffd_stats) /* * Be strict and immediately zap area_src, the whole area has * been transferred already by the background treads. The - * area_src could then be faulted in in a racy way by still + * area_src could then be faulted in a racy way by still * running uffdio_threads reading zeropages after we zapped * area_src (but they're guaranteed to get -EEXIST from * UFFDIO_COPY without writing zero pages into area_dst diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 9b68658b6bb8..3ae985dc24b6 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -125,7 +125,7 @@ static void usage(void) "-n|--numa Show NUMA information\n" "-N|--lines=K Show the first K slabs\n" "-o|--ops Show kmem_cache_ops\n" - "-P|--partial Sort by number of partial slabs\n" + "-P|--partial Sort by number of partial slabs\n" "-r|--report Detailed report on single slabs\n" "-s|--shrink Shrink slabs\n" "-S|--Size Sort by size\n" @@ -1045,15 +1045,27 @@ static void sort_slabs(void) for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { int result; - if (sort_size) - result = slab_size(s1) < slab_size(s2); - else if (sort_active) - result = slab_activity(s1) < slab_activity(s2); - else if (sort_loss) - result = slab_waste(s1) < slab_waste(s2); - else if (sort_partial) - result = s1->partial < s2->partial; - else + if (sort_size) { + if (slab_size(s1) == slab_size(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_size(s1) < slab_size(s2); + } else if (sort_active) { + if (slab_activity(s1) == slab_activity(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_activity(s1) < slab_activity(s2); + } else if (sort_loss) { + if (slab_waste(s1) == slab_waste(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_waste(s1) < slab_waste(s2); + } else if (sort_partial) { + if (s1->partial == s2->partial) + result = strcasecmp(s1->name, s2->name); + else + result = s1->partial < s2->partial; + } else result = strcasecmp(s1->name, s2->name); if (show_inverted) |