diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-11-22 18:15:33 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2025-06-30 17:52:46 -0400 |
commit | 8730399a8dfd9c3f12c962ead9073f775420951c (patch) | |
tree | b36b4de23c7c181e940c196ee6d23b91924fb221 | |
parent | 1237bbe95754e2f2a066624e89555c71f6f18385 (diff) |
mm: shrinker: Add a .to_text() method for shrinkers
This adds a new callback method to shrinkers which they can use to
describe anything relevant to memory reclaim about their internal state,
for example object dirtiness.
This patch also adds shrinkers_to_text(), which reports on the top 10
shrinkers - by object count - in sorted order, to be used in OOM
reporting.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: linux-mm@kvack.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
From david@fromorbit.com Tue Aug 27 23:32:26 2024
> > + if (!mutex_trylock(&shrinker_mutex)) {
> > + seq_buf_puts(out, "(couldn't take shrinker lock)");
> > + return;
> > + }
>
> Please don't use the shrinker_mutex like this. There can be tens of
> thousands of entries in the shrinker list (because memcgs) and
> holding the shrinker_mutex for long running traversals like this is
> known to cause latency problems for memcg reaping. If we are at
> ENOMEM, the last thing we want to be doing is preventing memcgs from
> being reaped.
>
> > + list_for_each_entry(shrinker, &shrinker_list, list) {
> > + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
>
> This iteration and counting setup is neither node nor memcg aware.
> For node aware shrinkers, this will only count the items freeable
> on node 0, and ignore all the other memory in the system. For memcg
> systems, it will also only scan the root memcg and so miss counting
> any memory in memcg owned caches.
>
> IOWs, the shrinker iteration mechanism needs to iterate both by NUMA
> node and by memcg. On large machines with multiple nodes and hosting
> thousands of memcgs, a total shrinker state iteration has to walk
> a -lot- of structures.
>
> An example of this is drop_slab() - called from
> /proc/sys/vm/drop_caches(). It does this to iterate all the
> shrinkers for all the nodes and memcgs in the system:
>
> static unsigned long drop_slab_node(int nid)
> {
> unsigned long freed = 0;
> struct mem_cgroup *memcg = NULL;
>
> memcg = mem_cgroup_iter(NULL, NULL, NULL);
> do {
> freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
> } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
>
> return freed;
> }
>
> void drop_slab(void)
> {
> int nid;
> int shift = 0;
> unsigned long freed;
>
> do {
> freed = 0;
> for_each_online_node(nid) {
> if (fatal_signal_pending(current))
> return;
>
> freed += drop_slab_node(nid);
> }
> } while ((freed >> shift++) > 1);
> }
>
> Hence any iteration for finding the 10 largest shrinkable caches in
> the system needs to do something similar. Only, it needs to iterate
> memcgs first and then aggregate object counts across all nodes for
> shrinkers that are NUMA aware.
>
> Because it needs direct access to the shrinkers, it will need to use
> the RCU lock + refcount method of traversal because that's the only
> safe way to go from memcg to shrinker instance. IOWs, it
> needs to mirror the code in shrink_slab/shrink_slab_memcg to obtain
> a safe reference to the relevant shrinker so it can call
> ->count_objects() and store a refcounted pointer to the shrinker(s)
> that will get printed out after the scan is done....
>
> Once the shrinker iteration is sorted out, I'll look further at the
> rest of the code in this patch...
>
> -Dave.
-rw-r--r-- | include/linux/shrinker.h | 7 | ||||
-rw-r--r-- | mm/shrinker.c | 73 |
2 files changed, 78 insertions, 2 deletions
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1a00be90d93a..6193612617a1 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -24,6 +24,8 @@ struct shrinker_info { struct shrinker_info_unit *unit[]; }; +struct seq_buf; + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. @@ -80,10 +82,12 @@ struct shrink_control { * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { + const char *name; unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc); unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc); + void (*to_text)(struct seq_buf *, struct shrinker *); long batch; /* reclaim batch size, 0 = default */ int seeks; /* seeks to recreate an obj */ @@ -110,7 +114,6 @@ struct shrinker { #endif #ifdef CONFIG_SHRINKER_DEBUG int debugfs_id; - const char *name; struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ @@ -135,6 +138,8 @@ __printf(2, 3) struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); +void shrinker_to_text(struct seq_buf *, struct shrinker *); +void shrinkers_to_text(struct seq_buf *); static inline bool shrinker_try_get(struct shrinker *shrinker) { diff --git a/mm/shrinker.c b/mm/shrinker.c index 4a93fd433689..82d2161d6b4b 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/memcontrol.h> +#include <linux/rculist.h> #include <linux/rwsem.h> +#include <linux/seq_buf.h> #include <linux/shrinker.h> -#include <linux/rculist.h> #include <trace/events/vmscan.h> #include "internal.h" @@ -809,3 +810,73 @@ void shrinker_free(struct shrinker *shrinker) call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); } EXPORT_SYMBOL_GPL(shrinker_free); + +void shrinker_to_text(struct 
seq_buf *out, struct shrinker *shrinker) +{ + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + + seq_buf_puts(out, shrinker->name); + seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc)); + + if (shrinker->to_text) { + shrinker->to_text(out, shrinker); + seq_buf_puts(out, "\n"); + } +} + +/** + * shrinkers_to_text - Report on shrinkers with highest usage + * + * This reports on the top 10 shrinkers, by object counts, in sorted order: + * intended to be used for OOM reporting. + */ +void shrinkers_to_text(struct seq_buf *out) +{ + struct shrinker *shrinker; + struct shrinker_by_mem { + struct shrinker *shrinker; + unsigned long mem; + } shrinkers_by_mem[4]; + int i, nr = 0; + + if (!mutex_trylock(&shrinker_mutex)) { + seq_buf_puts(out, "(couldn't take shrinker lock)"); + return; + } + + list_for_each_entry(shrinker, &shrinker_list, list) { + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + unsigned long mem = shrinker->count_objects(shrinker, &sc); + + if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY) + continue; + + for (i = 0; i < nr; i++) + if (mem < shrinkers_by_mem[i].mem) + break; + + if (nr < ARRAY_SIZE(shrinkers_by_mem)) { + memmove(&shrinkers_by_mem[i + 1], + &shrinkers_by_mem[i], + sizeof(shrinkers_by_mem[0]) * (nr - i)); + nr++; + } else if (i) { + i--; + memmove(&shrinkers_by_mem[0], + &shrinkers_by_mem[1], + sizeof(shrinkers_by_mem[0]) * i); + } else { + continue; + } + + shrinkers_by_mem[i] = (struct shrinker_by_mem) { + .shrinker = shrinker, + .mem = mem, + }; + } + + for (i = nr - 1; i >= 0; --i) + shrinker_to_text(out, shrinkers_by_mem[i].shrinker); + + mutex_unlock(&shrinker_mutex); +} |