From 1a4c937f8d584acc0d411eadfd7ea7e14dee8c84 Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Wed, 22 Nov 2023 18:15:33 -0500 Subject: mm: shrinker: Add a .to_text() method for shrinkers This adds a new callback method to shrinkers which they can use to describe anything relevant to memory reclaim about their internal state, for example object dirtiness. This patch also adds shrinkers_to_text(), which reports on the top 10 shrinkers - by object count - in sorted order, to be used in OOM reporting. Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: linux-mm@kvack.org Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> From david@fromorbit.com Tue Aug 27 23:32:26 2024 > > + if (!mutex_trylock(&shrinker_mutex)) { > > + seq_buf_puts(out, "(couldn't take shrinker lock)"); > > + return; > > + } > > Please don't use the shrinker_mutex like this. There can be tens of > thousands of entries in the shrinker list (because memcgs) and > holding the shrinker_mutex for long running traversals like this is > known to cause latency problems for memcg reaping. If we are at > ENOMEM, the last thing we want to be doing is preventing memcgs from > being reaped. > > > + list_for_each_entry(shrinker, &shrinker_list, list) { > > + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; > > This iteration and counting setup is neither node nor memcg aware. > For node aware shrinkers, this will only count the items freeable > on node 0, and ignore all the other memory in the system. For memcg > systems, it will also only scan the root memcg and so miss counting > any memory in memcg owned caches. > > IOWs, the shrinker iteration mechanism needs to iterate both by NUMA > node and by memcg. On large machines with multiple nodes and hosting > thousands of memcgs, a total shrinker state iteration has to walk > a -lot- of structures. > > An example of this is drop_slab() - called from > /proc/sys/vm/drop_caches(). 
It does this to iterate all the > shrinkers for all the nodes and memcgs in the system: > > static unsigned long drop_slab_node(int nid) > { > unsigned long freed = 0; > struct mem_cgroup *memcg = NULL; > > memcg = mem_cgroup_iter(NULL, NULL, NULL); > do { > freed += shrink_slab(GFP_KERNEL, nid, memcg, 0); > } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); > > return freed; > } > > void drop_slab(void) > { > int nid; > int shift = 0; > unsigned long freed; > > do { > freed = 0; > for_each_online_node(nid) { > if (fatal_signal_pending(current)) > return; > > freed += drop_slab_node(nid); > } > } while ((freed >> shift++) > 1); > } > > Hence any iteration for finding the 10 largest shrinkable caches in > the system needs to do something similar. Only, it needs to iterate > memcgs first and then aggregate object counts across all nodes for > shrinkers that are NUMA aware. > > Because it needs direct access to the shrinkers, it will need to use > the RCU lock + refcount method of traversal because that's the only > safe way to go from memcg to shrinker instance. IOWs, it > needs to mirror the code in shrink_slab/shrink_slab_memcg to obtain > a safe reference to the relevant shrinker so it can call > ->count_objects() and store a refcounted pointer to the shrinker(s) > that will get printed out after the scan is done.... > > Once the shrinker iteration is sorted out, I'll look further at the > rest of the code in this patch... > > -Dave. --- include/linux/shrinker.h | 7 ++++- mm/shrinker.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1a00be90d93a..6193612617a1 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -24,6 +24,8 @@ struct shrinker_info { struct shrinker_info_unit *unit[]; }; +struct seq_buf; + /* * This struct is used to pass information from page reclaim to the shrinkers. 
* We consolidate the values for easier extension later. @@ -80,10 +82,12 @@ struct shrink_control { * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { + const char *name; unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc); unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc); + void (*to_text)(struct seq_buf *, struct shrinker *); long batch; /* reclaim batch size, 0 = default */ int seeks; /* seeks to recreate an obj */ @@ -110,7 +114,6 @@ struct shrinker { #endif #ifdef CONFIG_SHRINKER_DEBUG int debugfs_id; - const char *name; struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ @@ -135,6 +138,8 @@ __printf(2, 3) struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); +void shrinker_to_text(struct seq_buf *, struct shrinker *); +void shrinkers_to_text(struct seq_buf *); static inline bool shrinker_try_get(struct shrinker *shrinker) { diff --git a/mm/shrinker.c b/mm/shrinker.c index 4a93fd433689..82d2161d6b4b 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/memcontrol.h> +#include <linux/rculist.h> #include <linux/rwsem.h> +#include <linux/seq_buf.h> #include <linux/shrinker.h> -#include <linux/rculist.h> #include <trace/events/vmscan.h> #include "internal.h" @@ -809,3 +810,73 @@ void shrinker_free(struct shrinker *shrinker) call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); } EXPORT_SYMBOL_GPL(shrinker_free); + +void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker) +{ + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + + seq_buf_puts(out, shrinker->name); + seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc)); + + if (shrinker->to_text) { + shrinker->to_text(out, shrinker); + seq_buf_puts(out, "\n"); + } +} + +/** + * shrinkers_to_text - Report on shrinkers with highest usage + * + * This reports on the top 10 shrinkers, by object counts, in sorted order: + * intended 
to be used for OOM reporting. + */ +void shrinkers_to_text(struct seq_buf *out) +{ + struct shrinker *shrinker; + struct shrinker_by_mem { + struct shrinker *shrinker; + unsigned long mem; + } shrinkers_by_mem[10]; + int i, nr = 0; + + if (!mutex_trylock(&shrinker_mutex)) { + seq_buf_puts(out, "(couldn't take shrinker lock)"); + return; + } + + list_for_each_entry(shrinker, &shrinker_list, list) { + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + unsigned long mem = shrinker->count_objects(shrinker, &sc); + + if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY) + continue; + + for (i = 0; i < nr; i++) + if (mem < shrinkers_by_mem[i].mem) + break; + + if (nr < ARRAY_SIZE(shrinkers_by_mem)) { + memmove(&shrinkers_by_mem[i + 1], + &shrinkers_by_mem[i], + sizeof(shrinkers_by_mem[0]) * (nr - i)); + nr++; + } else if (i) { + i--; + memmove(&shrinkers_by_mem[0], + &shrinkers_by_mem[1], + sizeof(shrinkers_by_mem[0]) * i); + } else { + continue; + } + + shrinkers_by_mem[i] = (struct shrinker_by_mem) { + .shrinker = shrinker, + .mem = mem, + }; + } + + for (i = nr - 1; i >= 0; --i) + shrinker_to_text(out, shrinkers_by_mem[i].shrinker); + + mutex_unlock(&shrinker_mutex); +} -- cgit v1.2.3