summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWaiman Long <longman@redhat.com>2025-04-24 23:16:55 -0400
committerTejun Heo <tj@kernel.org>2025-04-25 09:45:16 -1000
commit8f52633cf5ebe1fb9c4814b6590f824cf906b346 (patch)
tree56e2b80a33c854b5e85cf4617fe151f81170933c
parentf304da9134f8c245d198502ad681ffae63b5e29c (diff)
cgroup/rstat: Improve cgroup_rstat_push_children() documentation
The cgroup_rstat_push_children() function converts a set of updated_children lists from different cgroups into a single ordered list of cgroups to be flushed via the rstat_flush_next pointer. The algorithm used isn't that well illustrated and it takes time to grasp what it is doing. Improve the embedded documentation and variable names to better illustrate the transformation process and make the code easier to understand. Also cgroup_rstat_lock must be held for the whole duration from where the rstat_flush_next list is being constructed in cgroup_rstat_push_children() to when it is consumed later in css_rstat_flush(). Otherwise, list corruption can happen leading to system crash as reported in [1]. In this particular case, the branch being used has commit 093c8812de2d ("cgroup: rstat: Cleanup flushing functions and locking") which breaks this rule, but is missing the fix commit 7d6c63c31914 ("cgroup: rstat: call cgroup_rstat_updated_list with cgroup_rstat_lock") that fixes it. This patch has no functional change. [1] https://lore.kernel.org/lkml/BY5PR04MB68495E9E8A46CA9614D62669BCBB2@BY5PR04MB6849.namprd04.prod.outlook.com/ Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--kernel/cgroup/rstat.c42
1 files changed, 33 insertions, 9 deletions
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 4d5fd8d12bdd..357c538d14da 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -144,30 +144,54 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
* @head: current head of the list (= subtree root)
* @child: first child of the root
* @cpu: target cpu
- * Return: A new singly linked list of cgroups to be flush
+ * Return: A new singly linked list of cgroups to be flushed
*
* Iteratively traverse down the cgroup_rstat_cpu updated tree level by
* level and push all the parents first before their next level children
- * into a singly linked list built from the tail backward like "pushing"
- * cgroups into a stack. The root is pushed by the caller.
+ * into a singly linked list via the rstat_flush_next pointer built from the
+ * tail backward like "pushing" cgroups into a stack. The root is pushed by
+ * the caller.
*/
static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
struct cgroup *child, int cpu)
{
- struct cgroup *chead = child; /* Head of child cgroup level */
+ struct cgroup *cnext = child; /* Next head of child cgroup level */
struct cgroup *ghead = NULL; /* Head of grandchild cgroup level */
struct cgroup *parent, *grandchild;
struct cgroup_rstat_cpu *crstatc;
child->rstat_flush_next = NULL;
+ /*
+ * The cgroup_rstat_lock must be held for the whole duration from
+ * here as the rstat_flush_next list is being constructed to when
+ * it is consumed later in css_rstat_flush().
+ */
+ lockdep_assert_held(&cgroup_rstat_lock);
+
+ /*
+ * Notation: -> updated_next pointer
+ * => rstat_flush_next pointer
+ *
+ * Assuming the following sample updated_children lists:
+ * P: C1 -> C2 -> P
+ * C1: G11 -> G12 -> C1
+ * C2: G21 -> G22 -> C2
+ *
+ * After 1st iteration:
+ * head => C2 => C1 => NULL
+ * ghead => G21 => G11 => NULL
+ *
+ * After 2nd iteration:
+ * head => G12 => G11 => G22 => G21 => C2 => C1 => NULL
+ */
next_level:
- while (chead) {
- child = chead;
- chead = child->rstat_flush_next;
+ while (cnext) {
+ child = cnext;
+ cnext = child->rstat_flush_next;
parent = cgroup_parent(child);
- /* updated_next is parent cgroup terminated */
+ /* updated_next is parent cgroup terminated if !NULL */
while (child != parent) {
child->rstat_flush_next = head;
head = child;
@@ -185,7 +209,7 @@ next_level:
}
if (ghead) {
- chead = ghead;
+ cnext = ghead;
ghead = NULL;
goto next_level;
}