summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2024-04-12 23:00:53 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2025-02-07 18:18:59 -0500
commit: d9b1e57f3b38d16fdd7177f3b9636dcbb4c749ed (patch)
tree: 75375dc391a092b6f65d4455bea521451e40314b
parent: d88e92952e4e9a1f2a3c82712dec611043885ab1 (diff)
sched_wakeup_backtrace debugfstime_stats_sched
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- include/trace/events/sched.h 47
-rw-r--r-- kernel/sched/Makefile 1
-rw-r--r-- kernel/sched/core.c 19
-rw-r--r-- kernel/sched/debug.c 2
-rw-r--r-- kernel/sched/sched.h 4
-rw-r--r-- kernel/sched/timestats_bt.c 199
-rw-r--r-- lib/Makefile 2
7 files changed, 207 insertions, 67 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 3e3150ae73b3..9ea4c404bd4e 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -186,53 +186,6 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
-TRACE_EVENT(sched_wakeup_backtrace,
- TP_PROTO(struct task_struct *p, unsigned sleep_ns, unsigned long *bt, unsigned bt_nr),
-
- TP_ARGS(p, sleep_ns, bt, bt_nr),
-
- TP_STRUCT__entry(
- __array( char, comm, TASK_COMM_LEN )
- __field( pid_t, pid )
- __field( unsigned, sleep_us)
- __field( unsigned, bt_nr )
- __array( ulong, bt, 10 )
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
- __entry->pid = p->pid;
- __entry->sleep_us = sleep_ns / 1000;
- __entry->bt_nr = bt_nr;
- memset(__entry->bt, 0, sizeof(__entry->bt));
- memcpy(__entry->bt, bt, bt_nr * sizeof(bt[0]));
- ),
-
- TP_printk("comm=%s pid=%d sleep=%u\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n"
- " %pB\n",
- __entry->comm, __entry->pid,
- __entry->sleep_us,
- (void *) __entry->bt[0],
- (void *) __entry->bt[1],
- (void *) __entry->bt[2],
- (void *) __entry->bt[3],
- (void *) __entry->bt[4],
- (void *) __entry->bt[5],
- (void *) __entry->bt[6],
- (void *) __entry->bt[7],
- (void *) __entry->bt[8],
- (void *) __entry->bt[9])
-);
-
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt,
unsigned int prev_state,
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 976092b7bd45..bfc5c88920ff 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -32,3 +32,4 @@ obj-y += core.o
obj-y += fair.o
obj-y += build_policy.o
obj-y += build_utility.o
+obj-y += timestats_bt.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e3e8d71df105..dae03d76cd2c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4052,22 +4052,6 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
return match > 0;
}
-static noinline void do_trace_sched_wakeup_backtrace(struct task_struct *task, u64 start_time)
-{
- u64 duration = ktime_get_ns() - start_time;
-
- if (duration < 100 * NSEC_PER_MSEC)
- return;
-
- if (task->__state & TASK_NOLOAD)
- return;
-
- unsigned long bt[10];
- unsigned bt_nr = stack_trace_save_tsk(task, bt, ARRAY_SIZE(bt), 0);
-
- trace_sched_wakeup_backtrace(task, duration, bt, bt_nr);
-}
-
/*
* Notes on Program-Order guarantees on SMP systems.
*
@@ -4222,9 +4206,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
u64 sleep_start;
if (p->sleep_timestamp &&
- trace_sched_wakeup_backtrace_enabled() &&
(sleep_start = xchg(&p->sleep_timestamp, 0)))
- do_trace_sched_wakeup_backtrace(p, sleep_start);
+ sched_wakeup_backtrace(p, sleep_start);
/*
* If we are going to wake up a thread waiting for CONDITION we
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index fd7e85220715..bd51d107ad6f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -470,7 +470,7 @@ static const struct file_operations fair_server_period_fops = {
.release = single_release,
};
-static struct dentry *debugfs_sched;
+struct dentry *debugfs_sched;
static void debugfs_fair_server_init(void)
{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 38e0e323dda2..5dbd52d3a99f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3984,4 +3984,8 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
#include "ext.h"
+extern struct dentry *debugfs_sched;
+
+void sched_wakeup_backtrace(struct task_struct *task, u64 start_time);
+
#endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/kernel/sched/timestats_bt.c b/kernel/sched/timestats_bt.c
new file mode 100644
index 000000000000..5cd36cb84e0c
--- /dev/null
+++ b/kernel/sched/timestats_bt.c
@@ -0,0 +1,199 @@
+/*
+ * Message template for this file: prefixes the format with the name of the
+ * calling function and appends the trailing newline.
+ * NOTE(review): presumably consumed by the pr_*() printk helpers, which is why
+ * it is defined ahead of the #includes - no pr_*() call is visible in this
+ * file, so confirm it is still needed.
+ */
+#define pr_fmt(fmt) "%s() " fmt "\n", __func__
+
+#include <linux/debugfs.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/init.h>
+#include <linux/mmu_context.h>
+#include <linux/rhashtable.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/spinlock.h>
+#include <linux/stacktrace.h>
+#include <linux/time_stats.h>
+
+#include "sched.h"
+
+/*
+ * Backtrace of a sleeping task: up to ARRAY_SIZE(d) entries as filled in by
+ * stack_trace_save_tsk(). The whole struct is used as the hash-table key
+ * (see sched_wakeup_bt_params), so unused trailing slots have to be zero;
+ * sched_wakeup_backtrace() guarantees that by starting from a zeroed struct.
+ */
+struct sched_wakeup_bt {
+ unsigned long d[10];
+};
+
+/* One hash-table entry: time statistics accumulated for a unique backtrace. */
+struct sched_wakeup_stats {
+ struct rhash_head hash; /* linkage into stats_table */
+ struct sched_wakeup_bt bt; /* lookup key */
+ struct time_stats stats; /* fed by __time_stats_update() on each recorded wakeup */
+};
+
+/*
+ * rhashtable layout for stats_table: entries are struct sched_wakeup_stats,
+ * keyed by the raw bytes of the embedded backtrace.
+ */
+static const struct rhashtable_params sched_wakeup_bt_params = {
+ .head_offset = offsetof(struct sched_wakeup_stats, hash),
+ .key_offset = offsetof(struct sched_wakeup_stats, bt),
+ .key_len = sizeof(struct sched_wakeup_bt),
+};
+
+/* Backtrace -> stats lookup used by sched_wakeup_backtrace() */
+static struct rhashtable stats_table;
+/* Pointer to every entry ever created, in creation order; the debugfs reader snapshots it */
+static GENRADIX(struct sched_wakeup_stats *) stats_list;
+/* Number of slots of stats_list that have been filled in */
+static unsigned stats_nr;
+/* Serializes entry creation: the stats_list append and the stats_nr increment */
+static DEFINE_SPINLOCK(stats_lock);
+
+/*
+ * Create the stats entry for backtrace @bt and publish it in both the hash
+ * table and stats_list. The caller holds stats_lock (see
+ * sched_wakeup_stats_new()), which is what makes the stats_nr update safe.
+ *
+ * Returns the new entry, ERR_PTR() on failure, or NULL when an entry for the
+ * same backtrace was inserted concurrently - the caller is expected to redo
+ * the lookup in that case.
+ *
+ * NOTE(review): the embedded time_stats is only zero-filled by kzalloc();
+ * confirm that struct time_stats needs no explicit initialization.
+ * NOTE(review): the slot is written before stats_nr is bumped, but there is no
+ * barrier here - confirm lockless readers of stats_nr cope with that.
+ */
+static struct sched_wakeup_stats *__sched_wakeup_stats_new(struct sched_wakeup_bt *bt)
+{
+	struct sched_wakeup_stats **slot, *entry;
+	int err;
+
+	/* Reserve the list slot first so publishing below cannot fail. */
+	slot = genradix_ptr_alloc(&stats_list, stats_nr, GFP_ATOMIC);
+	if (!slot)
+		return ERR_PTR(-ENOMEM);
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	entry->bt = *bt;
+
+	err = rhashtable_lookup_insert_fast(&stats_table, &entry->hash, sched_wakeup_bt_params);
+	if (unlikely(err)) {
+		kfree(entry);
+
+		/* Lost a race against an identical backtrace: not an error. */
+		if (err == -EEXIST)
+			return NULL;
+		return ERR_PTR(err);
+	}
+
+	*slot = entry;
+	stats_nr++;
+	return entry;
+}
+
+/*
+ * Locked wrapper around __sched_wakeup_stats_new(): takes stats_lock for the
+ * duration of the insertion and hands the result (entry, NULL or ERR_PTR())
+ * straight back to the caller.
+ */
+static noinline struct sched_wakeup_stats *sched_wakeup_stats_new(struct sched_wakeup_bt *bt)
+{
+	struct sched_wakeup_stats *entry;
+
+	spin_lock(&stats_lock);
+	entry = __sched_wakeup_stats_new(bt);
+	spin_unlock(&stats_lock);
+
+	return entry;
+}
+
+/*
+ * Account the time @task spent sleeping against the backtrace it slept at.
+ *
+ * @task:	task being woken up
+ * @start_time:	ktime_get_ns()-based timestamp of when the sleep began
+ *
+ * Does nothing until the hash table has been set up, for TASK_NOLOAD
+ * sleepers, and for sleeps shorter than one microsecond.
+ */
+void sched_wakeup_backtrace(struct task_struct *task, u64 start_time)
+{
+	/* The initcall has not run yet (or table setup failed). */
+	if (!stats_table.tbl)
+		return;
+
+	if (task->__state & TASK_NOLOAD)
+		return;
+
+	u64 now = ktime_get_ns();
+	u64 duration = now - start_time;
+
+	if (duration < NSEC_PER_USEC)
+		return;
+
+	/* Zeroed so that unused trailing frames compare equal as hash-key bytes. */
+	struct sched_wakeup_bt bt = {};
+	stack_trace_save_tsk(task, bt.d, ARRAY_SIZE(bt.d), 0);
+
+	/*
+	 * rhashtable_lookup() may only be called under rcu_read_lock(), which
+	 * nothing in this function takes; rhashtable_lookup_fast() takes it
+	 * itself. Entries are never removed from stats_table, so the returned
+	 * pointer remains valid after the RCU section has ended.
+	 *
+	 * A NULL from sched_wakeup_stats_new() means somebody else inserted
+	 * the same backtrace first, so simply look it up again.
+	 */
+	struct sched_wakeup_stats *s;
+	do {
+		s = rhashtable_lookup_fast(&stats_table, &bt, sched_wakeup_bt_params);
+		if (!s)
+			s = sched_wakeup_stats_new(&bt);
+	} while (!s);
+
+	/* Allocation failure: drop this sample rather than fail the wakeup. */
+	if (likely(!IS_ERR(s)))
+		__time_stats_update(&s->stats, start_time, now);
+}
+
+/*
+ * seq_file cursor for the "wakeups" debugfs file: a private snapshot of the
+ * entry pointers, built and sorted by sched_wakeup_start().
+ */
+struct sched_wakeup_iter {
+ loff_t pos; /* index into d[] of the record to show next */
+ size_t nr; /* number of elements in d[] */
+ struct sched_wakeup_stats *d[]; /* snapshot, sorted by sched_wakeup_stats_cmp() */
+};
+
+/*
+ * Three-way compare: evaluates to -1, 0 or 1. Arguments are parenthesized so
+ * that expressions using lower-precedence operators (e.g. a | b) compare as
+ * written. Each argument is still evaluated twice - no side effects, please.
+ */
+#define cmp_int(l, r) (((l) > (r)) - ((l) < (r)))
+
+/*
+ * sort() comparator over an array of struct sched_wakeup_stats pointers:
+ * orders entries by total_duration, largest first (hence the negation).
+ */
+static int sched_wakeup_stats_cmp(const void *_l, const void *_r)
+{
+	const struct sched_wakeup_stats * const *l = _l;
+	const struct sched_wakeup_stats * const *r = _r;
+
+	return -cmp_int((*l)->stats.total_duration, (*r)->stats.total_duration);
+}
+
+/*
+ * seq_file ->start(): snapshot the entry pointers, sort the snapshot, and
+ * position the cursor at *pos.
+ *
+ * Returns the iterator, or NULL at end of file and on allocation failure.
+ * (An ERR_PTR() cannot be returned for the latter: sched_wakeup_stop()
+ * passes whatever it is handed to kfree().)
+ */
+static void *sched_wakeup_start(struct seq_file *m, loff_t *pos)
+{
+	unsigned nr = READ_ONCE(stats_nr);
+
+	/*
+	 * seq_file calls ->start() again with the position one past the last
+	 * record; without this check ->show() would index d[] out of bounds
+	 * and the read would never reach EOF. Also covers the empty table.
+	 */
+	if (*pos < 0 || *pos >= nr)
+		return NULL;
+
+	struct sched_wakeup_iter *iter =
+		kzalloc(struct_size(iter, d, nr), GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	/*
+	 * stats_list is read without stats_lock, so a slot may not be visible
+	 * yet even though stats_nr already counts it. Skip such slots: the
+	 * sort comparator dereferences every element unconditionally.
+	 */
+	size_t found = 0;
+	for (size_t i = 0; i < nr; i++) {
+		struct sched_wakeup_stats **slot = genradix_ptr(&stats_list, i);
+		struct sched_wakeup_stats *s = slot ? READ_ONCE(*slot) : NULL;
+
+		if (s)
+			iter->d[found++] = s;
+	}
+
+	if (*pos >= found) {
+		kfree(iter);
+		return NULL;
+	}
+
+	iter->pos = *pos;
+	iter->nr = found;
+
+	sort(iter->d, found, sizeof(iter->d[0]), sched_wakeup_stats_cmp, NULL);
+	return iter;
+}
+
+/*
+ * seq_file ->stop(): release the iterator passed in @arg.
+ * @arg may be NULL (e.g. sched_wakeup_start() returns NULL on allocation
+ * failure); kfree(NULL) is a no-op.
+ */
+static void sched_wakeup_stop(struct seq_file *m, void *arg)
+{
+ kfree(arg);
+}
+
+/*
+ * seq_file ->next(): advance the cursor by one record and report the new
+ * position through *pos.
+ *
+ * Returns the iterator, or NULL once the snapshot is exhausted.
+ */
+static void *sched_wakeup_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+	struct sched_wakeup_iter *iter = arg;
+
+	*pos = ++iter->pos;
+
+	if (iter->pos >= iter->nr) {
+		/*
+		 * seq_file hands ->stop() the value returned here, i.e. NULL,
+		 * so the iterator has to be released now or it is leaked on
+		 * every read that runs to the end.
+		 */
+		kfree(iter);
+		return NULL;
+	}
+
+	return iter;
+}
+
+/*
+ * seq_file ->show(): emit the record at the cursor - the symbolized
+ * backtrace, one frame per line, followed by its time statistics. Records
+ * after the first are separated by a blank line.
+ *
+ * Output is formatted directly into the seq_file's buffer through a seq_buf
+ * and then committed. Always returns 0.
+ *
+ * NOTE(review): the "startup" label handed to time_stats_to_seq_buf() looks
+ * unrelated to wakeups - confirm it is intended.
+ */
+static int sched_wakeup_show(struct seq_file *m, void *arg)
+{
+	struct sched_wakeup_iter *iter = arg;
+	struct sched_wakeup_stats *entry = iter->d[iter->pos];
+	struct seq_buf out;
+	char *raw;
+	size_t room;
+	unsigned frame;
+
+	if (!entry)
+		return 0;
+
+	room = seq_get_buf(m, &raw);
+	seq_buf_init(&out, raw, room);
+
+	if (iter->pos != 0)
+		seq_buf_puts(&out, "\n");
+
+	for (frame = 0; frame < ARRAY_SIZE(entry->bt.d); frame++) {
+		/* A zero frame terminates a backtrace shorter than the array. */
+		if (!entry->bt.d[frame])
+			break;
+
+		seq_buf_printf(&out, "%pS\n", (void *) entry->bt.d[frame]);
+	}
+
+	time_stats_to_seq_buf(&out, &entry->stats, "startup", 0);
+
+	seq_commit(m, seq_buf_used(&out));
+	return 0;
+}
+
+/* seq_file iterator callbacks backing the "wakeups" debugfs file */
+static const struct seq_operations sched_wakeup_ops = {
+ .start = sched_wakeup_start,
+ .stop = sched_wakeup_stop,
+ .next = sched_wakeup_next,
+ .show = sched_wakeup_show,
+};
+
+/* ->open() for the "wakeups" file: attach the seq_file iterator to @filp. */
+static int sched_wakeups_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &sched_wakeup_ops);
+}
+
+/* File operations for the "wakeups" file: stock seq_file helpers plus our open() */
+static const struct file_operations sched_wakeups_fops = {
+ .open = sched_wakeups_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Initcall: set up the backtrace hash table and create the read-only
+ * "wakeups" file under debugfs_sched.
+ *
+ * A table setup failure only triggers a warning; the initcall itself always
+ * reports success. sched_wakeup_backtrace() checks stats_table.tbl and
+ * records nothing while it is NULL.
+ */
+static int sched_wakeup_backtrace_init(void)
+{
+	int err;
+
+	err = rhashtable_init(&stats_table, &sched_wakeup_bt_params);
+	WARN_ON(err);
+
+	WARN_ON(!stats_table.tbl);
+
+	debugfs_create_file("wakeups", 0444, debugfs_sched, NULL, &sched_wakeups_fops);
+
+	return 0;
+}
+late_initcall(sched_wakeup_backtrace_init);
diff --git a/lib/Makefile b/lib/Makefile
index 9027645c88f6..16fa41a33e7b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -354,7 +354,7 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o
obj-$(CONFIG_PARMAN) += parman.o
-obj-$(CONFIG_TIME_STATS) += time_stats.o
+obj-y += time_stats.o
obj-y += group_cpus.o