summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2024-04-12 23:00:53 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2025-03-24 10:01:07 -0400
commit 266e5b3f18b94306ea9085ffd6c99082423b30be (patch)
tree 9235ae607157910e6f9cefeb9f62fbda62d61f3c
parent 26f6d134f60cfeac9efc9f76bf90814e4e3922f8 (diff)
trace_sched_wakeup_backtrace
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- include/linux/sched.h 31
-rw-r--r-- include/trace/events/sched.h 47
-rw-r--r-- kernel/sched/core.c 22
-rw-r--r-- kernel/smpboot.c 2
4 files changed, 93 insertions, 9 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9632e3318e0d..73a330748658 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -223,18 +223,32 @@ struct user_event_mm;
*
* Also see the comments of try_to_wake_up().
*/
-#define __set_current_state(state_value) \
+
+extern ktime_t ktime_get(void);
+
+/*
+ * set_task_sleep_time() - stamp the start of a sleep.
+ *
+ * When @state_value has a TASK_NORMAL bit set and @task's current state has
+ * none, store ktime_get() in @task->sleep_timestamp. Transitions between two
+ * sleeping states leave the existing timestamp untouched.
+ *
+ * Both macro arguments are evaluated more than once; do not pass expressions
+ * with side effects.
+ */
+#define set_task_sleep_time(task, state_value) \
+do { \
+	if (((state_value) & TASK_NORMAL) && \
+	    !(READ_ONCE((task)->__state) & TASK_NORMAL)) \
+		WRITE_ONCE((task)->sleep_timestamp, ktime_get()); \
+} while (0)
+
+/*
+ * __set_task_state() - set @task's state without a memory barrier.
+ *
+ * @type selects the debug hook (debug_<type>_state_change); the sleep
+ * timestamp is updated before the new state is stored with WRITE_ONCE().
+ */
+#define __set_task_state(task, type, state_value) \
do { \
- debug_normal_state_change((state_value)); \
- WRITE_ONCE(current->__state, (state_value)); \
+	debug_##type##_state_change((state_value)); \
+	set_task_sleep_time(task, state_value); \
+	WRITE_ONCE((task)->__state, (state_value)); \
} while (0)
-#define set_current_state(state_value) \
+/*
+ * set_task_state() - like __set_task_state(), but the state is stored with
+ * smp_store_mb().
+ */
+#define set_task_state(task, type, state_value) \
do { \
- debug_normal_state_change((state_value)); \
- smp_store_mb(current->__state, (state_value)); \
+	debug_##type##_state_change((state_value)); \
+	set_task_sleep_time(task, state_value); \
+	smp_store_mb((task)->__state, (state_value)); \
} while (0)
+/* The historical entry points, now thin wrappers operating on current. */
+#define __set_current_state(state_value) __set_task_state(current, normal, state_value)
+#define set_current_state(state_value) set_task_state(current, normal, state_value)
+
/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
@@ -246,8 +260,7 @@ struct user_event_mm;
unsigned long flags; /* may shadow */ \
\
raw_spin_lock_irqsave(&current->pi_lock, flags); \
- debug_special_state_change((state_value)); \
- WRITE_ONCE(current->__state, (state_value)); \
+ __set_task_state(current, special, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
@@ -801,6 +814,8 @@ struct task_struct {
/* saved state for "spinlock sleepers" */
unsigned int saved_state;
+ u64 sleep_timestamp;
+
/*
* This begins the randomizable portion of task_struct. Only
* scheduling-critical items should be added above here.
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9ea4c404bd4e..3e3150ae73b3 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -186,6 +186,53 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
+/*
+ * Tracepoint reporting the wakeup of a task together with how long it slept
+ * and a backtrace.
+ *
+ * @p:        task being woken; its comm and pid are recorded
+ * @sleep_ns: sleep duration in nanoseconds; recorded in microseconds
+ * @bt:       backtrace entries, printed with %pB
+ * @bt_nr:    number of valid entries in @bt; at most 10 are recorded, unused
+ *            slots are zeroed
+ *
+ * @sleep_ns is 64 bits wide: a 32-bit nanosecond count wraps after roughly
+ * 4.29 seconds, which would corrupt the reported duration of long sleeps.
+ */
+TRACE_EVENT(sched_wakeup_backtrace,
+	TP_PROTO(struct task_struct *p, u64 sleep_ns, unsigned long *bt, unsigned bt_nr),
+
+	TP_ARGS(p, sleep_ns, bt, bt_nr),
+
+	TP_STRUCT__entry(
+		__array(	char,		comm,	TASK_COMM_LEN	)
+		__field(	pid_t,		pid			)
+		__field(	u64,		sleep_us		)
+		__field(	unsigned,	bt_nr			)
+		__array(	ulong,		bt,	10		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		/* div_u64() keeps the 64-bit division buildable on 32-bit. */
+		__entry->sleep_us	= div_u64(sleep_ns, NSEC_PER_USEC);
+		/* Never copy more entries than the fixed-size array holds. */
+		__entry->bt_nr		= bt_nr < ARRAY_SIZE(__entry->bt)
+					? bt_nr : ARRAY_SIZE(__entry->bt);
+		memset(__entry->bt, 0, sizeof(__entry->bt));
+		memcpy(__entry->bt, bt, __entry->bt_nr * sizeof(bt[0]));
+	),
+
+	TP_printk("comm=%s pid=%d sleep=%llu\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n"
+		  " %pB\n",
+		  __entry->comm, __entry->pid,
+		  (unsigned long long) __entry->sleep_us,
+		  (void *) __entry->bt[0],
+		  (void *) __entry->bt[1],
+		  (void *) __entry->bt[2],
+		  (void *) __entry->bt[3],
+		  (void *) __entry->bt[4],
+		  (void *) __entry->bt[5],
+		  (void *) __entry->bt[6],
+		  (void *) __entry->bt[7],
+		  (void *) __entry->bt[8],
+		  (void *) __entry->bt[9])
+);
+
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt,
unsigned int prev_state,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 165c90ba64ea..e3e8d71df105 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4052,6 +4052,22 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
return match > 0;
}
+/*
+ * Emit the sched_wakeup_backtrace tracepoint for @task if it slept long
+ * enough to be interesting.
+ *
+ * @task:       task being woken
+ * @start_time: sleep start in nanoseconds, compared against ktime_get_ns()
+ *
+ * Nothing is traced when the sleep was shorter than 100ms or when the task's
+ * state has TASK_NOLOAD set. Otherwise up to 10 stack entries of @task are
+ * captured and handed to the tracepoint along with the sleep duration.
+ */
+static noinline void do_trace_sched_wakeup_backtrace(struct task_struct *task, u64 start_time)
+{
+	/* Shortest sleep worth reporting. */
+	const u64 min_sleep_ns = 100 * NSEC_PER_MSEC;
+	u64 duration = ktime_get_ns() - start_time;
+	unsigned long bt[10];
+	unsigned bt_nr;
+
+	if (duration < min_sleep_ns)
+		return;
+
+	/*
+	 * __state is stored with WRITE_ONCE()/smp_store_mb() by the state
+	 * setters, so pair that with a single marked load here.
+	 */
+	if (READ_ONCE(task->__state) & TASK_NOLOAD)
+		return;
+
+	bt_nr = stack_trace_save_tsk(task, bt, ARRAY_SIZE(bt), 0);
+
+	trace_sched_wakeup_backtrace(task, duration, bt, bt_nr);
+}
+
/*
* Notes on Program-Order guarantees on SMP systems.
*
@@ -4204,6 +4220,12 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;
}
+ u64 sleep_start;
+ if (p->sleep_timestamp &&
+ trace_sched_wakeup_backtrace_enabled() &&
+ (sleep_start = xchg(&p->sleep_timestamp, 0)))
+ do_trace_sched_wakeup_backtrace(p, sleep_start);
+
/*
* If we are going to wake up a thread waiting for CONDITION we
* need to ensure that CONDITION=1 done by the caller can not be
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 1992b62e980b..6dfe5c39bd74 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -109,7 +109,7 @@ static int smpboot_thread_fn(void *data)
struct smp_hotplug_thread *ht = td->ht;
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
preempt_disable();
if (kthread_should_stop()) {
__set_current_state(TASK_RUNNING);