diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2024-04-12 23:00:53 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2025-03-24 10:01:07 -0400 |
commit | 266e5b3f18b94306ea9085ffd6c99082423b30be (patch) | |
tree | 9235ae607157910e6f9cefeb9f62fbda62d61f3c | |
parent | 26f6d134f60cfeac9efc9f76bf90814e4e3922f8 (diff) |
trace_sched_wakeup_backtrace
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | include/linux/sched.h | 31 | ||||
-rw-r--r-- | include/trace/events/sched.h | 47 | ||||
-rw-r--r-- | kernel/sched/core.c | 22 | ||||
-rw-r--r-- | kernel/smpboot.c | 2 |
4 files changed, 93 insertions, 9 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 9632e3318e0d..73a330748658 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,18 +223,32 @@ struct user_event_mm; * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ + +extern ktime_t ktime_get(void); + +#define set_task_sleep_time(task, state_value) \ +do { \ + if (((state_value) & TASK_NORMAL) && !((task)->__state & TASK_NORMAL))\ + task->sleep_timestamp = ktime_get(); \ +} while (0) + +#define __set_task_state(task, type, state_value) \ do { \ - debug_normal_state_change((state_value)); \ - WRITE_ONCE(current->__state, (state_value)); \ + debug_##type##_state_change((state_value)); \ + set_task_sleep_time(task, state_value); \ + WRITE_ONCE(task->__state, (state_value)); \ } while (0) -#define set_current_state(state_value) \ +#define set_task_state(task, type, state_value) \ do { \ - debug_normal_state_change((state_value)); \ - smp_store_mb(current->__state, (state_value)); \ + debug_##type##_state_change((state_value)); \ + set_task_sleep_time(task, state_value); \ + smp_store_mb(task->__state, (state_value)); \ } while (0) +#define __set_current_state(state_value) __set_task_state(current, normal, state_value) +#define set_current_state(state_value) set_task_state(current, normal, state_value) + /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. 
In that case we must @@ -246,8 +260,7 @@ struct user_event_mm; unsigned long flags; /* may shadow */ \ \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ - debug_special_state_change((state_value)); \ - WRITE_ONCE(current->__state, (state_value)); \ + __set_task_state(current, special, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) @@ -801,6 +814,8 @@ struct task_struct { /* saved state for "spinlock sleepers" */ unsigned int saved_state; + u64 sleep_timestamp; + /* * This begins the randomizable portion of task_struct. Only * scheduling-critical items should be added above here. diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9ea4c404bd4e..3e3150ae73b3 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -186,6 +186,53 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_PROTO(struct task_struct *p), TP_ARGS(p)); +TRACE_EVENT(sched_wakeup_backtrace, + TP_PROTO(struct task_struct *p, unsigned sleep_ns, unsigned long *bt, unsigned bt_nr), + + TP_ARGS(p, sleep_ns, bt, bt_nr), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned, sleep_us) + __field( unsigned, bt_nr ) + __array( ulong, bt, 10 ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sleep_us = sleep_ns / 1000; + __entry->bt_nr = bt_nr; + memset(__entry->bt, 0, sizeof(__entry->bt)); + memcpy(__entry->bt, bt, bt_nr * sizeof(bt[0])); + ), + + TP_printk("comm=%s pid=%d sleep=%u\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n" + " %pB\n", + __entry->comm, __entry->pid, + __entry->sleep_us, + (void *) __entry->bt[0], + (void *) __entry->bt[1], + (void *) __entry->bt[2], + (void *) __entry->bt[3], + (void *) __entry->bt[4], + (void *) __entry->bt[5], + (void *) __entry->bt[6], + (void *) __entry->bt[7], + (void *) __entry->bt[8], + (void *) __entry->bt[9]) +); 
+ #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(bool preempt, unsigned int prev_state, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 165c90ba64ea..e3e8d71df105 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4052,6 +4052,22 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) return match > 0; } +static noinline void do_trace_sched_wakeup_backtrace(struct task_struct *task, u64 start_time) +{ + u64 duration = ktime_get_ns() - start_time; + + if (duration < 100 * NSEC_PER_MSEC) + return; + + if (task->__state & TASK_NOLOAD) + return; + + unsigned long bt[10]; + unsigned bt_nr = stack_trace_save_tsk(task, bt, ARRAY_SIZE(bt), 0); + + trace_sched_wakeup_backtrace(task, duration, bt, bt_nr); +} + /* * Notes on Program-Order guarantees on SMP systems. * @@ -4204,6 +4220,12 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) goto out; } + u64 sleep_start; + if (p->sleep_timestamp && + trace_sched_wakeup_backtrace_enabled() && + (sleep_start = xchg(&p->sleep_timestamp, 0))) + do_trace_sched_wakeup_backtrace(p, sleep_start); + /* * If we are going to wake up a thread waiting for CONDITION we * need to ensure that CONDITION=1 done by the caller can not be diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1992b62e980b..6dfe5c39bd74 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -109,7 +109,7 @@ static int smpboot_thread_fn(void *data) struct smp_hotplug_thread *ht = td->ht; while (1) { - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); preempt_disable(); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); |