From 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Sep 2011 12:10:22 -0700 Subject: rcu: Track idleness independent of idle tasks Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required. This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop that RCU may be used. Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This patch therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which avoids the interrupt entry/exit error from accumulating. Overflow is avoided by the 64-bitness of the ->dyntick_nesting counter. This commit also adds warnings if a non-idle task asks RCU to enter idle state (and these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246). In addition, validation of ->dynticks and ->dynticks_nesting is added. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 17 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 636af6d9c6e5..3ab77bdc90c4 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -53,31 +53,122 @@ static void __call_rcu(struct rcu_head *head, #include "rcutiny_plugin.h" -#ifdef CONFIG_NO_HZ +static long long rcu_dynticks_nesting = LLONG_MAX / 2; -static long rcu_dynticks_nesting = 1; +/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ +static void rcu_idle_enter_common(void) +{ + if (rcu_dynticks_nesting) { + RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting)); + return; + } + RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting)); + if (!idle_cpu(smp_processor_id())) { + WARN_ON_ONCE(1); /* must be idle task! */ + RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", + rcu_dynticks_nesting)); + ftrace_dump(DUMP_ALL); + } + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ +} /* - * Enter dynticks-idle mode, which is an extended quiescent state - * if we have fully entered that mode (i.e., if the new value of - * dynticks_nesting is zero). + * Enter idle, which is an extended quiescent state if we have fully + * entered that mode (i.e., if the new value of dynticks_nesting is zero). */ -void rcu_enter_nohz(void) +void rcu_idle_enter(void) { - if (--rcu_dynticks_nesting == 0) - rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + unsigned long flags; + + local_irq_save(flags); + rcu_dynticks_nesting = 0; + rcu_idle_enter_common(); + local_irq_restore(flags); } /* - * Exit dynticks-idle mode, so that we are no longer in an extended - * quiescent state. + * Exit an interrupt handler towards idle. + */ +void rcu_irq_exit(void) +{ + unsigned long flags; + + local_irq_save(flags); + rcu_dynticks_nesting--; + WARN_ON_ONCE(rcu_dynticks_nesting < 0); + rcu_idle_enter_common(); + local_irq_restore(flags); +} + +/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ +static void rcu_idle_exit_common(long long oldval) +{ + if (oldval) { + RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting)); + return; + } + RCU_TRACE(trace_rcu_dyntick("End", oldval)); + if (!idle_cpu(smp_processor_id())) { + WARN_ON_ONCE(1); /* must be idle task! */ + RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", + oldval)); + ftrace_dump(DUMP_ALL); + } +} + +/* + * Exit idle, so that we are no longer in an extended quiescent state. */ -void rcu_exit_nohz(void) +void rcu_idle_exit(void) { + unsigned long flags; + long long oldval; + + local_irq_save(flags); + oldval = rcu_dynticks_nesting; + WARN_ON_ONCE(oldval != 0); + rcu_dynticks_nesting = LLONG_MAX / 2; + rcu_idle_exit_common(oldval); + local_irq_restore(flags); +} + +/* + * Enter an interrupt handler, moving away from idle. + */ +void rcu_irq_enter(void) +{ + unsigned long flags; + long long oldval; + + local_irq_save(flags); + oldval = rcu_dynticks_nesting; rcu_dynticks_nesting++; + WARN_ON_ONCE(rcu_dynticks_nesting == 0); + rcu_idle_exit_common(oldval); + local_irq_restore(flags); +} + +#ifdef CONFIG_PROVE_RCU + +/* + * Test whether RCU thinks that the current CPU is idle. + */ +int rcu_is_cpu_idle(void) +{ + return !rcu_dynticks_nesting; } -#endif /* #ifdef CONFIG_NO_HZ */ +#endif /* #ifdef CONFIG_PROVE_RCU */ + +/* + * Test whether the current CPU was interrupted from idle. Nested + * interrupts don't count, we must be running at the first interrupt + * level. + */ +int rcu_is_cpu_rrupt_from_idle(void) +{ + return rcu_dynticks_nesting <= 0; +} /* * Helper function for rcu_sched_qs() and rcu_bh_qs(). @@ -126,14 +217,13 @@ void rcu_bh_qs(int cpu) /* * Check to see if the scheduling-clock interrupt came from an extended - * quiescent state, and, if so, tell RCU about it. + * quiescent state, and, if so, tell RCU about it. This function must + * be called from hardirq context. It is normally called from the + * scheduling-clock interrupt. */ void rcu_check_callbacks(int cpu, int user) { - if (user || - (idle_cpu(cpu) && - !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) + if (user || rcu_is_cpu_rrupt_from_idle()) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); -- cgit v1.2.3 From e6b80a3b0994ea6c3d876d72464f2debbfcfeb05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 16:25:18 -0700 Subject: rcu: Detect illegal rcu dereference in extended quiescent state Report that none of the rcu read lock maps are held while in an RCU extended quiescent state (the section between rcu_idle_enter() and rcu_idle_exit()). This helps detect any use of rcu_dereference() and friends from within the section in idle where RCU is not allowed. This way we can guarantee an extended quiescent window where the CPU can be put in dyntick idle mode or can simply aoid to be part of any global grace period completion while in the idle loop. Uses of RCU from such mode are totally ignored by RCU, hence the importance of these checks. Signed-off-by: Frederic Weisbecker Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++ kernel/rcupdate.c | 2 ++ kernel/rcutiny.c | 1 + kernel/rcutree.c | 1 + 4 files changed, 30 insertions(+) (limited to 'kernel/rcutiny.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8d315b013e37..bf91fcfe181c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -228,6 +228,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) #ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_PROVE_RCU +extern int rcu_is_cpu_idle(void); +#else /* !CONFIG_PROVE_RCU */ +static inline int rcu_is_cpu_idle(void) +{ + return 0; +} +#endif /* else !CONFIG_PROVE_RCU */ + extern struct lockdep_map rcu_lock_map; # define rcu_read_acquire() \ lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) @@ -262,6 +271,8 @@ static inline int rcu_read_lock_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; + if (rcu_is_cpu_idle()) + return 0; return lock_is_held(&rcu_lock_map); } @@ -285,6 +296,19 @@ extern int rcu_read_lock_bh_held(void); * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. + * + * Note that if the CPU is in the idle loop from an RCU point of + * view (ie: that we are in the section between rcu_idle_enter() and + * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU + * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs + * that are in such a section, considering these as in extended quiescent + * state, so such a CPU is effectively never in an RCU read-side critical + * section regardless of what RCU primitives it invokes. This state of + * affairs is required --- we need to keep an RCU-free window in idle + * where the CPU may possibly enter into low power mode. This way we can + * notice an extended quiescent state to other CPUs that started a grace + * period. Otherwise we would delay any grace period as long as we run in + * the idle task. */ #ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) @@ -293,6 +317,8 @@ static inline int rcu_read_lock_sched_held(void) if (!debug_lockdep_rcu_enabled()) return 1; + if (rcu_is_cpu_idle()) + return 0; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 92e771d7b44b..2bc4e135ff23 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; + if (rcu_is_cpu_idle()) + return 0; return in_softirq() || irqs_disabled(); } EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 3ab77bdc90c4..b4e0b4981768 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -157,6 +157,7 @@ int rcu_is_cpu_idle(void) { return !rcu_dynticks_nesting; } +EXPORT_SYMBOL(rcu_is_cpu_idle); #endif /* #ifdef CONFIG_PROVE_RCU */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 8afb2e89745b..489b62a67d35 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -567,6 +567,7 @@ int rcu_is_cpu_idle(void) preempt_enable(); return ret; } +EXPORT_SYMBOL(rcu_is_cpu_idle); #endif /* #ifdef CONFIG_PROVE_RCU */ -- cgit v1.2.3 From 4145fa7fbee3ec1e61c52825b146192885d9759f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Oct 2011 15:01:54 -0700 Subject: rcu: Deconfuse dynticks entry-exit tracing The trace_rcu_dyntick() trace event did not print both the old and the new value of the nesting level, and furthermore printed only the low-order 32 bits of it. This could result in some confusion when interpreting trace-event dumps, so this commit prints both the old and the new value, prints the full 64 bits, and also selects the process-entry/exit increment to print nicely in hexadecimal. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 15 +++++++++------ kernel/rcu.h | 7 +++++++ kernel/rcutiny.c | 28 +++++++++++++++++----------- kernel/rcutree.c | 35 ++++++++++++++++++++--------------- 4 files changed, 53 insertions(+), 32 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 172620a92b1a..c29fb2f55909 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -246,21 +246,24 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(char *polarity, int nesting), + TP_PROTO(char *polarity, long long oldnesting, long long newnesting), - TP_ARGS(polarity, nesting), + TP_ARGS(polarity, oldnesting, newnesting), TP_STRUCT__entry( __field(char *, polarity) - __field(int, nesting) + __field(long long, oldnesting) + __field(long long, newnesting) ), TP_fast_assign( __entry->polarity = polarity; - __entry->nesting = nesting; + __entry->oldnesting = oldnesting; + __entry->newnesting = newnesting; ), - TP_printk("%s %d", __entry->polarity, __entry->nesting) + TP_printk("%s %llx %llx", __entry->polarity, + __entry->oldnesting, __entry->newnesting) ); /* @@ -470,7 +473,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, nesting) do { } while (0) +#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) diff --git a/kernel/rcu.h b/kernel/rcu.h index f600868d550d..aa88baab5f78 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -29,6 +29,13 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ +/* + * Process-level increment to ->dynticks_nesting field. This allows for + * architectures that use half-interrupts and half-exceptions from + * process context. + */ +#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) + /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index b4e0b4981768..9b9bdf666fb5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -53,20 +53,21 @@ static void __call_rcu(struct rcu_head *head, #include "rcutiny_plugin.h" -static long long rcu_dynticks_nesting = LLONG_MAX / 2; +static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ -static void rcu_idle_enter_common(void) +static void rcu_idle_enter_common(long long oldval) { if (rcu_dynticks_nesting) { - RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("--=", + oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", - rcu_dynticks_nesting)); + oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); } rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ @@ -79,10 +80,12 @@ static void rcu_idle_enter_common(void) void rcu_idle_enter(void) { unsigned long flags; + long long oldval; local_irq_save(flags); + oldval = rcu_dynticks_nesting; rcu_dynticks_nesting = 0; - rcu_idle_enter_common(); + rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -92,11 +95,13 @@ void rcu_idle_enter(void) void rcu_irq_exit(void) { unsigned long flags; + long long oldval; local_irq_save(flags); + oldval = rcu_dynticks_nesting; rcu_dynticks_nesting--; WARN_ON_ONCE(rcu_dynticks_nesting < 0); - rcu_idle_enter_common(); + rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -104,14 +109,15 @@ void rcu_irq_exit(void) static void rcu_idle_exit_common(long long oldval) { if (oldval) { - RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("++=", + oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("End", oldval)); + RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", - oldval)); + oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); } } @@ -127,7 +133,7 @@ void rcu_idle_exit(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; WARN_ON_ONCE(oldval != 0); - rcu_dynticks_nesting = LLONG_MAX / 2; + rcu_dynticks_nesting = DYNTICK_TASK_NESTING; rcu_idle_exit_common(oldval); local_irq_restore(flags); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 489b62a67d35..06e40dd53b23 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -196,7 +196,7 @@ void rcu_note_context_switch(int cpu) EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = LLONG_MAX / 2, + .dynticks_nesting = DYNTICK_TASK_NESTING, .dynticks = ATOMIC_INIT(1), }; @@ -348,17 +348,17 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) * we really have entered idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_idle_enter_common(struct rcu_dynticks *rdtp) +static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) { if (rdtp->dynticks_nesting) { - trace_rcu_dyntick("--=", rdtp->dynticks_nesting); + trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); return; } - trace_rcu_dyntick("Start", rdtp->dynticks_nesting); + trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ trace_rcu_dyntick("Error on entry: not idle task", - rdtp->dynticks_nesting); + oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); } /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ @@ -383,12 +383,14 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp) void rcu_idle_enter(void) { unsigned long flags; + long long oldval; struct rcu_dynticks *rdtp; local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting = 0; - rcu_idle_enter_common(rdtp); + rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -411,13 +413,15 @@ void rcu_idle_enter(void) void rcu_irq_exit(void) { unsigned long flags; + long long oldval; struct rcu_dynticks *rdtp; local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); - rcu_idle_enter_common(rdtp); + rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -431,7 +435,7 @@ void rcu_irq_exit(void) static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) { if (oldval) { - trace_rcu_dyntick("++=", rdtp->dynticks_nesting); + trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); return; } smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ @@ -439,10 +443,11 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); - trace_rcu_dyntick("End", oldval); + trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ - trace_rcu_dyntick("Error on exit: not idle task", oldval); + trace_rcu_dyntick("Error on exit: not idle task", + oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); } } @@ -453,8 +458,8 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for - * the possibility of usermode upcalls messing up our count + * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to + * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. */ @@ -468,7 +473,7 @@ void rcu_idle_exit(void) rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(oldval != 0); - rdtp->dynticks_nesting = LLONG_MAX / 2; + rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } @@ -2012,7 +2017,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->nxttail[i] = &rdp->nxtlist; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); rdp->cpu = cpu; rdp->rsp = rsp; @@ -2040,7 +2045,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ -- cgit v1.2.3 From 0989cb46783188ea7346ba6490be0046b9b7a725 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Nov 2011 08:57:21 -0700 Subject: rcu: Add more information to the wrong-idle-task complaint The current code just complains if the current task is not the idle task. This commit therefore adds printing of the identity of the idle task. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny.c | 12 ++++++++++-- kernel/rcutree.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 9b9bdf666fb5..6d70ff71a875 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -65,10 +65,14 @@ static void rcu_idle_enter_common(long long oldval) } RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { - WARN_ON_ONCE(1); /* must be idle task! */ + struct task_struct *idle = idle_task(smp_processor_id()); + RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ } @@ -115,10 +119,14 @@ static void rcu_idle_exit_common(long long oldval) } RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { - WARN_ON_ONCE(1); /* must be idle task! */ + struct task_struct *idle = idle_task(smp_processor_id()); + RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 06e40dd53b23..9888a0ad2d4e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -356,10 +356,14 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) } trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { - WARN_ON_ONCE(1); /* must be idle task! */ + struct task_struct *idle = idle_task(smp_processor_id()); + trace_rcu_dyntick("Error on entry: not idle task", oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic_inc(); /* See above. */ @@ -445,10 +449,14 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { - WARN_ON_ONCE(1); /* must be idle task! */ + struct task_struct *idle = idle_task(smp_processor_id()); + trace_rcu_dyntick("Error on exit: not idle task", oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } } -- cgit v1.2.3 From 11dbaa8cb79a6e4a234a134898436f717a663f01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Nov 2011 07:38:25 -0700 Subject: rcu: Fix idle-task checks RCU has traditionally relied on idle_cpu() to determine whether a given CPU is running in the context of an idle task, but commit 908a3283 (Fix idle_cpu()) has invalidated this approach. After commit 908a3283, idle_cpu() will return true if the current CPU is currently running the idle task, and will be doing so for the foreseeable future. RCU instead needs to know whether or not the current CPU is currently running the idle task, regardless of what the near future might bring. This commit therefore switches from idle_cpu() to "current->pid != 0". Reported-by: Wu Fengguang Suggested-by: Carsten Emde Signed-off-by: Paul E. McKenney Acked-by: Steven Rostedt Tested-by: Wu Fengguang Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 4 ++-- kernel/rcutree.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 6d70ff71a875..4e16ce36fa03 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -64,7 +64,7 @@ static void rcu_idle_enter_common(long long oldval) return; } RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); - if (!idle_cpu(smp_processor_id())) { + if (current->pid != 0) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", @@ -118,7 +118,7 @@ static void rcu_idle_exit_common(long long oldval) return; } RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); - if (!idle_cpu(smp_processor_id())) { + if (current->pid != 0) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b1711c48a7ec..49e0783fb200 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -355,7 +355,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) return; } trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); - if (!idle_cpu(smp_processor_id())) { + if (current->pid != 0) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", @@ -449,7 +449,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); - if (!idle_cpu(smp_processor_id())) { + if (current->pid != 0) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on exit: not idle task", -- cgit v1.2.3 From 99745b6a83414006f5c1e83efaebb423b41b67ef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Nov 2011 15:48:45 -0800 Subject: rcu: Make RCU use the new is_idle_task() API Change from direct comparison of ->pid with zero to is_idle_task(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny.c | 4 ++-- kernel/rcutree.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 4e16ce36fa03..e5bd94954fa3 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -64,7 +64,7 @@ static void rcu_idle_enter_common(long long oldval) return; } RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); - if (current->pid != 0) { + if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", @@ -118,7 +118,7 @@ static void rcu_idle_exit_common(long long oldval) return; } RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); - if (current->pid != 0) { + if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 49e0783fb200..7fb8b0e60811 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -355,7 +355,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) return; } trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); - if (current->pid != 0) { + if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", @@ -449,7 +449,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); - if (current->pid != 0) { + if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on exit: not idle task", -- cgit v1.2.3 From 4968c300e1fa5389fdf1f1ebd8b8e4aec9aa4a9e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Dec 2011 16:32:40 -0800 Subject: rcu: Augment rcu_batch_end tracing for idle and callback state The current rcu_batch_end event trace records only the name of the RCU flavor and the total number of callbacks that remain queued on the current CPU. This is insufficient for testing and tuning the new dyntick-idle RCU_FAST_NO_HZ code, so this commit adds idle state along with whether or not any of the callbacks that were ready to invoke at the beginning of rcu_do_batch() are still queued. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 38 +++++++++++++++++++++++++++++--------- kernel/rcutiny.c | 10 ++++++++-- kernel/rcutiny_plugin.h | 25 +++++++++++++++++++++++++ kernel/rcutree.c | 8 ++++++-- 4 files changed, 68 insertions(+), 13 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c75418c3ccb8..d2d88bed891b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -461,27 +461,46 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the name of the RCU flavor and - * the second argument is number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor, + * the second argument is number of callbacks actually invoked, + * the third argument (cb) is whether or not any of the callbacks that + * were ready to invoke at the beginning of this batch are still + * queued, the fourth argument (nr) is the return value of need_resched(), + * the fifth argument (iit) is 1 if the current task is the idle task, + * and the sixth argument (risk) is the return value from + * rcu_is_callbacks_kthread(). */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(char *rcuname, int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked, + bool cb, bool nr, bool iit, bool risk), - TP_ARGS(rcuname, callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), TP_STRUCT__entry( __field(char *, rcuname) __field(int, callbacks_invoked) + __field(bool, cb) + __field(bool, nr) + __field(bool, iit) + __field(bool, risk) ), TP_fast_assign( __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; - ), - - TP_printk("%s CBs-invoked=%d", - __entry->rcuname, __entry->callbacks_invoked) + __entry->cb = cb; + __entry->nr = nr; + __entry->iit = iit; + __entry->risk = risk; + ), + + TP_printk("%s CBs-invoked=%d idle=%c%c%c%c", + __entry->rcuname, __entry->callbacks_invoked, + __entry->cb ? 'C' : '.', + __entry->nr ? 'S' : '.', + __entry->iit ? 'I' : '.', + __entry->risk ? 'R' : '.') ); /* @@ -524,7 +543,8 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) -#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ + do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index e5bd94954fa3..977296dca0a4 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -259,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) { RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, + ACCESS_ONCE(rcp->rcucblist), + need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread())); return; } @@ -288,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread())); } static void rcu_process_callbacks(struct softirq_action *unused) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 2b0484a5dc28..dfa97cbb3910 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void) wake_up(&rcu_kthread_wq); } +#ifdef CONFIG_RCU_TRACE + +/* + * Is the current CPU running the RCU-callbacks kthread? + * Caller must have preemption disabled. + */ +static bool rcu_is_callbacks_kthread(void) +{ + return rcu_kthread_task == current; +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + /* * This kthread invokes RCU callbacks whose grace periods have * elapsed. It is awakened as needed, and takes the place of the @@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void) raise_softirq(RCU_SOFTIRQ); } +#ifdef CONFIG_RCU_TRACE + +/* + * There is no callback kthread, so this thread is never it. + */ +static bool rcu_is_callbacks_kthread(void) +{ + return false; +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + void rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2b2e1a996a65..6c4a6722abfd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1373,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { trace_rcu_batch_start(rsp->name, 0, 0); - trace_rcu_batch_end(rsp->name, 0); + trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), + need_resched(), is_idle_task(current), + rcu_is_callbacks_kthread()); return; } @@ -1409,7 +1411,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } local_irq_save(flags); - trace_rcu_batch_end(rsp->name, count); + trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread()); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.2.3