summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2020-07-31 12:12:21 +0200
committerIngo Molnar <mingo@kernel.org>2020-07-31 12:13:21 +0200
commitadb334d17858d8b679a41f7f2cd230e5c6accc0a (patch)
treefd4fd188aedd4412b6d1097eb58e0a479bb97596
parentf3020b8891b890b48d9e1a83241e3cce518427c1 (diff)
parentf87032aec41e0b00ae5fa9103eb8e7b2d1f8416b (diff)
Merge branch 'WIP.x86/entry' into x86/entry, to merge the latest generic code and resolve conflictsx86-entry-2020-08-04
Pick up and resolve the NMI entry code changes from the locking tree, and also pick up the latest two fixes from tip:core/entry. Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/entry/common.c34
-rw-r--r--arch/x86/include/asm/idtentry.h3
-rw-r--r--arch/x86/kernel/nmi.c9
-rw-r--r--arch/x86/kernel/traps.c17
-rw-r--r--include/linux/hardirq.h28
-rw-r--r--include/linux/lockdep.h178
-rw-r--r--include/linux/lockdep_types.h196
-rw-r--r--include/linux/seccomp.h3
-rw-r--r--include/linux/spinlock.h1
-rw-r--r--include/linux/spinlock_types.h2
-rw-r--r--kernel/entry/common.c4
-rw-r--r--kernel/kcsan/core.c5
-rw-r--r--kernel/kcsan/report.c9
-rw-r--r--kernel/locking/lockdep.c46
14 files changed, 319 insertions, 216 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3de0303703ae..48512c7944e7 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -198,6 +198,40 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+ bool irq_state = lockdep_hardirqs_enabled();
+
+ __nmi_enter();
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirq_enter();
+ rcu_nmi_enter();
+
+ instrumentation_begin();
+ trace_hardirqs_off_finish();
+ ftrace_nmi_enter();
+ instrumentation_end();
+
+ return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+ instrumentation_begin();
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+ instrumentation_end();
+
+ rcu_nmi_exit();
+ lockdep_hardirq_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ __nmi_exit();
+}
+
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73eb277e63ab..ff198fc2495e 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -11,6 +11,9 @@
#include <asm/irq_stack.h>
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index d7c5e44b26f7..4fc9954a9560 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin();
- trace_hardirqs_off_finish();
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs);
out:
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
}
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
+ bool irq_state;
+
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -491,14 +490,14 @@ nmi_restart:
this_cpu_write(nmi_dr7, local_db_save());
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
inc_irq_stat(__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
local_db_restore(this_cpu_read(nmi_dr7));
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index be8fcfec004a..438fc554d48d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -405,7 +405,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
}
#endif
- nmi_enter();
+ idtentry_enter_nmi(regs);
instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -651,15 +651,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
instrumentation_end();
irqentry_exit_to_user_mode(regs);
} else {
- nmi_enter();
+ bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
- trace_hardirqs_off_finish();
if (!do_int3(regs))
die("int3", regs, 0);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
}
@@ -867,9 +864,8 @@ out:
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
- nmi_enter();
+ bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
- trace_hardirqs_off_finish();
/*
* If something gets miswired and we end up here for a user mode
@@ -886,10 +882,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
handle_debug(regs, dr6, false);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -905,6 +899,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
instrumentation_begin();
handle_debug(regs, dr6, true);
+
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..754f67ac4326 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define __nmi_enter() \
do { \
+ lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
- lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
- rcu_nmi_enter(); \
+ } while (0)
+
+#define nmi_enter() \
+ do { \
+ __nmi_enter(); \
lockdep_hardirq_enter(); \
+ rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
+#define __nmi_exit() \
+ do { \
+ BUG_ON(!in_nmi()); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ printk_nmi_exit(); \
+ arch_nmi_exit(); \
+ lockdep_on(); \
+ } while (0)
+
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
- lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
- BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_on(); \
- printk_nmi_exit(); \
- arch_nmi_exit(); \
+ lockdep_hardirq_exit(); \
+ __nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8fce5c98a4b0..3b73cf84f77d 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -10,181 +10,20 @@
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
+#include <linux/lockdep_types.h>
+
struct task_struct;
-struct lockdep_map;
/* for sysctl */
extern int prove_locking;
extern int lock_stat;
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#include <linux/types.h>
-
-enum lockdep_wait_type {
- LD_WAIT_INV = 0, /* not checked, catch all */
-
- LD_WAIT_FREE, /* wait free, rcu etc.. */
- LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
-
-#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
- LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
-#else
- LD_WAIT_CONFIG = LD_WAIT_SPIN,
-#endif
- LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
-
- LD_WAIT_MAX, /* must be last */
-};
-
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
-#include <linux/list.h>
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
-/*
- * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
- * the total number of states... :-(
- */
-#define XXX_LOCK_USAGE_STATES (1+2*4)
-
-/*
- * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
- * cached in the instance of lockdep_map
- *
- * Currently main class (subclass == 0) and signle depth subclass
- * are cached in lockdep_map. This optimization is mainly targeting
- * on rq->lock. double_rq_lock() acquires this highly competitive with
- * single depth.
- */
-#define NR_LOCKDEP_CACHING_CLASSES 2
-
-/*
- * A lockdep key is associated with each lock object. For static locks we use
- * the lock address itself as the key. Dynamically allocated lock objects can
- * have a statically or dynamically allocated key. Dynamically allocated lock
- * keys must be registered before being used and must be unregistered before
- * the key memory is freed.
- */
-struct lockdep_subclass_key {
- char __one_byte;
-} __attribute__ ((__packed__));
-
-/* hash_entry is used to keep track of dynamically allocated keys. */
-struct lock_class_key {
- union {
- struct hlist_node hash_entry;
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
- };
-};
-
-extern struct lock_class_key __lockdep_no_validate__;
-
-struct lock_trace;
-
-#define LOCKSTAT_POINTS 4
-
-/*
- * The lock-class itself. The order of the structure members matters.
- * reinit_class() zeroes the key member and all subsequent members.
- */
-struct lock_class {
- /*
- * class-hash:
- */
- struct hlist_node hash_entry;
-
- /*
- * Entry in all_lock_classes when in use. Entry in free_lock_classes
- * when not in use. Instances that are being freed are on one of the
- * zapped_classes lists.
- */
- struct list_head lock_entry;
-
- /*
- * These fields represent a directed graph of lock dependencies,
- * to every node we attach a list of "forward" and a list of
- * "backward" graph nodes.
- */
- struct list_head locks_after, locks_before;
-
- const struct lockdep_subclass_key *key;
- unsigned int subclass;
- unsigned int dep_gen_id;
-
- /*
- * IRQ/softirq usage tracking bits:
- */
- unsigned long usage_mask;
- const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
-
- /*
- * Generation counter, when doing certain classes of graph walking,
- * to ensure that we check one node only once:
- */
- int name_version;
- const char *name;
-
- short wait_type_inner;
- short wait_type_outer;
-
-#ifdef CONFIG_LOCK_STAT
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
-#endif
-} __no_randomize_layout;
-
-#ifdef CONFIG_LOCK_STAT
-struct lock_time {
- s64 min;
- s64 max;
- s64 total;
- unsigned long nr;
-};
-
-enum bounce_type {
- bounce_acquired_write,
- bounce_acquired_read,
- bounce_contended_write,
- bounce_contended_read,
- nr_bounce_types,
-
- bounce_acquired = bounce_acquired_write,
- bounce_contended = bounce_contended_write,
-};
-
-struct lock_class_stats {
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
- struct lock_time read_waittime;
- struct lock_time write_waittime;
- struct lock_time read_holdtime;
- struct lock_time write_holdtime;
- unsigned long bounces[nr_bounce_types];
-};
-
-struct lock_class_stats lock_stats(struct lock_class *class);
-void clear_lock_stats(struct lock_class *class);
-#endif
-
-/*
- * Map the lock object (the lock instance) to the lock-class object.
- * This is embedded into specific lock instances:
- */
-struct lockdep_map {
- struct lock_class_key *key;
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
- const char *name;
- short wait_type_outer; /* can be taken in this context */
- short wait_type_inner; /* presents this context */
-#ifdef CONFIG_LOCK_STAT
- int cpu;
- unsigned long ip;
-#endif
-};
-
static inline void lockdep_copy_map(struct lockdep_map *to,
struct lockdep_map *from)
{
@@ -440,8 +279,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
-struct pin_cookie { unsigned int val; };
-
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
@@ -520,10 +357,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
-/*
- * The class key takes no space if lockdep is disabled:
- */
-struct lock_class_key { };
static inline void lockdep_register_key(struct lock_class_key *key)
{
@@ -533,11 +366,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}
-/*
- * The lockdep_map takes no space if lockdep is disabled:
- */
-struct lockdep_map { };
-
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
@@ -549,8 +377,6 @@ struct lockdep_map { };
#define lockdep_recursing(tsk) (0)
-struct pin_cookie { };
-
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
new file mode 100644
index 000000000000..7b9350624577
--- /dev/null
+++ b/include/linux/lockdep_types.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * see Documentation/locking/lockdep-design.rst for more details.
+ */
+#ifndef __LINUX_LOCKDEP_TYPES_H
+#define __LINUX_LOCKDEP_TYPES_H
+
+#include <linux/types.h>
+
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+enum lockdep_wait_type {
+ LD_WAIT_INV = 0, /* not checked, catch all */
+
+ LD_WAIT_FREE, /* wait free, rcu etc.. */
+ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
+
+#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
+ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+#else
+ LD_WAIT_CONFIG = LD_WAIT_SPIN,
+#endif
+ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
+
+ LD_WAIT_MAX, /* must be last */
+};
+
+#ifdef CONFIG_LOCKDEP
+
+#include <linux/list.h>
+
+/*
+ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
+ * the total number of states... :-(
+ */
+#define XXX_LOCK_USAGE_STATES (1+2*4)
+
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently main class (subclass == 0) and signle depth subclass
+ * are cached in lockdep_map. This optimization is mainly targeting
+ * on rq->lock. double_rq_lock() acquires this highly competitive with
+ * single depth.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES 2
+
+/*
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
+ */
+struct lockdep_subclass_key {
+ char __one_byte;
+} __attribute__ ((__packed__));
+
+/* hash_entry is used to keep track of dynamically allocated keys. */
+struct lock_class_key {
+ union {
+ struct hlist_node hash_entry;
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+ };
+};
+
+extern struct lock_class_key __lockdep_no_validate__;
+
+struct lock_trace;
+
+#define LOCKSTAT_POINTS 4
+
+/*
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
+ */
+struct lock_class {
+ /*
+ * class-hash:
+ */
+ struct hlist_node hash_entry;
+
+ /*
+ * Entry in all_lock_classes when in use. Entry in free_lock_classes
+ * when not in use. Instances that are being freed are on one of the
+ * zapped_classes lists.
+ */
+ struct list_head lock_entry;
+
+ /*
+ * These fields represent a directed graph of lock dependencies,
+ * to every node we attach a list of "forward" and a list of
+ * "backward" graph nodes.
+ */
+ struct list_head locks_after, locks_before;
+
+ const struct lockdep_subclass_key *key;
+ unsigned int subclass;
+ unsigned int dep_gen_id;
+
+ /*
+ * IRQ/softirq usage tracking bits:
+ */
+ unsigned long usage_mask;
+ const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
+
+ /*
+ * Generation counter, when doing certain classes of graph walking,
+ * to ensure that we check one node only once:
+ */
+ int name_version;
+ const char *name;
+
+ short wait_type_inner;
+ short wait_type_outer;
+
+#ifdef CONFIG_LOCK_STAT
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+#endif
+} __no_randomize_layout;
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+ s64 min;
+ s64 max;
+ s64 total;
+ unsigned long nr;
+};
+
+enum bounce_type {
+ bounce_acquired_write,
+ bounce_acquired_read,
+ bounce_contended_write,
+ bounce_contended_read,
+ nr_bounce_types,
+
+ bounce_acquired = bounce_acquired_write,
+ bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+ struct lock_time read_waittime;
+ struct lock_time write_waittime;
+ struct lock_time read_holdtime;
+ struct lock_time write_holdtime;
+ unsigned long bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+ short wait_type_outer; /* can be taken in this context */
+ short wait_type_inner; /* presents this context */
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+struct pin_cookie { unsigned int val; };
+
+#else /* !CONFIG_LOCKDEP */
+
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+/*
+ * The lockdep_map takes no space if lockdep is disabled:
+ */
+struct lockdep_map { };
+
+struct pin_cookie { };
+
+#endif /* !LOCKDEP */
+
+#endif /* __LINUX_LOCKDEP_TYPES_H */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 03d28c32ad01..51f234b6d28f 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -58,10 +58,11 @@ static inline int seccomp_mode(struct seccomp *s)
struct seccomp { };
struct seccomp_filter { };
+struct seccomp_data;
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
static inline int secure_computing(void) { return 0; }
-static inline int __secure_computing(void) { return 0; }
+static inline int __secure_computing(const struct seccomp_data *sd) { return 0; }
#else
static inline void secure_computing_strict(int this_syscall) { return; }
#endif
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d3770b3f9d9a..f2f12d746dbd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -56,6 +56,7 @@
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
#include <asm/barrier.h>
#include <asm/mmiowb.h>
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 6102e6bff3ae..b981caafe8bf 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -15,7 +15,7 @@
# include <linux/spinlock_types_up.h>
#endif
-#include <linux/lockdep.h>
+#include <linux/lockdep_types.h>
typedef struct raw_spinlock {
arch_spinlock_t raw_lock;
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 495f5c051b03..9852e0d62d95 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -256,7 +256,7 @@ noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
exit_to_user_mode();
}
-irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs)
+noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
irqentry_state_t ret = {
.exit_rcu = false,
@@ -333,7 +333,7 @@ void irqentry_exit_cond_resched(void)
}
}
-void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
+noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
lockdep_assert_irqs_disabled();
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..732623c30359 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}
if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..6b2fb1a6d8cd 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;
/*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
+ * Because we may generate reports when we're in scheduler code, the use
+ * of printk() could deadlock. Until such time that all printing code
+ * called in print_report() is scheduler-safe, accept the risk, and just
+ * get our message out. As such, also disable lockdep to hide the
+ * warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 29a8de4c50b9..d595623c4b34 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)
static __always_inline void lockdep_recursion_finish(void)
{
- if (WARN_ON_ONCE(--current->lockdep_recursion))
+ if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
current->lockdep_recursion = 0;
}
@@ -3646,7 +3646,16 @@ static void __trace_hardirqs_on_caller(void)
*/
void lockdep_hardirqs_on_prepare(unsigned long ip)
{
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * NMIs do not (and cannot) track lock dependencies, nothing to do.
+ */
+ if (unlikely(in_nmi()))
+ return;
+
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
if (unlikely(current->hardirqs_enabled)) {
@@ -3692,7 +3701,27 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * NMIs can happen in the middle of local_irq_{en,dis}able() where the
+ * tracking state and hardware state are out of sync.
+ *
+ * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
+ * and not rely on hardware state like normal interrupts.
+ */
+ if (unlikely(in_nmi())) {
+ /*
+ * Skip:
+ * - recursion check, because NMI can hit lockdep;
+ * - hardware state check, because above;
+ * - chain_key check, see lockdep_hardirqs_on_prepare().
+ */
+ goto skip_checks;
+ }
+
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
if (curr->hardirqs_enabled) {
@@ -3720,6 +3749,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
+skip_checks:
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
curr->hardirq_enable_ip = ip;
@@ -3735,7 +3765,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
+ * they will restore the software state. This ensures the software
+ * state is consistent inside NMIs as well.
+ */
+ if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)))
return;
/*