summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/events/amd/uncore.c21
-rw-r--r--arch/x86/include/asm/cacheinfo.h7
-rw-r--r--arch/x86/include/asm/processor.h9
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c36
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c (renamed from arch/x86/kernel/cpu/intel_cacheinfo.c)46
-rw-r--r--arch/x86/kernel/cpu/centaur.c53
-rw-r--r--arch/x86/kernel/cpu/common.c20
-rw-r--r--arch/x86/kernel/cpu/cpu.h10
-rw-r--r--arch/x86/kernel/cpu/intel.c34
-rw-r--r--arch/x86/kernel/cpu/topology.c8
-rw-r--r--arch/x86/kernel/smpboot.c7
-rw-r--r--tools/objtool/check.c167
-rw-r--r--tools/objtool/elf.c42
-rw-r--r--tools/objtool/elf.h2
16 files changed, 295 insertions, 170 deletions
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index f5cbbba99283..981ba5e8241b 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
@@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
}
if (amd_uncore_llc) {
- unsigned int apicid = cpu_data(cpu).apicid;
- unsigned int nshared, subleaf, prev_eax = 0;
-
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- /*
- * Iterate over Cache Topology Definition leaves until no
- * more cache descriptions are available.
- */
- for (subleaf = 0; subleaf < 5; subleaf++) {
- cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
-
- /* EAX[0:4] gives type of cache */
- if (!(eax & 0x1f))
- break;
-
- prev_eax = eax;
- }
- nshared = ((prev_eax >> 14) & 0xfff) + 1;
-
- uncore->id = apicid - (apicid % nshared);
+ uncore->id = per_cpu(cpu_llc_id, cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
new file mode 100644
index 000000000000..e958e28f7ab5
--- /dev/null
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CACHEINFO_H
+#define _ASM_X86_CACHEINFO_H
+
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
+
+#endif /* _ASM_X86_CACHEINFO_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 21a114914ba4..e28add6b791f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -186,15 +186,6 @@ extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern u32 get_scattered_cpuid_leaf(unsigned int level,
- unsigned int sub_leaf,
- enum cpuid_regs_idx reg);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void detect_extended_topology(struct cpuinfo_x86 *c);
-extern void detect_ht(struct cpuinfo_x86 *c);
#ifdef CONFIG_X86_32
extern int have_cpuid_p(void);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f75bff8f9d82..547c4fe50711 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void)
wbinvd();
return 0;
}
-#define smp_num_siblings 1
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a66229f51b12..7a40196967cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
-obj-y := intel_cacheinfo.o scattered.o topology.o
+obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 12bc0a1139da..55361ee04cc5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/cacheinfo.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
@@ -297,7 +298,6 @@ static int nearby_node(int apicid)
}
#endif
-#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
@@ -327,6 +327,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -345,21 +346,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
+ * In case leaf B is available, use it to derive
+ * topology information.
*/
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
- /*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
- */
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
- } else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
- }
- }
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ cacheinfo_amd_init_llc_id(c, cpu, node_id);
+
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -375,7 +370,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
-#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -383,7 +377,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -394,17 +387,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
- amd_get_topology(c);
-#endif
}
u16 amd_get_nb_id(int cpu)
{
- u16 id = 0;
-#ifdef CONFIG_SMP
- id = per_cpu(cpu_llc_id, cpu);
-#endif
- return id;
+ return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
@@ -842,6 +829,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
+ amd_get_topology(c);
srat_detect_node(c);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 54d04d574148..38354c66df81 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
+#include "cpu.h"
+
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (!cpuid_edx(0x80000006))
+ return;
+
+ if (c->x86 < 0x17) {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else if (c->x86 == 0x17 &&
+ c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex ID is ApicId[3] for these processors.
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /*
+ * LLC ID is calculated from the number of threads sharing the
+ * cache.
+ * */
+ u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
+ u32 llc_index = find_num_cache_leaves(c) - 1;
+
+ cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
+ if (eax)
+ num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
+
+ if (num_sharing_cache) {
+ int bits = get_count_order(num_sharing_cache) - 1;
+
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
+ }
+ }
+}
+
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
-unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- return l2;
+ if (!l2)
+ cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e5ec0f11c0de..14433ff5b828 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
+static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ce243f7d2d4e..39ed2e6ff8a0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
+void detect_num_cpu_cores(struct cpuinfo_x86 *c)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ c->x86_max_cores = 1;
+ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
+ return;
+
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+ if (eax & 0x1f)
+ c->x86_max_cores = (eax >> 26) + 1;
+}
+
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e806b11a99af..295cb00a5ac5 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,6 +47,16 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
+extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
+extern int detect_extended_topology(struct cpuinfo_x86 *c);
+extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 60d1897041da..1226c4fa6e51 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -453,24 +453,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-/*
- * find out the number of processor cores on the die
- */
-static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return 1;
-
- /* Intel has a non-standard dependency on %ecx for this CPUID level. */
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- return (eax >> 26) + 1;
- else
- return 1;
-}
-
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
@@ -653,8 +635,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
- unsigned int l2 = 0;
-
early_init_intel(c);
intel_workarounds(c);
@@ -671,19 +651,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
- c->x86_max_cores = intel_num_cpu_cores(c);
+ detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
- l2 = init_intel_cacheinfo(c);
-
- /* Detect legacy cache sizes if init_intel_cacheinfo did not */
- if (l2 == 0) {
- cpu_detect_cache_sizes(c);
- l2 = c->x86_cache_size;
- }
+ init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -696,7 +670,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
- unsigned int l1;
+ unsigned int l1, l2;
+
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
@@ -724,6 +699,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
+ unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b099024d339c..81c0afb39d0a 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
- return;
+ return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
- return;
+ return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
- return;
#endif
+ return 0;
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0f1cbb042f49..734e57847917 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -80,13 +80,6 @@
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
-
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5409f6f6c48d..3a31b238f885 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
return next;
}
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+ struct symbol *func = insn->func;
+
+ if (!func)
+ return NULL;
+
+ if (&next->list != &file->insn_list && next->func == func)
+ return next;
+
+ /* Check if we're already in the subfunction: */
+ if (func == func->cfunc)
+ return NULL;
+
+ /* Move to the subfunction: */
+ return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn; \
+ insn = next_insn_same_func(file, insn))
+
#define func_for_each_insn(file, func, insn) \
for (insn = find_insn(file, func->sec, func->offset); \
insn && &insn->list != &file->insn_list && \
@@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
if (!strcmp(func->name, global_noreturns[i]))
return 1;
- if (!func->sec)
+ if (!func->len)
return 0;
- func_for_each_insn(file, func, insn) {
+ insn = find_insn(file, func->sec, func->offset);
+ if (!insn->func)
+ return 0;
+
+ func_for_each_insn_all(file, func, insn) {
empty = false;
if (insn->type == INSN_RETURN)
@@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
* case, the function's dead-end status depends on whether the target
* of the sibling call returns.
*/
- func_for_each_insn(file, func, insn) {
- if (insn->sec != func->sec ||
- insn->offset >= func->offset + func->len)
- break;
-
+ func_for_each_insn_all(file, func, insn) {
if (insn->type == INSN_JUMP_UNCONDITIONAL) {
struct instruction *dest = insn->jump_dest;
- struct symbol *dest_func;
if (!dest)
/* sibling call to another file */
return 0;
- if (dest->sec != func->sec ||
- dest->offset < func->offset ||
- dest->offset >= func->offset + func->len) {
- /* local sibling call */
- dest_func = find_symbol_by_offset(dest->sec,
- dest->offset);
- if (!dest_func)
- continue;
+ if (dest->func && dest->func->pfunc != insn->func->pfunc) {
+ /* local sibling call */
if (recursion == 5) {
- WARN_FUNC("infinite recursion (objtool bug!)",
- dest->sec, dest->offset);
- return -1;
+ /*
+ * Infinite recursion: two functions
+ * have sibling calls to each other.
+ * This is a very rare case. It means
+ * they aren't dead ends.
+ */
+ return 0;
}
- return __dead_end_function(file, dest_func,
+ return __dead_end_function(file, dest->func,
recursion + 1);
}
}
@@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file)
if (!ignore_func(file, func))
continue;
- func_for_each_insn(file, func, insn)
+ func_for_each_insn_all(file, func, insn)
insn->ignore = true;
}
}
@@ -782,30 +804,35 @@ out:
return ret;
}
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
- struct instruction *insn, struct rela *table,
- struct rela *next_table)
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+ struct rela *table, struct rela *next_table)
{
struct rela *rela = table;
struct instruction *alt_insn;
struct alternative *alt;
+ struct symbol *pfunc = insn->func->pfunc;
+ unsigned int prev_offset = 0;
list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
if (rela == next_table)
break;
- if (rela->sym->sec != insn->sec ||
- rela->addend <= func->offset ||
- rela->addend >= func->offset + func->len)
+ /* Make sure the switch table entries are consecutive: */
+ if (prev_offset && rela->offset != prev_offset + 8)
break;
- alt_insn = find_insn(file, insn->sec, rela->addend);
- if (!alt_insn) {
- WARN("%s: can't find instruction at %s+0x%x",
- file->rodata->rela->name, insn->sec->name,
- rela->addend);
- return -1;
- }
+ /* Detect function pointers from contiguous objects: */
+ if (rela->sym->sec == pfunc->sec &&
+ rela->addend == pfunc->offset)
+ break;
+
+ alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!alt_insn)
+ break;
+
+ /* Make sure the jmp dest is in the function or subfunction: */
+ if (alt_insn->func->pfunc != pfunc)
+ break;
alt = malloc(sizeof(*alt));
if (!alt) {
@@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
alt->insn = alt_insn;
list_add_tail(&alt->list, &insn->alts);
+ prev_offset = rela->offset;
+ }
+
+ if (!prev_offset) {
+ WARN_FUNC("can't find switch jump table",
+ insn->sec, insn->offset);
+ return -1;
}
return 0;
@@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
{
struct rela *text_rela, *rodata_rela;
struct instruction *orig_insn = insn;
+ unsigned long table_offset;
- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
- if (text_rela && text_rela->sym == file->rodata->sym) {
- /* case 1 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend);
- if (rodata_rela)
- return rodata_rela;
-
- /* case 2 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend + 4);
- if (!rodata_rela)
- return NULL;
-
- file->ignore_unreachables = true;
- return rodata_rela;
- }
-
- /* case 3 */
/*
* Backward search using the @first_jump_src links, these help avoid
* much of the 'in between' code. Which avoids us getting confused by
* it.
*/
- for (insn = list_prev_entry(insn, list);
-
+ for (;
&insn->list != &file->insn_list &&
insn->sec == func->sec &&
insn->offset >= func->offset;
insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
- if (insn->type == INSN_JUMP_DYNAMIC)
+ if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
break;
/* allow small jumps within the range */
@@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file,
if (!text_rela || text_rela->sym != file->rodata->sym)
continue;
+ table_offset = text_rela->addend;
+ if (text_rela->type == R_X86_64_PC32)
+ table_offset += 4;
+
/*
* Make sure the .rodata address isn't associated with a
* symbol. gcc jump tables are anonymous data.
*/
- if (find_symbol_containing(file->rodata, text_rela->addend))
+ if (find_symbol_containing(file->rodata, table_offset))
continue;
- rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
- if (!rodata_rela)
- continue;
+ rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+ if (rodata_rela) {
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_rela->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
- return rodata_rela;
+ return rodata_rela;
+ }
}
return NULL;
@@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file,
struct rela *rela, *prev_rela = NULL;
int ret;
- func_for_each_insn(file, func, insn) {
+ func_for_each_insn_all(file, func, insn) {
if (!last)
last = insn;
@@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file,
* the beginning of another switch table in the same function.
*/
if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela,
- rela);
+ ret = add_switch_table(file, prev_jump, prev_rela, rela);
if (ret)
return ret;
}
@@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file,
}
if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+ ret = add_switch_table(file, prev_jump, prev_rela, NULL);
if (ret)
return ret;
}
@@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
while (1) {
next_insn = next_insn_same_sec(file, insn);
-
- if (file->c_file && func && insn->func && func != insn->func) {
+ if (file->c_file && func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
func->name, insn->func->name);
return 1;
}
- if (insn->func)
- func = insn->func;
+ func = insn->func ? insn->func->pfunc : NULL;
if (func && insn->ignore) {
WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
i = insn;
save_insn = NULL;
- func_for_each_insn_continue_reverse(file, func, i) {
+ func_for_each_insn_continue_reverse(file, insn->func, i) {
if (i->save) {
save_insn = i;
break;
@@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
case INSN_JUMP_UNCONDITIONAL:
if (insn->jump_dest &&
(!func || !insn->jump_dest->func ||
- func == insn->jump_dest->func)) {
+ insn->jump_dest->func->pfunc == func)) {
ret = validate_branch(file, insn->jump_dest,
state);
if (ret)
@@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file)
for_each_sec(file, sec) {
list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
+ if (func->type != STT_FUNC || func->pfunc != func)
continue;
insn = find_insn(file, sec, func->offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c1c338661699..4e60e105583e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
return NULL;
}
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (!strcmp(sym->name, name))
+ return sym;
+
+ return NULL;
+}
+
struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
{
struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
- struct section *symtab;
- struct symbol *sym;
+ struct section *symtab, *sec;
+ struct symbol *sym, *pfunc;
struct list_head *entry, *tmp;
int symbols_nr, i;
+ char *coldstr;
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
}
+ /* Create parent/child links for any cold subfunctions */
+ list_for_each_entry(sec, &elf->sections, list) {
+ list_for_each_entry(sym, &sec->symbol_list, list) {
+ if (sym->type != STT_FUNC)
+ continue;
+ sym->pfunc = sym->cfunc = sym;
+ coldstr = strstr(sym->name, ".cold.");
+ if (coldstr) {
+ coldstr[0] = '\0';
+ pfunc = find_symbol_by_name(elf, sym->name);
+ coldstr[0] = '.';
+
+ if (!pfunc) {
+ WARN("%s(): can't find parent function",
+ sym->name);
+ goto err;
+ }
+
+ sym->pfunc = pfunc;
+ pfunc->cfunc = sym;
+ }
+ }
+ }
+
return 0;
err:
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index d86e2ff14466..de5cd2ddded9 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,6 +61,7 @@ struct symbol {
unsigned char bind, type;
unsigned long offset;
unsigned int len;
+ struct symbol *pfunc, *cfunc;
};
struct rela {
@@ -86,6 +87,7 @@ struct elf {
struct elf *elf_open(const char *name, int flags);
struct section *find_section_by_name(struct elf *elf, const char *name);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,