diff options
-rw-r--r-- | arch/x86/events/amd/uncore.c | 21 | ||||
-rw-r--r-- | arch/x86/include/asm/cacheinfo.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/smp.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 36 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cacheinfo.c (renamed from arch/x86/kernel/cpu/intel_cacheinfo.c) | 46 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/centaur.c | 53 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpu.h | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 34 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 7 | ||||
-rw-r--r-- | tools/objtool/check.c | 167 | ||||
-rw-r--r-- | tools/objtool/elf.c | 42 | ||||
-rw-r--r-- | tools/objtool/elf.h | 2 |
16 files changed, 295 insertions, 170 deletions
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index f5cbbba99283..981ba5e8241b 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -19,6 +19,7 @@ #include <asm/cpufeature.h> #include <asm/perf_event.h> #include <asm/msr.h> +#include <asm/smp.h> #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 @@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu) } if (amd_uncore_llc) { - unsigned int apicid = cpu_data(cpu).apicid; - unsigned int nshared, subleaf, prev_eax = 0; - uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - /* - * Iterate over Cache Topology Definition leaves until no - * more cache descriptions are available. - */ - for (subleaf = 0; subleaf < 5; subleaf++) { - cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx); - - /* EAX[0:4] gives type of cache */ - if (!(eax & 0x1f)) - break; - - prev_eax = eax; - } - nshared = ((prev_eax >> 14) & 0xfff) + 1; - - uncore->id = apicid - (apicid % nshared); + uncore->id = per_cpu(cpu_llc_id, cpu); uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h new file mode 100644 index 000000000000..e958e28f7ab5 --- /dev/null +++ b/arch/x86/include/asm/cacheinfo.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHEINFO_H +#define _ASM_X86_CACHEINFO_H + +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); + +#endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 21a114914ba4..e28add6b791f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -186,15 +186,6 @@ extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern u32 get_scattered_cpuid_leaf(unsigned int level, - unsigned int sub_leaf, - enum cpuid_regs_idx reg); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); - -extern void detect_extended_topology(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_32 extern int have_cpuid_p(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f75bff8f9d82..547c4fe50711 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } -#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a66229f51b12..7a40196967cb 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) -obj-y := intel_cacheinfo.o scattered.o topology.o +obj-y := cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 12bc0a1139da..55361ee04cc5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,6 +9,7 @@ #include <linux/random.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/cacheinfo.h> #include <asm/cpu.h> #include <asm/smp.h> #include <asm/pci-direct.h> @@ -297,7 +298,6 @@ static int nearby_node(int apicid) } #endif -#ifdef CONFIG_SMP /* * Fix up cpu_core_id for pre-F17h systems to be in the * [0 .. cores_per_node - 1] range. Not really needed but @@ -327,6 +327,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + int err; u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); @@ -345,21 +346,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) } /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. + * In case leaf B is available, use it to derive + * topology information. */ - if (cpuid_edx(0x80000006)) { - if (c->x86 == 0x17) { - /* - * LLC is at the core complex level. - * Core complex id is ApicId[3]. - */ - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; - } else { - /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = node_id; - } - } + err = detect_extended_topology(c); + if (!err) + c->x86_coreid_bits = get_count_order(c->x86_max_cores); + + cacheinfo_amd_init_llc_id(c, cpu, node_id); + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -375,7 +370,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) legacy_fixup_core_id(c); } } -#endif /* * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. @@ -383,7 +377,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) */ static void amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); @@ -394,17 +387,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; - amd_get_topology(c); -#endif } u16 amd_get_nb_id(int cpu) { - u16 id = 0; -#ifdef CONFIG_SMP - id = per_cpu(cpu_llc_id, cpu); -#endif - return id; + return per_cpu(cpu_llc_id, cpu); } EXPORT_SYMBOL_GPL(amd_get_nb_id); @@ -842,6 +829,7 @@ static void init_amd(struct cpuinfo_x86 *c) /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + amd_get_topology(c); srat_detect_node(c); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 54d04d574148..38354c66df81 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -20,6 +20,8 @@ #include <asm/amd_nb.h> #include <asm/smp.h> +#include "cpu.h" + #define LVL_1_INST 1 #define LVL_1_DATA 2 #define LVL_2 3 @@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +{ + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (!cpuid_edx(0x80000006)) + return; + + if (c->x86 < 0x17) { + /* LLC is at the node level. */ + per_cpu(cpu_llc_id, cpu) = node_id; + } else if (c->x86 == 0x17 && + c->x86_model >= 0 && c->x86_model <= 0x1F) { + /* + * LLC is at the core complex level. + * Core complex ID is ApicId[3] for these processors. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; + } else { + /* + * LLC ID is calculated from the number of threads sharing the + * cache. + * */ + u32 eax, ebx, ecx, edx, num_sharing_cache = 0; + u32 llc_index = find_num_cache_leaves(c) - 1; + + cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx); + if (eax) + num_sharing_cache = ((eax >> 14) & 0xfff) + 1; + + if (num_sharing_cache) { + int bits = get_count_order(num_sharing_cache) - 1; + + per_cpu(cpu_llc_id, cpu) = c->apicid >> bits; + } + } +} + void init_amd_cacheinfo(struct cpuinfo_x86 *c) { @@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c) } } -unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) +void init_intel_cacheinfo(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; @@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - return l2; + if (!l2) + cpu_detect_cache_sizes(c); } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5ec0f11c0de..14433ff5b828 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -18,6 +18,13 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + static void init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c) } } +static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void init_centaur(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, 0*32+31); #endif early_init_centaur(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + switch (c->x86) { #ifdef CONFIG_X86_32 case 5: @@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif + + if (cpu_has(c, X86_FEATURE_VMX)) + centaur_detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ce243f7d2d4e..39ed2e6ff8a0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); + +/* Last level cache ID of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { @@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c) *(s + 1) = '\0'; } +void detect_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + c->x86_max_cores = 1; + if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) + return; + + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + c->x86_max_cores = (eax >> 26) + 1; +} + void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e806b11a99af..295cb00a5ac5 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -47,6 +47,16 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern u32 get_scattered_cpuid_leaf(unsigned int level, + unsigned int sub_leaf, + enum cpuid_regs_idx reg); +extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); + +extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); +extern int detect_extended_topology(struct cpuinfo_x86 *c); +extern void detect_ht(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 60d1897041da..1226c4fa6e51 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -453,24 +453,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -/* - * find out the number of processor cores on the die - */ -static int intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return (eax >> 26) + 1; - else - return 1; -} - static void detect_vmx_virtcap(struct cpuinfo_x86 *c) { /* Intel VMX MSR indicated features */ @@ -653,8 +635,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) static void init_intel(struct cpuinfo_x86 *c) { - unsigned int l2 = 0; - early_init_intel(c); intel_workarounds(c); @@ -671,19 +651,13 @@ static void init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = intel_num_cpu_cores(c); + detect_num_cpu_cores(c); #ifdef CONFIG_X86_32 detect_ht(c); #endif } - l2 = init_intel_cacheinfo(c); - - /* Detect legacy cache sizes if init_intel_cacheinfo did not */ - if (l2 == 0) { - cpu_detect_cache_sizes(c); - l2 = c->x86_cache_size; - } + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -696,7 +670,8 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (boot_cpu_has(X86_FEATURE_DS)) { - unsigned int l1; + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -724,6 +699,7 @@ static void init_intel(struct cpuinfo_x86 *c) * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + unsigned int l2 = c->x86_cache_size; char *p = NULL; switch (c->x86_model) { diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b099024d339c..81c0afb39d0a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -27,7 +27,7 @@ * exists, use it for populating initial_apicid and cpu topology * detection. */ -void detect_extended_topology(struct cpuinfo_x86 *c) +int detect_extended_topology(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; @@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) static bool printed; if (c->cpuid_level < 0xb) - return; + return -1; cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); @@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) * check if the cpuid leaf 0xb is actually implemented. */ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return; + return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); @@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->cpu_core_id); printed = 1; } - return; #endif + return 0; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0f1cbb042f49..734e57847917 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -80,13 +80,6 @@ #include <asm/intel-family.h> #include <asm/cpu_device_id.h> -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; - /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5409f6f6c48d..3a31b238f885 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, return next; } +static struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + struct symbol *func = insn->func; + + if (!func) + return NULL; + + if (&next->list != &file->insn_list && next->func == func) + return next; + + /* Check if we're already in the subfunction: */ + if (func == func->cfunc) + return NULL; + + /* Move to the subfunction: */ + return find_insn(file, func->cfunc->sec, func->cfunc->offset); +} + +#define func_for_each_insn_all(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn && &insn->list != &file->insn_list && \ @@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, if (!strcmp(func->name, global_noreturns[i])) return 1; - if (!func->sec) + if (!func->len) return 0; - func_for_each_insn(file, func, insn) { + insn = find_insn(file, func->sec, func->offset); + if (!insn->func) + return 0; + + func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) @@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * case, the function's dead-end status depends on whether the target * of the sibling call returns. */ - func_for_each_insn(file, func, insn) { - if (insn->sec != func->sec || - insn->offset >= func->offset + func->len) - break; - + func_for_each_insn_all(file, func, insn) { if (insn->type == INSN_JUMP_UNCONDITIONAL) { struct instruction *dest = insn->jump_dest; - struct symbol *dest_func; if (!dest) /* sibling call to another file */ return 0; - if (dest->sec != func->sec || - dest->offset < func->offset || - dest->offset >= func->offset + func->len) { - /* local sibling call */ - dest_func = find_symbol_by_offset(dest->sec, - dest->offset); - if (!dest_func) - continue; + if (dest->func && dest->func->pfunc != insn->func->pfunc) { + /* local sibling call */ if (recursion == 5) { - WARN_FUNC("infinite recursion (objtool bug!)", - dest->sec, dest->offset); - return -1; + /* + * Infinite recursion: two functions + * have sibling calls to each other. + * This is a very rare case. It means + * they aren't dead ends. + */ + return 0; } - return __dead_end_function(file, dest_func, + return __dead_end_function(file, dest->func, recursion + 1); } } @@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file) if (!ignore_func(file, func)) continue; - func_for_each_insn(file, func, insn) + func_for_each_insn_all(file, func, insn) insn->ignore = true; } } @@ -782,30 +804,35 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct rela *table, - struct rela *next_table) +static int add_switch_table(struct objtool_file *file, struct instruction *insn, + struct rela *table, struct rela *next_table) { struct rela *rela = table; struct instruction *alt_insn; struct alternative *alt; + struct symbol *pfunc = insn->func->pfunc; + unsigned int prev_offset = 0; list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { if (rela == next_table) break; - if (rela->sym->sec != insn->sec || - rela->addend <= func->offset || - rela->addend >= func->offset + func->len) + /* Make sure the switch table entries are consecutive: */ + if (prev_offset && rela->offset != prev_offset + 8) break; - alt_insn = find_insn(file, insn->sec, rela->addend); - if (!alt_insn) { - WARN("%s: can't find instruction at %s+0x%x", - file->rodata->rela->name, insn->sec->name, - rela->addend); - return -1; - } + /* Detect function pointers from contiguous objects: */ + if (rela->sym->sec == pfunc->sec && + rela->addend == pfunc->offset) + break; + + alt_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!alt_insn) + break; + + /* Make sure the jmp dest is in the function or subfunction: */ + if (alt_insn->func->pfunc != pfunc) + break; alt = malloc(sizeof(*alt)); if (!alt) { @@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, alt->insn = alt_insn; list_add_tail(&alt->list, &insn->alts); + prev_offset = rela->offset; + } + + if (!prev_offset) { + WARN_FUNC("can't find switch jump table", + insn->sec, insn->offset); + return -1; } return 0; @@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file, { struct rela *text_rela, *rodata_rela; struct instruction *orig_insn = insn; + unsigned long table_offset; - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) { - /* case 1 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); - if (rodata_rela) - return rodata_rela; - - /* case 2 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); - if (!rodata_rela) - return NULL; - - file->ignore_unreachables = true; - return rodata_rela; - } - - /* case 3 */ /* * Backward search using the @first_jump_src links, these help avoid * much of the 'in between' code. Which avoids us getting confused by * it. */ - for (insn = list_prev_entry(insn, list); - + for (; &insn->list != &file->insn_list && insn->sec == func->sec && insn->offset >= func->offset; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { - if (insn->type == INSN_JUMP_DYNAMIC) + if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; /* allow small jumps within the range */ @@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file, if (!text_rela || text_rela->sym != file->rodata->sym) continue; + table_offset = text_rela->addend; + if (text_rela->type == R_X86_64_PC32) + table_offset += 4; + /* * Make sure the .rodata address isn't associated with a * symbol. gcc jump tables are anonymous data. */ - if (find_symbol_containing(file->rodata, text_rela->addend)) + if (find_symbol_containing(file->rodata, table_offset)) continue; - rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); - if (!rodata_rela) - continue; + rodata_rela = find_rela_by_dest(file->rodata, table_offset); + if (rodata_rela) { + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; - return rodata_rela; + return rodata_rela; + } } return NULL; @@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file, struct rela *rela, *prev_rela = NULL; int ret; - func_for_each_insn(file, func, insn) { + func_for_each_insn_all(file, func, insn) { if (!last) last = insn; @@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file, * the beginning of another switch table in the same function. */ if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, - rela); + ret = add_switch_table(file, prev_jump, prev_rela, rela); if (ret) return ret; } @@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file, } if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + ret = add_switch_table(file, prev_jump, prev_rela, NULL); if (ret) return ret; } @@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, while (1) { next_insn = next_insn_same_sec(file, insn); - - if (file->c_file && func && insn->func && func != insn->func) { + if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; } - if (insn->func) - func = insn->func; + func = insn->func ? insn->func->pfunc : NULL; if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", @@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, func, i) { + func_for_each_insn_continue_reverse(file, insn->func, i) { if (i->save) { save_insn = i; break; @@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, case INSN_JUMP_UNCONDITIONAL: if (insn->jump_dest && (!func || !insn->jump_dest->func || - func == insn->jump_dest->func)) { + insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); if (ret) @@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->pfunc != func) continue; insn = find_insn(file, sec, func->offset); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c1c338661699..4e60e105583e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strcmp(sym->name, name)) + return sym; + + return NULL; +} + struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) { struct symbol *sym; @@ -203,10 +216,11 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { - struct section *symtab; - struct symbol *sym; + struct section *symtab, *sec; + struct symbol *sym, *pfunc; struct list_head *entry, *tmp; int symbols_nr, i; + char *coldstr; symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { @@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf) hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); } + /* Create parent/child links for any cold subfunctions */ + list_for_each_entry(sec, &elf->sections, list) { + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + sym->pfunc = sym->cfunc = sym; + coldstr = strstr(sym->name, ".cold."); + if (coldstr) { + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + } + } + } + return 0; err: diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index d86e2ff14466..de5cd2ddded9 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,6 +61,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + struct symbol *pfunc, *cfunc; }; struct rela { @@ -86,6 +87,7 @@ struct elf { struct elf *elf_open(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, |