diff options
36 files changed, 1564 insertions, 433 deletions
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index cef00d42ed5b..dd2f7b26ca30 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -65,7 +65,7 @@ that had to wait on lock acquisition. - CONFIGURATION -Lock statistics are enabled via CONFIG_LOCK_STATS. +Lock statistics are enabled via CONFIG_LOCK_STAT. - USAGE diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt new file mode 100644 index 000000000000..4aaf0dfb0cb8 --- /dev/null +++ b/Documentation/x86/early-microcode.txt @@ -0,0 +1,43 @@ +Early load microcode +==================== +By Fenghua Yu <fenghua.yu@intel.com> + +Kernel can update microcode in early phase of boot time. Loading microcode early +can fix CPU issues before they are observed during kernel boot time. + +Microcode is stored in an initrd file. The microcode is read from the initrd +file and loaded to CPUs during boot time. + +The format of the combined initrd image is microcode in cpio format followed by +the initrd image (maybe compressed). Kernel parses the combined initrd image +during boot time. The microcode file in cpio name space is: +kernel/x86/microcode/GenuineIntel.bin + +During BSP boot (before SMP starts), if the kernel finds the microcode file in +the initrd file, it parses the microcode and saves matching microcode in memory. +If matching microcode is found, it will be uploaded in BSP and later on in all +APs. + +The cached microcode patch is applied when CPUs resume from a sleep state. + +There are two legacy user space interfaces to load microcode, either through +/dev/cpu/microcode or through /sys/devices/system/cpu/microcode/reload file +in sysfs. + +In addition to these two legacy methods, the early loading method described +here is the third method with which microcode can be uploaded to a system's +CPUs. + +The following example script shows how to generate a new combined initrd file in +/boot/initrd-3.5.0.ucode.img with original microcode microcode.bin and +original initrd image /boot/initrd-3.5.0.img. + +mkdir initrd +cd initrd +mkdir kernel +mkdir kernel/x86 +mkdir kernel/x86/microcode +cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin +find .|cpio -oc >../ucode.cpio +cd .. +cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ff0e5f3c844e..4ebc7a6e6724 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1054,6 +1054,24 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE +config MICROCODE_INTEL_LIB + def_bool y + depends on MICROCODE_INTEL + +config MICROCODE_INTEL_EARLY + bool "Early load microcode" + depends on MICROCODE_INTEL && BLK_DEV_INITRD + default y + help + This option provides functionality to read additional microcode data + at the beginning of initrd image. The data tells kernel to load + microcode to CPU's as early as possible. No functional change if no + microcode data is glued to the initrd, therefore it's safe to say Y. + +config MICROCODE_EARLY + def_bool y + depends on MICROCODE_INTEL_EARLY + config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 43d921b4752c..6825e2efd1b4 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void) static inline void __exit exit_amd_microcode(void) {} #endif +#ifdef CONFIG_MICROCODE_EARLY +#define MAX_UCODE_COUNT 128 +extern void __init load_ucode_bsp(void); +extern __init void load_ucode_ap(void); +extern int __init save_microcode_in_initrd(void); +#else +static inline void __init load_ucode_bsp(void) {} +static inline __init void load_ucode_ap(void) {} +static inline int __init save_microcode_in_initrd(void) +{ + return 0; +} +#endif + #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h new file mode 100644 index 000000000000..5356f927d411 --- /dev/null +++ b/arch/x86/include/asm/microcode_intel.h @@ -0,0 +1,85 @@ +#ifndef _ASM_X86_MICROCODE_INTEL_H +#define _ASM_X86_MICROCODE_INTEL_H + +#include <asm/microcode.h> + +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[0]; +}; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; + +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) +#define DWSIZE (sizeof(u32)) + +#define get_totalsize(mc) \ + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + +#define get_datasize(mc) \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define sigmatch(s1, s2, p1, p2) \ + (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +extern int +get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev); +extern int microcode_sanity_check(void *mc, int print_err); +extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev); +extern int +update_match_revision(struct microcode_header_intel *mc_header, int rev); + +#ifdef CONFIG_MICROCODE_INTEL_EARLY +extern void __init load_ucode_intel_bsp(void); +extern void __cpuinit load_ucode_intel_ap(void); +extern void show_ucode_info_early(void); +#else +static inline __init void load_ucode_intel_bsp(void) {} +static inline __cpuinit void load_ucode_intel_ap(void) {} +static inline void show_ucode_info_early(void) {} +#endif + +#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +extern int save_mc_for_early(u8 *mc); +#else +static inline int save_mc_for_early(u8 *mc) +{ + return 0; +} +#endif + +#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8277941cbe99..3270116b1488 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -180,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void detect_extended_topology(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); +#ifdef CONFIG_X86_32 +extern int have_cpuid_p(void); +#else +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6f414ed88620..6fd3fd769796 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -5,8 +5,6 @@ /* misc architecture specific prototypes */ -void early_idt_handler(void); - void system_call(void); void syscall_init(void); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0fee48e279cc..50a7fc0f824a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void) native_write_cr3(native_read_cr3()); } +static inline void __native_flush_tlb_global_irq_disabled(void) +{ + unsigned long cr4; + + cr4 = native_read_cr4(); + /* clear PGE */ + native_write_cr4(cr4 & ~X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); +} + static inline void __native_flush_tlb_global(void) { unsigned long flags; - unsigned long cr4; /* * Read-modify-write to CR4 - protect it from preemption and @@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - cr4 = native_read_cr4(); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + __native_flush_tlb_global_irq_disabled(); raw_local_irq_restore(flags); } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ac3b3d002833..7bd3bd310106 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -87,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o +obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o +obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o +obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9c3ab43a6954..d814772c5bed 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -37,6 +37,8 @@ #include <asm/mce.h> #include <asm/msr.h> #include <asm/pat.h> +#include <asm/microcode.h> +#include <asm/microcode_intel.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/uv/uv.h> @@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag) } /* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) +int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag) { return 1; } -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { } @@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void) int cpu; int i; + /* + * Load microcode on this cpu if a valid microcode is available. + * This is early microcode loading procedure. + */ + load_ucode_ap(); + cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); oist = &per_cpu(orig_ist, cpu); @@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct thread_struct *thread = &curr->thread; + show_ucode_info_early(); + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); for (;;) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 57334f4cd3af..c5e403f6d869 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,6 +26,7 @@ #include <asm/e820.h> #include <asm/bios_ebda.h> #include <asm/bootparam_utils.h> +#include <asm/microcode.h> /* * Manage page tables very early on. @@ -159,17 +160,17 @@ void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { -#ifdef CONFIG_EARLY_PRINTK + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, &early_idt_handlers[i]); -#else - set_intr_gate(i, early_idt_handler); -#endif - } load_idt((const struct desc_ptr *)&idt_descr); copy_bootdata(__va(real_mode_data)); + /* + * Load microcode early on BSP. + */ + load_ucode_bsp(); + if (console_loglevel == 10) early_printk("Kernel alive\n"); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 3c3f58a0808f..73afd11799ca 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -144,6 +144,11 @@ ENTRY(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif +#ifdef CONFIG_MICROCODE_EARLY + /* Early load ucode on BSP. */ + call load_ucode_bsp +#endif + /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond __brk_base. The variable @@ -299,6 +304,12 @@ ENTRY(startup_32_smp) movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp +#ifdef CONFIG_MICROCODE_EARLY + /* Early load ucode on AP. */ + call load_ucode_ap +#endif + + default_entry: #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d94f6d68be2a..b7de3b25adb5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -336,6 +336,7 @@ early_idt_handlers: i = i + 1 .endr +/* This is global to keep gas from relaxing the jumps */ ENTRY(early_idt_handler) cld @@ -404,6 +405,7 @@ ENTRY(early_idt_handler) addq $16,%rsp # drop vector number and error code decl early_recursion_flag(%rip) INTERRUPT_RETURN +ENDPROC(early_idt_handler) __INITDATA diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 3a04b224d0c0..22db92bbdf1a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = { static void microcode_fini_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_ops->microcode_fini_cpu(cpu); - uci->valid = 0; } static enum ucode_state microcode_resume_cpu(int cpu) @@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu) static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (uci && uci->valid) + return UCODE_OK; if (collect_cpu_info(cpu)) return UCODE_ERROR; diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c new file mode 100644 index 000000000000..577db8417d15 --- /dev/null +++ b/arch/x86/kernel/microcode_core_early.c @@ -0,0 +1,76 @@ +/* + * X86 CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> + * H Peter Anvin" <hpa@zytor.com> + * + * This driver allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> +#include <asm/microcode_intel.h> +#include <asm/processor.h> + +#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) +#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') +#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') +#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') +#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') +#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') +#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') + +#define CPUID_IS(a, b, c, ebx, ecx, edx) \ + (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) + +/* + * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. + * x86_vendor() gets vendor id for BSP. + * + * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify + * coding, we still use x86_vendor() to get vendor id for AP. + * + * x86_vendor() gets vendor information directly through cpuid. + */ +static int __cpuinit x86_vendor(void) +{ + u32 eax = 0x00000000; + u32 ebx, ecx = 0, edx; + + if (!have_cpuid_p()) + return X86_VENDOR_UNKNOWN; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) + return X86_VENDOR_INTEL; + + if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) + return X86_VENDOR_AMD; + + return X86_VENDOR_UNKNOWN; +} + +void __init load_ucode_bsp(void) +{ + int vendor = x86_vendor(); + + if (vendor == X86_VENDOR_INTEL) + load_ucode_intel_bsp(); +} + +void __cpuinit load_ucode_ap(void) +{ + int vendor = x86_vendor(); + + if (vendor == X86_VENDOR_INTEL) + load_ucode_intel_ap(); +} diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3544aed39338..5fb2cebf556b 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -79,7 +79,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> -#include <asm/microcode.h> +#include <asm/microcode_intel.h> #include <asm/processor.h> #include <asm/msr.h> @@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); MODULE_LICENSE("GPL"); -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[0]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define DWSIZE (sizeof(u32)) - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); @@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) -{ - return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; -} - -static inline int -update_match_revision(struct microcode_header_intel *mc_header, int rev) -{ - return (mc_header->rev <= rev) ? 0 : 1; -} - -static int microcode_sanity_check(void *mc) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - pr_err("error! Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - pr_err("error! Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - pr_err("error! Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - pr_err("error! Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - pr_warning("aborting, bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - /* * return 0 - no update found * return 1 - found update */ -static int -get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) +static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) { - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - if (!update_match_revision(mc_header, rev)) - return 0; - - if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) - return 1; + struct cpu_signature cpu_sig; + unsigned int csig, cpf, crev; - /* Look for ext. headers: */ - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; + collect_cpu_info(cpu, &cpu_sig); - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + csig = cpu_sig.sig; + cpf = cpu_sig.pf; + crev = cpu_sig.rev; - for (i = 0; i < ext_sigcount; i++) { - if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; + return get_matching_microcode(csig, cpf, mc_intel, crev); } -static int apply_microcode(int cpu) +int apply_microcode(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; @@ -300,6 +144,14 @@ static int apply_microcode(int cpu) if (mc_intel == NULL) return 0; + /* + * Microcode on this CPU could be updated earlier. Only apply the + * microcode patch in mc_intel when it is newer than the one on this + * CPU. + */ + if (get_matching_mc(mc_intel, cpu) == 0) + return 0; + /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, (unsigned long) mc_intel->bits, @@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; enum ucode_state state = UCODE_OK; unsigned int curr_mc_size = 0; + unsigned int csig, cpf; while (leftover) { struct microcode_header_intel mc_header; @@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, } if (get_ucode_data(mc, ucode_ptr, mc_size) || - microcode_sanity_check(mc) < 0) { + microcode_sanity_check(mc, 1) < 0) { break; } - if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + csig = uci->cpu_sig.sig; + cpf = uci->cpu_sig.pf; + if (get_matching_microcode(csig, cpf, mc, new_rev)) { vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; @@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; + /* + * If early loading microcode is supported, save this mc into + * permanent memory. So it will be loaded early when a CPU is hot added + * or resumes. + */ + save_mc_for_early(new_mc); + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); out: diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c new file mode 100644 index 000000000000..7890bc838952 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_early.c @@ -0,0 +1,796 @@ +/* + * Intel CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> + * H Peter Anvin" <hpa@zytor.com> + * + * This allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/earlycpio.h> +#include <linux/initrd.h> +#include <linux/cpu.h> +#include <asm/msr.h> +#include <asm/microcode_intel.h> +#include <asm/processor.h> +#include <asm/tlbflush.h> +#include <asm/setup.h> + +unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +struct mc_saved_data { + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; +} mc_saved_data; + +static enum ucode_state __cpuinit +generic_load_microcode_early(struct microcode_intel **mc_saved_p, + unsigned int mc_saved_count, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *ucode_ptr, *new_mc = NULL; + int new_rev = uci->cpu_sig.rev; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + unsigned int csig = uci->cpu_sig.sig; + unsigned int cpf = uci->cpu_sig.pf; + int i; + + for (i = 0; i < mc_saved_count; i++) { + ucode_ptr = mc_saved_p[i]; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + mc_size = get_totalsize(mc_header); + if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { + new_rev = mc_header->rev; + new_mc = ucode_ptr; + } + } + + if (!new_mc) { + state = UCODE_NFOUND; + goto out; + } + + uci->mc = (struct microcode_intel *)new_mc; +out: + return state; +} + +static void __cpuinit +microcode_pointer(struct microcode_intel **mc_saved, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, int mc_saved_count) +{ + int i; + + for (i = 0; i < mc_saved_count; i++) + mc_saved[i] = (struct microcode_intel *) + (mc_saved_in_initrd[i] + initrd_start); +} + +#ifdef CONFIG_X86_32 +static void __cpuinit +microcode_phys(struct microcode_intel **mc_saved_tmp, + struct mc_saved_data *mc_saved_data) +{ + int i; + struct microcode_intel ***mc_saved; + + mc_saved = (struct microcode_intel ***) + __pa_symbol(&mc_saved_data->mc_saved); + for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + struct microcode_intel *p; + + p = *(struct microcode_intel **) + __pa(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); + } +} +#endif + +static enum ucode_state __cpuinit +load_microcode(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int count = mc_saved_data->mc_saved_count; + + if (!mc_saved_data->mc_saved) { + microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, + initrd_start, count); + + return generic_load_microcode_early(mc_saved_tmp, count, uci); + } else { +#ifdef CONFIG_X86_32 + microcode_phys(mc_saved_tmp, mc_saved_data); + return generic_load_microcode_early(mc_saved_tmp, count, uci); +#else + return generic_load_microcode_early(mc_saved_data->mc_saved, + count, uci); +#endif + } +} + +static u8 get_x86_family(unsigned long sig) +{ + u8 x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} + +static u8 get_x86_model(unsigned long sig) +{ + u8 x86, x86_model; + + x86 = get_x86_family(sig); + x86_model = (sig >> 4) & 0xf; + + if (x86 == 0x6 || x86 == 0xf) + x86_model += ((sig >> 16) & 0xf) << 4; + + return x86_model; +} + +/* + * Given CPU signature and a microcode patch, this function finds if the + * microcode patch has matching family and model with the CPU. + */ +static enum ucode_state +matching_model_microcode(struct microcode_header_intel *mc_header, + unsigned long sig) +{ + u8 x86, x86_model; + u8 x86_ucode, x86_model_ucode; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + unsigned long data_size = get_datasize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + x86 = get_x86_family(sig); + x86_model = get_x86_model(sig); + + x86_ucode = get_x86_family(mc_header->sig); + x86_model_ucode = get_x86_model(mc_header->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + return UCODE_NFOUND; + + ext_header = (struct extended_sigtable *) + mc_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + x86_ucode = get_x86_family(ext_sig->sig); + x86_model_ucode = get_x86_model(ext_sig->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + ext_sig++; + } + + return UCODE_NFOUND; +} + +static int +save_microcode(struct mc_saved_data *mc_saved_data, + struct microcode_intel **mc_saved_src, + unsigned int mc_saved_count) +{ + int i, j; + struct microcode_intel **mc_saved_p; + int ret; + + if (!mc_saved_count) + return -EINVAL; + + /* + * Copy new microcode data. + */ + mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), + GFP_KERNEL); + if (!mc_saved_p) + return -ENOMEM; + + for (i = 0; i < mc_saved_count; i++) { + struct microcode_intel *mc = mc_saved_src[i]; + struct microcode_header_intel *mc_header = &mc->hdr; + unsigned long mc_size = get_totalsize(mc_header); + mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); + if (!mc_saved_p[i]) { + ret = -ENOMEM; + goto err; + } + if (!mc_saved_src[i]) { + ret = -EINVAL; + goto err; + } + memcpy(mc_saved_p[i], mc, mc_size); + } + + /* + * Point to newly saved microcode. + */ + mc_saved_data->mc_saved = mc_saved_p; + mc_saved_data->mc_saved_count = mc_saved_count; + + return 0; + +err: + for (j = 0; j <= i; j++) + kfree(mc_saved_p[j]); + kfree(mc_saved_p); + + return ret; +} + +/* + * A microcode patch in ucode_ptr is saved into mc_saved + * - if it has matching signature and newer revision compared to an existing + * patch mc_saved. + * - or if it is a newly discovered microcode patch. + * + * The microcode patch should have matching model with CPU. + */ +static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, + unsigned int *mc_saved_count_p) +{ + int i; + int found = 0; + unsigned int mc_saved_count = *mc_saved_count_p; + struct microcode_header_intel *mc_header; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + for (i = 0; i < mc_saved_count; i++) { + unsigned int sig, pf; + unsigned int new_rev; + struct microcode_header_intel *mc_saved_header = + (struct microcode_header_intel *)mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + new_rev = mc_header->rev; + + if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { + found = 1; + if (update_match_revision(mc_header, new_rev)) { + /* + * Found an older ucode saved before. + * Replace the older one with this newer + * one. + */ + mc_saved[i] = + (struct microcode_intel *)ucode_ptr; + break; + } + } + } + if (i >= mc_saved_count && !found) + /* + * This ucode is first time discovered in ucode file. + * Save it to memory. + */ + mc_saved[mc_saved_count++] = + (struct microcode_intel *)ucode_ptr; + + *mc_saved_count_p = mc_saved_count; +} + +/* + * Get microcode matching with BSP's model. Only CPUs with the same model as + * BSP can stay in the platform. + */ +static enum ucode_state __init +get_matching_model_microcode(int cpu, unsigned long start, + void *data, size_t size, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + u8 *ucode_ptr = data; + unsigned int leftover = size; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count = mc_saved_data->mc_saved_count; + int i; + + while (leftover) { + mc_header = (struct microcode_header_intel *)ucode_ptr; + + mc_size = get_totalsize(mc_header); + if (!mc_size || mc_size > leftover || + microcode_sanity_check(ucode_ptr, 0) < 0) + break; + + leftover -= mc_size; + + /* + * Since APs with same family and model as the BSP may boot in + * the platform, we need to find and save microcode patches + * with the same family and model as the BSP. + */ + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != + UCODE_OK) { + ucode_ptr += mc_size; + continue; + } + + _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); + + ucode_ptr += mc_size; + } + + if (leftover) { + state = UCODE_ERROR; + goto out; + } + + if (mc_saved_count == 0) { + state = UCODE_NFOUND; + goto out; + } + + for (i = 0; i < mc_saved_count; i++) + mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + + mc_saved_data->mc_saved_count = mc_saved_count; +out: + return state; +} + +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = native_read_msr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + native_write_msr(msr, low, high); + +static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci) +{ + unsigned int val[2]; + u8 x86, x86_model; + struct cpu_signature csig; + unsigned int eax, ebx, ecx, edx; + + csig.sig = 0; + csig.pf = 0; + csig.rev = 0; + + memset(uci, 0, sizeof(*uci)); + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + csig.sig = eax; + + x86 = get_x86_family(csig.sig); + x86_model = get_x86_model(csig.sig); + + if ((x86_model >= 5) || (x86 > 6)) { + /* get processor flags from MSR 0x17 */ + native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig.pf = 1 << ((val[1] >> 18) & 7); + } + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + csig.rev = val[1]; + + uci->cpu_sig = csig; + uci->valid = 1; + + return 0; +} + +#ifdef DEBUG +static void __ref show_saved_mc(void) +{ + int i, j; + unsigned int sig, pf, rev, total_size, data_size, date; + struct ucode_cpu_info uci; + + if (mc_saved_data.mc_saved_count == 0) { + pr_debug("no micorcode data saved.\n"); + return; + } + pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + + collect_cpu_info_early(&uci); + + sig = uci.cpu_sig.sig; + pf = uci.cpu_sig.pf; + rev = uci.cpu_sig.rev; + pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", + smp_processor_id(), sig, pf, rev); + + for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + struct microcode_header_intel *mc_saved_header; + struct extended_sigtable *ext_header; + int ext_sigcount; + struct extended_signature *ext_sig; + + mc_saved_header = (struct microcode_header_intel *) + mc_saved_data.mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + rev = mc_saved_header->rev; + total_size = get_totalsize(mc_saved_header); + data_size = get_datasize(mc_saved_header); + date = mc_saved_header->date; + + pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", + i, sig, pf, rev, total_size, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + continue; + + ext_header = (struct extended_sigtable *) + mc_saved_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (j = 0; j < ext_sigcount; j++) { + sig = ext_sig->sig; + pf = ext_sig->pf; + + pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", + j, sig, pf); + + ext_sig++; + } + + } +} +#else +static inline void show_saved_mc(void) +{ +} +#endif + +#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +/* + * Save this mc into mc_saved_data. So it will be loaded early when a CPU is + * hot added or resumes. + * + * Please make sure this mc should be a valid microcode patch before calling + * this function. + */ +int save_mc_for_early(u8 *mc) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count_init; + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; + int ret = 0; + int i; + + /* + * Hold hotplug lock so mc_saved_data is not accessed by a CPU in + * hotplug. + */ + cpu_hotplug_driver_lock(); + + mc_saved_count_init = mc_saved_data.mc_saved_count; + mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved = mc_saved_data.mc_saved; + + if (mc_saved && mc_saved_count) + memcpy(mc_saved_tmp, mc_saved, + mc_saved_count * sizeof(struct mirocode_intel *)); + /* + * Save the microcode patch mc in mc_save_tmp structure if it's a newer + * version. + */ + + _save_mc(mc_saved_tmp, mc, &mc_saved_count); + + /* + * Save the mc_save_tmp in global mc_saved_data. + */ + ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + if (ret) { + pr_err("Can not save microcode patch.\n"); + goto out; + } + + show_saved_mc(); + + /* + * Free old saved microcod data. + */ + if (mc_saved) { + for (i = 0; i < mc_saved_count_init; i++) + kfree(mc_saved[i]); + kfree(mc_saved); + } + +out: + cpu_hotplug_driver_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(save_mc_for_early); +#endif + +static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; +static __init enum ucode_state +scan_microcode(unsigned long start, unsigned long end, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + unsigned int size = end - start + 1; + struct cpio_data cd; + long offset = 0; +#ifdef CONFIG_X86_32 + char *p = (char *)__pa_symbol(ucode_name); +#else + char *p = ucode_name; +#endif + + cd.data = NULL; + cd.size = 0; + + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; + + + return get_matching_model_microcode(0, start, cd.data, cd.size, + mc_saved_data, mc_saved_in_initrd, + uci); +} + +/* + * Print ucode update info. + */ +static void __cpuinit +print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) +{ + int cpu = smp_processor_id(); + + pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + cpu, + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); +} + +#ifdef CONFIG_X86_32 + +static int delay_ucode_info; +static int current_mc_date; + +/* + * Print early updated ucode info after printk works. This is delayed info dump. + */ +void __cpuinit show_ucode_info_early(void) +{ + struct ucode_cpu_info uci; + + if (delay_ucode_info) { + collect_cpu_info_early(&uci); + print_ucode_info(&uci, current_mc_date); + delay_ucode_info = 0; + } +} + +/* + * At this point, we can not call printk() yet. Keep microcode patch number in + * mc_saved_data.mc_saved and delay printing microcode info in + * show_ucode_info_early() until printk() works. + */ +static void __cpuinit print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + int *delay_ucode_info_p; + int *current_mc_date_p; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); + current_mc_date_p = (int *)__pa_symbol(¤t_mc_date); + + *delay_ucode_info_p = 1; + *current_mc_date_p = mc_intel->hdr.date; +} +#else + +/* + * Flush global tlb. We only do this in x86_64 where paging has been enabled + * already and PGE should be enabled as well. + */ +static inline void __cpuinit flush_tlb_early(void) +{ + __native_flush_tlb_global_irq_disabled(); +} + +static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + print_ucode_info(uci, mc_intel->hdr.date); +} +#endif + +static int apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + unsigned int val[2]; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return 0; + + /* write microcode via MSR 0x79 */ + native_wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (val[1] != mc_intel->hdr.rev) + return -1; + +#ifdef CONFIG_X86_64 + /* Flush global tlb. This is precaution. */ + flush_tlb_early(); +#endif + uci->cpu_sig.rev = val[1]; + + print_ucode(uci); + + return 0; +} + +/* + * This function converts microcode patch offsets previously stored in + * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. + */ +int __init save_microcode_in_initrd(void) +{ + unsigned int count = mc_saved_data.mc_saved_count; + struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; + int ret = 0; + + if (count == 0) + return ret; + + microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); + ret = save_microcode(&mc_saved_data, mc_saved, count); + if (ret) + pr_err("Can not save microcod patches from initrd"); + + show_saved_mc(); + + return ret; +} + +static void __init +_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start_early, + unsigned long initrd_end_early, + struct ucode_cpu_info *uci) +{ + collect_cpu_info_early(uci); + scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, + mc_saved_in_initrd, uci); + load_microcode(mc_saved_data, mc_saved_in_initrd, + initrd_start_early, uci); + apply_microcode_early(mc_saved_data, uci); +} + +void __init +load_ucode_intel_bsp(void) +{ + u64 ramdisk_image, ramdisk_size; + unsigned long initrd_start_early, initrd_end_early; + struct ucode_cpu_info uci; +#ifdef CONFIG_X86_32 + struct boot_params *boot_params_p; + + boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); + ramdisk_image = boot_params_p->hdr.ramdisk_image; + ramdisk_size = boot_params_p->hdr.ramdisk_size; + initrd_start_early = ramdisk_image; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp( + (struct mc_saved_data *)__pa_symbol(&mc_saved_data), + (unsigned long *)__pa_symbol(&mc_saved_in_initrd), + initrd_start_early, initrd_end_early, &uci); +#else + ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_size = boot_params.hdr.ramdisk_size; + initrd_start_early = ramdisk_image + PAGE_OFFSET; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, + initrd_start_early, initrd_end_early, &uci); +#endif +} + +void __cpuinit load_ucode_intel_ap(void) +{ + struct mc_saved_data *mc_saved_data_p; + struct ucode_cpu_info uci; + unsigned long *mc_saved_in_initrd_p; + unsigned long initrd_start_addr; +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p; + + mc_saved_in_initrd_p = + (unsigned long *)__pa_symbol(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); + initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); +#else + mc_saved_data_p = &mc_saved_data; + mc_saved_in_initrd_p = mc_saved_in_initrd; + initrd_start_addr = initrd_start; +#endif + + /* + * If there is no valid ucode previously saved in memory, no need to + * update ucode on this AP. + */ + if (mc_saved_data_p->mc_saved_count == 0) + return; + + collect_cpu_info_early(&uci); + load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, + initrd_start_addr, &uci); + apply_microcode_early(mc_saved_data_p, &uci); +} diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c new file mode 100644 index 000000000000..ce69320d0179 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_lib.c @@ -0,0 +1,174 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> + * H Peter Anvin" <hpa@zytor.com> + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/Assets/PDF/manual/253668.pdf + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/firmware.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/microcode_intel.h> +#include <asm/processor.h> +#include <asm/msr.h> + +static inline int +update_match_cpu(unsigned int csig, unsigned int cpf, + unsigned int sig, unsigned int pf) +{ + return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; +} + +int +update_match_revision(struct microcode_header_intel *mc_header, int rev) +{ + return (mc_header->rev <= rev) ? 0 : 1; +} + +int microcode_sanity_check(void *mc, int print_err) +{ + unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + int sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + + if (data_size + MC_HEADER_SIZE > total_size) { + if (print_err) + pr_err("error! Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + if (print_err) + pr_err("error! Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + if (print_err) + pr_err("error! Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + if (print_err) + pr_err("error! Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + if (print_err) + pr_warn("aborting, bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(microcode_sanity_check); + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) + return 1; + + /* Look for ext. headers: */ + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) + return 1; + ext_sig++; + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + + if (!update_match_revision(mc_header, rev)) + return 0; + + return get_matching_sig(csig, cpf, mc, rev); +} +EXPORT_SYMBOL_GPL(get_matching_microcode); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d41815265a0b..4903a03ae876 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -16,6 +16,7 @@ #include <asm/tlb.h> #include <asm/proto.h> #include <asm/dma.h> /* for MAX_DMA_PFN */ +#include <asm/microcode.h> #include "mm_internal.h" @@ -534,6 +535,15 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_MICROCODE_EARLY + /* + * Remember, initrd memory may contain microcode or other useful things. + * Before we lose initrd mem, we need to find a place to hold them + * now that normal virtual memory is enabled. + */ + save_microcode_in_initrd(); +#endif + /* * end could be not aligned, and We can not align that, * decompresser could be confused by aligned initrd_end diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5e86eee4e0e..e8e34938c57d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1408,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) xen_mc_callback(set_current_cr3, (void *)cr3); } } - static void xen_write_cr3(unsigned long cr3) { BUG_ON(preemptible()); @@ -1434,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } +#ifdef CONFIG_X86_64 +/* + * At the start of the day - when Xen launches a guest, it has already + * built pagetables for the guest. We diligently look over them + * in xen_setup_kernel_pagetable and graft as appropiate them in the + * init_level4_pgt and its friends. Then when we are happy we load + * the new init_level4_pgt - and continue on. + * + * The generic code starts (start_kernel) and 'init_mem_mapping' sets + * up the rest of the pagetables. When it has completed it loads the cr3. + * N.B. that baremetal would start at 'start_kernel' (and the early + * #PF handler would create bootstrap pagetables) - so we are running + * with the same assumptions as what to do when write_cr3 is executed + * at this point. + * + * Since there are no user-page tables at all, we have two variants + * of xen_write_cr3 - the early bootup (this one), and the late one + * (xen_write_cr3). The reason we have to do that is that in 64-bit + * the Linux kernel and user-space are both in ring 3 while the + * hypervisor is in ring 0. + */ +static void __init xen_write_cr3_init(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + this_cpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + + pv_mmu_ops.write_cr3 = &xen_write_cr3; +} +#endif + static int xen_pgd_alloc(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; @@ -2102,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, -#ifdef CONFIG_X86_32 .write_cr3 = xen_write_cr3_init, -#else - .write_cr3 = xen_write_cr3, -#endif .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, diff --git a/drivers/char/random.c b/drivers/char/random.c index 85e81ec1451e..594bda9dcfc8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -445,7 +445,7 @@ static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = { .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data }; @@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = { .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), .pool = nonblocking_pool_data }; diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index fa080ebd568f..ffeebc7e9f1c 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -75,7 +75,7 @@ static unsigned long past_skip; static struct pci_dev *fbd_dev; -static spinlock_t i7300_idle_lock; +static raw_spinlock_t i7300_idle_lock; static int i7300_idle_active; static u8 i7300_idle_thrtctl_saved; @@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, idle_begin_time = ktime_get(); } - spin_lock_irqsave(&i7300_idle_lock, flags); + raw_spin_lock_irqsave(&i7300_idle_lock, flags); if (val == IDLE_START) { cpumask_set_cpu(smp_processor_id(), idle_cpumask); @@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, } } end: - spin_unlock_irqrestore(&i7300_idle_lock, flags); + raw_spin_unlock_irqrestore(&i7300_idle_lock, flags); return 0; } @@ -548,7 +548,7 @@ struct debugfs_file_info { static int __init i7300_idle_init(void) { - spin_lock_init(&i7300_idle_lock); + raw_spin_lock_init(&i7300_idle_lock); total_us = 0; if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 3bc244d2636a..a62c4a47d52c 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -222,7 +222,7 @@ static struct { } dbg_data = { .idx = 0, .tty = 0, - .lck = __RW_LOCK_UNLOCKED(lck) + .lck = __RW_LOCK_UNLOCKED(dbg_data.lck) }; /** diff --git a/fs/file.c b/fs/file.c index 2b3570b7caeb..3906d9577a18 100644 --- a/fs/file.c +++ b/fs/file.c @@ -516,7 +516,7 @@ struct files_struct init_files = { .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, }, - .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), + .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), }; /* diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index 2533fddd34a6..d8d4c898c1bb 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, if (size == 8 && sizeof(unsigned long) != 8) wrong_size_cmpxchg(ptr); - local_irq_save(flags); + raw_local_irq_save(flags); switch (size) { case 1: prev = *(u8 *)ptr; if (prev == old) @@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, default: wrong_size_cmpxchg(ptr); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return prev; } @@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, u64 prev; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prev = *(u64 *)ptr; if (prev == old) *(u64 *)ptr = new; - local_irq_restore(flags); + raw_local_irq_restore(flags); return prev; } diff --git a/include/linux/idr.h b/include/linux/idr.h index de7e190f1af4..e5eb125effe6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -136,7 +136,7 @@ struct ida { struct ida_bitmap *free_bitmap; }; -#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } +#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_pre_get(struct ida *ida, gfp_t gfp_mask); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index bfe88c4aa251..f1e877b79ed8 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -412,7 +412,7 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) -#define lockdep_assert_held(l) do { } while (0) +#define lockdep_assert_held(l) do { (void)(l); } while (0) #define lockdep_recursing(tsk) (0) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 600060e25ec6..18299057402f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -30,92 +30,12 @@ #include <linux/preempt.h> #include <asm/processor.h> -typedef struct { - unsigned sequence; - spinlock_t lock; -} seqlock_t; - -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ -#define __SEQLOCK_UNLOCKED(lockname) \ - { 0, __SPIN_LOCK_UNLOCKED(lockname) } - -#define seqlock_init(x) \ - do { \ - (x)->sequence = 0; \ - spin_lock_init(&(x)->lock); \ - } while (0) - -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) - -/* Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. - */ -static inline void write_seqlock(seqlock_t *sl) -{ - spin_lock(&sl->lock); - ++sl->sequence; - smp_wmb(); -} - -static inline void write_sequnlock(seqlock_t *sl) -{ - smp_wmb(); - sl->sequence++; - spin_unlock(&sl->lock); -} - -static inline int write_tryseqlock(seqlock_t *sl) -{ - int ret = spin_trylock(&sl->lock); - - if (ret) { - ++sl->sequence; - smp_wmb(); - } - return ret; -} - -/* Start of read calculation -- fetch last complete writer token */ -static __always_inline unsigned read_seqbegin(const seqlock_t *sl) -{ - unsigned ret; - -repeat: - ret = ACCESS_ONCE(sl->sequence); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - - return ret; -} - -/* - * Test if reader processed invalid data. - * - * If sequence value changed then writer changed data while in section. - */ -static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) -{ - smp_rmb(); - - return unlikely(sl->sequence != start); -} - - /* * Version using sequence counter only. * This can be used when code has its own mutex protecting the * updating starting before the write_seqcountbeqin() and ending * after the write_seqcount_end(). */ - typedef struct seqcount { unsigned sequence; } seqcount_t; @@ -218,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_retry(s, start); } @@ -252,31 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s) s->sequence+=2; } +typedef struct { + struct seqcount seqcount; + spinlock_t lock; +} seqlock_t; + /* - * Possible sw/hw IRQ protected versions of the interfaces. + * These macros triggered gcc-3.x compile-time problems. We think these are + * OK now. Be cautious. */ -#define write_seqlock_irqsave(lock, flags) \ - do { local_irq_save(flags); write_seqlock(lock); } while (0) -#define write_seqlock_irq(lock) \ - do { local_irq_disable(); write_seqlock(lock); } while (0) -#define write_seqlock_bh(lock) \ - do { local_bh_disable(); write_seqlock(lock); } while (0) +#define __SEQLOCK_UNLOCKED(lockname) \ + { \ + .seqcount = SEQCNT_ZERO, \ + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ + } + +#define seqlock_init(x) \ + do { \ + seqcount_init(&(x)->seqcount); \ + spin_lock_init(&(x)->lock); \ + } while (0) -#define write_sequnlock_irqrestore(lock, flags) \ - do { write_sequnlock(lock); local_irq_restore(flags); } while(0) -#define write_sequnlock_irq(lock) \ - do { write_sequnlock(lock); local_irq_enable(); } while(0) -#define write_sequnlock_bh(lock) \ - do { write_sequnlock(lock); local_bh_enable(); } while(0) +#define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) -#define read_seqbegin_irqsave(lock, flags) \ - ({ local_irq_save(flags); read_seqbegin(lock); }) +/* + * Read side functions for starting and finalizing a read side section. + */ +static inline unsigned read_seqbegin(const seqlock_t *sl) +{ + return read_seqcount_begin(&sl->seqcount); +} -#define read_seqretry_irqrestore(lock, iv, flags) \ - ({ \ - int ret = read_seqretry(lock, iv); \ - local_irq_restore(flags); \ - ret; \ - }) +static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) +{ + return read_seqcount_retry(&sl->seqcount, start); +} + +/* + * Lock out other writers and update the count. + * Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. + */ +static inline void write_seqlock(seqlock_t *sl) +{ + spin_lock(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock(&sl->lock); +} + +static inline void write_seqlock_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_bh(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_bh(&sl->lock); +} + +static inline void write_seqlock_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_irq(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + write_seqcount_begin(&sl->seqcount); + return flags; +} + +#define write_seqlock_irqsave(lock, flags) \ + do { flags = __write_seqlock_irqsave(lock); } while (0) + +static inline void +write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irqrestore(&sl->lock, flags); +} #endif /* __LINUX_SEQLOCK_H */ diff --git a/kernel/futex.c b/kernel/futex.c index 9618b6e9fb36..fbc07a29ec53 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, if (!futex_cmpxchg_enabled) return -ENOSYS; - WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); - rcu_read_lock(); ret = -ESRCH; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 83e368b005fc..a9642d528630 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -142,8 +142,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!futex_cmpxchg_enabled) return -ENOSYS; - WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); - rcu_read_lock(); ret = -ESRCH; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 7981e5b2350d..8a0efac4f99d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, #endif if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { debug_locks_off(); - printk("BUG: MAX_LOCK_DEPTH too low!\n"); + printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n", + curr->lockdep_depth, MAX_LOCK_DEPTH); printk("turning off the locking correctness validator.\n"); + + lockdep_print_held_locks(current); + debug_show_all_locks(); dump_stack(); + return 0; } @@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, } static int -print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, +print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, unsigned long ip) { if (!debug_locks_off()) @@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, return 0; if (curr->lockdep_depth <= 0) - return print_unlock_inbalance_bug(curr, lock, ip); + return print_unlock_imbalance_bug(curr, lock, ip); return 1; } @@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, goto found_it; prev_hlock = hlock; } - return print_unlock_inbalance_bug(curr, lock, ip); + return print_unlock_imbalance_bug(curr, lock, ip); found_it: lockdep_init_map(lock, name, key, 0); @@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr, goto found_it; prev_hlock = hlock; } - return print_unlock_inbalance_bug(curr, lock, ip); + return print_unlock_imbalance_bug(curr, lock, ip); found_it: if (hlock->instance == lock) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b10a42bb0165..072bb066bb7d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -23,7 +23,7 @@ * NTP timekeeping variables: */ -DEFINE_SPINLOCK(ntp_lock); +DEFINE_RAW_SPINLOCK(ntp_lock); /* USER_HZ period (usecs): */ @@ -348,7 +348,7 @@ void ntp_clear(void) { unsigned long flags; - spin_lock_irqsave(&ntp_lock, flags); + raw_spin_lock_irqsave(&ntp_lock, flags); time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; @@ -362,7 +362,7 @@ void ntp_clear(void) /* Clear PPS state variables */ pps_clear(); - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); } @@ -372,9 +372,9 @@ u64 ntp_tick_length(void) unsigned long flags; s64 ret; - spin_lock_irqsave(&ntp_lock, flags); + raw_spin_lock_irqsave(&ntp_lock, flags); ret = tick_length; - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); return ret; } @@ -395,7 +395,7 @@ int second_overflow(unsigned long secs) int leap = 0; unsigned long flags; - spin_lock_irqsave(&ntp_lock, flags); + raw_spin_lock_irqsave(&ntp_lock, flags); /* * Leap second processing. If in leap-insert state at the end of the @@ -479,7 +479,7 @@ int second_overflow(unsigned long secs) time_adjust = 0; out: - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); return leap; } @@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc) getnstimeofday(&ts); - spin_lock_irq(&ntp_lock); + raw_spin_lock_irq(&ntp_lock); if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; @@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc) /* fill PPS status fields */ pps_fill_timex(txc); - spin_unlock_irq(&ntp_lock); + raw_spin_unlock_irq(&ntp_lock); txc->time.tv_sec = ts.tv_sec; txc->time.tv_usec = ts.tv_nsec; @@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) pts_norm = pps_normalize_ts(*phase_ts); - spin_lock_irqsave(&ntp_lock, flags); + raw_spin_lock_irqsave(&ntp_lock, flags); /* clear the error bits, they will be set again if needed */ time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); @@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) * just start the frequency interval */ if (unlikely(pps_fbase.tv_sec == 0)) { pps_fbase = *raw_ts; - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); return; } @@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) time_status |= STA_PPSJITTER; /* restart the frequency calibration interval */ pps_fbase = *raw_ts; - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); pr_err("hardpps: PPSJITTER: bad pulse\n"); return; } @@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) hardpps_update_phase(pts_norm.nsec); - spin_unlock_irqrestore(&ntp_lock, flags); + raw_spin_unlock_irqrestore(&ntp_lock, flags); } EXPORT_SYMBOL(hardpps); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 27689422aa92..4a944676358e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -113,9 +113,9 @@ static int get_softlockup_thresh(void) * resolution, and we don't need to waste time with a big divide when * 2^30ns == 1.074s. */ -static unsigned long get_timestamp(int this_cpu) +static unsigned long get_timestamp(void) { - return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ + return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) @@ -133,9 +133,7 @@ static void set_sample_period(void) /* Commands for resetting the watchdog */ static void __touch_watchdog(void) { - int this_cpu = smp_processor_id(); - - __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); + __this_cpu_write(watchdog_touch_ts, get_timestamp()); } void touch_softlockup_watchdog(void) @@ -196,7 +194,7 @@ static int is_hardlockup(void) static int is_softlockup(unsigned long touch_ts) { - unsigned long now = get_timestamp(smp_processor_id()); + unsigned long now = get_timestamp(); /* Warn about unreasonable delays: */ if (time_after(now, touch_ts + get_softlockup_thresh())) diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7aae0f2a5e0a..c3eb261a7df3 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); * Normal standalone locks, for the circular and irq-context * dependency tests: */ -static DEFINE_SPINLOCK(lock_A); -static DEFINE_SPINLOCK(lock_B); -static DEFINE_SPINLOCK(lock_C); -static DEFINE_SPINLOCK(lock_D); +static DEFINE_RAW_SPINLOCK(lock_A); +static DEFINE_RAW_SPINLOCK(lock_B); +static DEFINE_RAW_SPINLOCK(lock_C); +static DEFINE_RAW_SPINLOCK(lock_D); static DEFINE_RWLOCK(rwlock_A); static DEFINE_RWLOCK(rwlock_B); @@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D); * but X* and Y* are different classes. We do this so that * we do not trigger a real lockup: */ -static DEFINE_SPINLOCK(lock_X1); -static DEFINE_SPINLOCK(lock_X2); -static DEFINE_SPINLOCK(lock_Y1); -static DEFINE_SPINLOCK(lock_Y2); -static DEFINE_SPINLOCK(lock_Z1); -static DEFINE_SPINLOCK(lock_Z2); +static DEFINE_RAW_SPINLOCK(lock_X1); +static DEFINE_RAW_SPINLOCK(lock_X2); +static DEFINE_RAW_SPINLOCK(lock_Y1); +static DEFINE_RAW_SPINLOCK(lock_Y2); +static DEFINE_RAW_SPINLOCK(lock_Z1); +static DEFINE_RAW_SPINLOCK(lock_Z2); static DEFINE_RWLOCK(rwlock_X1); static DEFINE_RWLOCK(rwlock_X2); @@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2); */ #define INIT_CLASS_FUNC(class) \ static noinline void \ -init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ - struct rw_semaphore *rwsem) \ +init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ + struct mutex *mutex, struct rw_semaphore *rwsem)\ { \ - spin_lock_init(lock); \ + raw_spin_lock_init(lock); \ rwlock_init(rwlock); \ mutex_init(mutex); \ init_rwsem(rwsem); \ @@ -168,10 +168,10 @@ static void init_shared_classes(void) * Shortcuts for lock/unlock API variants, to keep * the testcases compact: */ -#define L(x) spin_lock(&lock_##x) -#define U(x) spin_unlock(&lock_##x) +#define L(x) raw_spin_lock(&lock_##x) +#define U(x) raw_spin_unlock(&lock_##x) #define LU(x) L(x); U(x) -#define SI(x) spin_lock_init(&lock_##x) +#define SI(x) raw_spin_lock_init(&lock_##x) #define WL(x) write_lock(&rwlock_##x) #define WU(x) write_unlock(&rwlock_##x) @@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #define I2(x) \ do { \ - spin_lock_init(&lock_##x); \ + raw_spin_lock_init(&lock_##x); \ rwlock_init(&rwlock_##x); \ mutex_init(&mutex_##x); \ init_rwsem(&rwsem_##x); \ diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 7e0d6a58fc83..7542afbb22b3 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) goto dont_wake_writers; } - /* if we are allowed to wake writers try to grant a single write lock - * if there's a writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential - * contention + /* + * as we support write lock stealing, we can't set sem->activity + * to -1 here to indicate we get the lock. Instead, we wake it up + * to let it go get it again. */ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { - sem->activity = -1; - list_del(&waiter->list); - tsk = waiter->task; - /* Don't touch waiter after ->task has been NULLed */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + wake_up_process(waiter->task); goto out; } @@ -121,18 +114,10 @@ static inline struct rw_semaphore * __rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); + wake_up_process(waiter->task); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); return sem; } @@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore - * - we increment the waiting count anyway to indicate an exclusive lock */ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { @@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity = -1; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ + tsk = current; waiter.task = tsk; waiter.flags = RWSEM_WAITING_FOR_WRITE; - get_task_struct(tsk); - list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ + /* wait for someone to release the lock */ for (;;) { - if (!waiter.task) + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke it or someone + * else in the head of the wait list up. + */ + if (sem->activity == 0) break; - schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); - tsk->state = TASK_RUNNING; - out: - ; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } void __sched __down_write(struct rw_semaphore *sem) @@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ + if (sem->activity == 0) { + /* got the lock */ sem->activity = -1; ret = 1; } diff --git a/lib/rwsem.c b/lib/rwsem.c index 8337e1b9bb8d..ad5e0df16ab4 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -2,6 +2,8 @@ * * Written by David Howells (dhowells@redhat.com). * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi <alex.shi@intel.com> */ #include <linux/rwsem.h> #include <linux/sched.h> @@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop, adjustment; + signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) @@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) */ goto out; - /* There's a writer at the front of the queue - try to grant it the - * write lock. However, we only wake this writer if we can transition - * the active part of the count from 0 -> 1 - */ - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (waiter->list.next == &sem->wait_list) - adjustment -= RWSEM_WAITING_BIAS; - - try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (oldcount & RWSEM_ACTIVE_MASK) - /* Someone grabbed the sem already */ - goto undo_write; - - /* We must be careful not to touch 'waiter' after we set ->task = NULL. - * It is an allocated on the waiter's stack and may become invalid at - * any time after that point (due to a wakeup from another source). - */ - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + /* Wake up the writing waiter and let the task grab the sem: */ + wake_up_process(waiter->task); goto out; readers_only: @@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) out: return sem; +} + +/* Try to get write sem, caller holds sem->wait_lock: */ +static int try_get_writer_sem(struct rw_semaphore *sem, + struct rwsem_waiter *waiter) +{ + struct rwsem_waiter *fwaiter; + long oldcount, adjustment; - /* undo the change to the active count, but check for a transition - * 1->0 */ - undo_write: + /* only steal when first waiter is writing */ + fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + return 0; + + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + /* Only one waiter in the queue: */ + if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + +try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (!(oldcount & RWSEM_ACTIVE_MASK)) { + /* No active lock: */ + struct task_struct *tsk = waiter->task; + + list_del(&waiter->list); + smp_mb(); + put_task_struct(tsk); + tsk->state = TASK_RUNNING; + return 1; + } + /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - goto out; + return 0; goto try_again_write; } @@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem, for (;;) { if (!waiter.task) break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (flags == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + raw_spin_unlock_irq(&sem->wait_lock); schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } |