diff options
256 files changed, 5743 insertions, 3573 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 5c8d74968090..fc103d7a0474 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh() or - call_rcu_sched(), is used, the callback function must be - written to be called from softirq context. In particular, +5. If call_rcu(), or a related primitive such as call_rcu_bh(), + call_rcu_sched(), or call_srcu() is used, the callback function + must be written to be called from softirq context. In particular, it cannot block. 6. Since synchronize_rcu() can block, it cannot be called from @@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome! updater uses call_rcu_sched() or synchronize_sched(), then the corresponding readers must disable preemption, possibly by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu(), the the corresponding - readers must use srcu_read_lock() and srcu_read_unlock(), - and with the same srcu_struct. The rules for the expedited - primitives are the same as for their non-expedited counterparts. - Mixing things up will result in confusion and broken kernels. + If the updater uses synchronize_srcu() or call_srcu(), + the the corresponding readers must use srcu_read_lock() and + srcu_read_unlock(), and with the same srcu_struct. The rules for + the expedited primitives are the same as for their non-expedited + counterparts. Mixing things up will result in confusion and + broken kernels. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome! victim CPU from ever going offline.) 14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), - synchronize_srcu(), and synchronize_srcu_expedited()) may only - be invoked from process context. Unlike other forms of RCU, it - -is- permissible to block in an SRCU read-side critical section - (demarked by srcu_read_lock() and srcu_read_unlock()), hence the - "SRCU": "sleepable RCU". Please note that if you don't need - to sleep in read-side critical sections, you should be using - RCU rather than SRCU, because RCU is almost always faster and - easier to use than is SRCU. + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu()) + may only be invoked from process context. Unlike other forms of + RCU, it -is- permissible to block in an SRCU read-side critical + section (demarked by srcu_read_lock() and srcu_read_unlock()), + hence the "SRCU": "sleepable RCU". Please note that if you + don't need to sleep in read-side critical sections, you should be + using RCU rather than SRCU, because RCU is almost always faster + and easier to use than is SRCU. If you need to enter your read-side critical section in a hardirq or exception handler, and then exit that same read-side @@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome! cleanup_srcu_struct(). These are passed a "struct srcu_struct" that defines the scope of a given SRCU domain. Once initialized, the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() - synchronize_srcu(), and synchronize_srcu_expedited(). A given - synchronize_srcu() waits only for SRCU read-side critical + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). + A given synchronize_srcu() waits only for SRCU read-side critical sections governed by srcu_read_lock() and srcu_read_unlock() calls that have been passed the same srcu_struct. This property is what makes sleeping read-side critical sections tolerable -- @@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome! requiring SRCU's read-side deadlock immunity or low read-side realtime latency. - Note that, rcu_assign_pointer() relates to SRCU just as they do + Note that, rcu_assign_pointer() relates to SRCU just as it does to other forms of RCU. 15. The whole point of call_rcu(), synchronize_rcu(), and friends diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index e439a0edee22..38428c125135 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. -Quick Quiz #1: Why is there no srcu_barrier()? - The rcutorture module makes use of rcu_barrier in its exit function as follows: @@ -162,7 +160,7 @@ for any pre-existing callbacks to complete. Then lines 55-62 print status and do operation-specific cleanup, and then return, permitting the module-unload operation to be completed. -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Your module might have additional complications. For example, if your @@ -242,7 +240,7 @@ reaches zero, as follows: 4 complete(&rcu_barrier_completion); 5 } -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in @@ -259,12 +257,7 @@ so that your module may be safely unloaded. Answers to Quick Quizzes -Quick Quiz #1: Why is there no srcu_barrier()? - -Answer: Since there is no call_srcu(), there can be no outstanding SRCU - callbacks. Therefore, there is no need to wait for them. - -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Answer: Interestingly enough, rcu_barrier() was not originally @@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally implementing rcutorture, and found that rcu_barrier() solves this problem as well. -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 4ddf3913fd8c..7dce8a17eac2 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows: and synchronize_rcu_bh_expedited(). "srcu": srcu_read_lock(), srcu_read_unlock() and + call_srcu(). + + "srcu_sync": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu(). "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu_expedited(). + "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and call_srcu(). + + "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and synchronize_srcu(). + "sched": preempt_disable(), preempt_enable(), and call_rcu_sched(). diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 6bbe8dcdc3da..69ee188515e7 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier - srcu_read_lock synchronize_srcu N/A - srcu_read_unlock synchronize_srcu_expedited - srcu_read_lock_raw + srcu_read_lock synchronize_srcu srcu_barrier + srcu_read_unlock call_srcu + srcu_read_lock_raw synchronize_srcu_expedited srcu_read_unlock_raw srcu_dereference diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 506c7390c2b9..13f1aa09b938 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -86,7 +86,7 @@ There is also a gitweb interface available at http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git More information about kexec-tools can be found at -http://www.kernel.org/pub/linux/utils/kernel/kexec/README.html +http://horms.net/projects/kexec/ 3) Unpack the tarball with the tar command, as follows: diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a92c5ebf373e..12783fa833c3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. + rcutree.fanout_leaf= [KNL,BOOT] + Increase the number of CPUs assigned to each + leaf rcu_node structure. Useful for very large + systems. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. diff --git a/MAINTAINERS b/MAINTAINERS index c82c343168e8..fe643e7b9df6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5565,7 +5565,7 @@ F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Anirban Chakraborty <anirban.chakraborty@qlogic.com> +M: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com> M: Sony Chacko <sony.chacko@qlogic.com> M: linux-driver@qlogic.com L: netdev@vger.kernel.org @@ -5573,7 +5573,6 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Anirban Chakraborty <anirban.chakraborty@qlogic.com> M: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com> M: Ron Mercer <ron.mercer@qlogic.com> M: linux-driver@qlogic.com @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Saber-toothed Squirrel # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 10dcec7e7321..f7b84aced654 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -43,8 +43,8 @@ pmu { compatible = "arm,cortex-a9-pmu"; - interrupts = <0 8 0x04 - 0 9 0x04>; + interrupts = <0 6 0x04 + 0 7 0x04>; }; L2: l2-cache { @@ -119,8 +119,8 @@ gmac0: eth@e2000000 { compatible = "st,spear600-gmac"; reg = <0xe2000000 0x8000>; - interrupts = <0 23 0x4 - 0 24 0x4>; + interrupts = <0 33 0x4 + 0 34 0x4>; interrupt-names = "macirq", "eth_wake_irq"; status = "disabled"; }; @@ -202,6 +202,7 @@ kbd@e0300000 { compatible = "st,spear300-kbd"; reg = <0xe0300000 0x1000>; + interrupts = <0 52 0x4>; status = "disabled"; }; @@ -224,7 +225,7 @@ serial@e0000000 { compatible = "arm,pl011", "arm,primecell"; reg = <0xe0000000 0x1000>; - interrupts = <0 36 0x4>; + interrupts = <0 35 0x4>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index c13fd1f3b09f..e4e912f95024 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -15,8 +15,8 @@ /include/ "spear320.dtsi" / { - model = "ST SPEAr300 Evaluation Board"; - compatible = "st,spear300-evb", "st,spear300"; + model = "ST SPEAr320 Evaluation Board"; + compatible = "st,spear320-evb", "st,spear320"; #address-cells = <1>; #size-cells = <1>; @@ -26,7 +26,7 @@ ahb { pinmux@b3000000 { - st,pinmux-mode = <3>; + st,pinmux-mode = <4>; pinctrl-names = "default"; pinctrl-0 = <&state_default>; diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi index 089f0a42c50e..a3c36e47d7ef 100644 --- a/arch/arm/boot/dts/spear600.dtsi +++ b/arch/arm/boot/dts/spear600.dtsi @@ -181,6 +181,7 @@ timer@f0000000 { compatible = "st,spear-timer"; reg = <0xf0000000 0x400>; + interrupt-parent = <&vic0>; interrupts = <16>; }; }; diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c index 0f41bd1c47c3..66db5f13af84 100644 --- a/arch/arm/mach-spear3xx/spear3xx.c +++ b/arch/arm/mach-spear3xx/spear3xx.c @@ -87,7 +87,7 @@ void __init spear3xx_map_io(void) static void __init spear3xx_timer_init(void) { - char pclk_name[] = "pll3_48m_clk"; + char pclk_name[] = "pll3_clk"; struct clk *gpt_clk, *pclk; spear3xx_clk_init(); diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c index 2e2e3596583e..9af67d003c62 100644 --- a/arch/arm/mach-spear6xx/spear6xx.c +++ b/arch/arm/mach-spear6xx/spear6xx.c @@ -423,7 +423,7 @@ void __init spear6xx_map_io(void) static void __init spear6xx_timer_init(void) { - char pclk_name[] = "pll3_48m_clk"; + char pclk_name[] = "pll3_clk"; struct clk *gpt_clk, *pclk; spear6xx_clk_init(); diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index f7264621e58d..149fbefc1a4d 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -180,9 +180,7 @@ void __cpuinit start_secondary(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); local_irq_enable(); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 1113b8aba07f..963d2db53bfa 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -382,7 +382,6 @@ smp_callin (void) set_numa_node(cpu_to_node_map[cpuid]); set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); - ipi_call_lock_irq(); spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); @@ -390,7 +389,6 @@ smp_callin (void) set_cpu_online(cpuid, true); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); - ipi_call_unlock_irq(); smp_setup_percpu_timer(); diff --git a/arch/m32r/boot/compressed/Makefile b/arch/m32r/boot/compressed/Makefile index 177716b1d613..01729c2979ba 100644 --- a/arch/m32r/boot/compressed/Makefile +++ b/arch/m32r/boot/compressed/Makefile @@ -43,9 +43,9 @@ endif OBJCOPYFLAGS += -R .empty_zero_page -suffix_$(CONFIG_KERNEL_GZIP) = gz -suffix_$(CONFIG_KERNEL_BZIP2) = bz2 -suffix_$(CONFIG_KERNEL_LZMA) = lzma +suffix-$(CONFIG_KERNEL_GZIP) = gz +suffix-$(CONFIG_KERNEL_BZIP2) = bz2 +suffix-$(CONFIG_KERNEL_LZMA) = lzma $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE $(call if_changed,ld) diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 370d60881977..28a09529f206 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -28,7 +28,7 @@ static unsigned long free_mem_ptr; static unsigned long free_mem_end_ptr; #ifdef CONFIG_KERNEL_BZIP2 -static void *memset(void *s, int c, size_t n) +void *memset(void *s, int c, size_t n) { char *ss = s; @@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n) #endif #ifdef CONFIG_KERNEL_GZIP +void *memcpy(void *dest, const void *src, size_t n) +{ + char *d = dest; + const char *s = src; + while (n--) + *d++ = *s++; + + return dest; +} + #define BOOT_HEAP_SIZE 0x10000 #include "../../../../lib/decompress_inflate.c" #endif diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h index 527527584dd0..4313aa62b51b 100644 --- a/arch/m32r/include/asm/ptrace.h +++ b/arch/m32r/include/asm/ptrace.h @@ -113,9 +113,6 @@ struct pt_regs { #define PTRACE_OLDSETOPTIONS 21 -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 - #ifdef __KERNEL__ #include <asm/m32r.h> /* M32R_PSW_BSM, M32R_PSW_BPM */ diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h index cf7829a61551..c689b828dfe2 100644 --- a/arch/m32r/include/asm/smp.h +++ b/arch/m32r/include/asm/smp.h @@ -79,11 +79,6 @@ static __inline__ int cpu_number_map(int cpu) return cpu; } -static __inline__ unsigned int num_booting_cpus(void) -{ - return cpumask_weight(&cpu_callout_map); -} - extern void smp_send_timer(void); extern unsigned long send_IPI_mask_phys(const cpumask_t*, int, int); diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 4c03361537aa..51f5e9aa4901 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child) if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) != sizeof(insn)) - return -EIO; + return; compute_next_pc(insn, pc, &next_pc, child); if (next_pc & 0x80000000) - return -EIO; + return; if (embed_debug_trap(child, next_pc)) - return -EIO; + return; invalidate_cache(); - return 0; } void user_disable_single_step(struct task_struct *child) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index f3fb2c029cfc..d0f60b97bbc5 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, case -ERESTARTNOINTR: regs->r0 = regs->orig_r0; if (prev_insn(regs) < 0) - return -EFAULT; + return; } } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 09ab87ee6fef..b3e10fdd3898 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -288,6 +288,7 @@ config MIPS_MALTA select SYS_HAS_CPU_MIPS32_R1 select SYS_HAS_CPU_MIPS32_R2 select SYS_HAS_CPU_MIPS64_R1 + select SYS_HAS_CPU_MIPS64_R2 select SYS_HAS_CPU_NEVADA select SYS_HAS_CPU_RM7000 select SYS_HAS_EARLY_PRINTK @@ -1423,6 +1424,7 @@ config CPU_SB1 config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON + select ARCH_SPARSEMEM_ENABLE select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_SMP diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 6210b8d84109..b311be45a720 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -21,6 +21,7 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS + select BCMA_HOST_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI default y help diff --git a/arch/mips/bcm63xx/dev-pcmcia.c b/arch/mips/bcm63xx/dev-pcmcia.c index de4d917fd54d..a551bab5ecb9 100644 --- a/arch/mips/bcm63xx/dev-pcmcia.c +++ b/arch/mips/bcm63xx/dev-pcmcia.c @@ -79,11 +79,11 @@ static int __init config_pcmcia_cs(unsigned int cs, return ret; } -static const __initdata struct { +static const struct { unsigned int cs; unsigned int base; unsigned int size; -} pcmcia_cs[3] = { +} pcmcia_cs[3] __initconst = { { .cs = MPI_CS_PCMCIA_COMMON, .base = BCM_PCMCIA_COMMON_BASE_PA, diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index f9e275a50d98..2f4f6d5e05b6 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -82,10 +82,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY help Lock the kernel's implementation of memcpy() into L2. -config ARCH_SPARSEMEM_ENABLE - def_bool y - select SPARSEMEM_STATIC - config IOMMU_HELPER bool diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b93048044eb..ee1fb9f7f517 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -185,7 +185,6 @@ static void __cpuinit octeon_init_secondary(void) octeon_init_cvmcount(); octeon_irq_setup_secondary(); - raw_local_irq_enable(); } /** @@ -233,6 +232,7 @@ static void octeon_smp_finish(void) /* to generate the first CPU timer interrupt */ write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); + local_irq_enable(); } /** diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 2e1ad4c652b7..82ad35ce2b45 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -17,7 +17,6 @@ #include <linux/irqflags.h> #include <linux/types.h> #include <asm/barrier.h> -#include <asm/bug.h> #include <asm/byteorder.h> /* sigh ... */ #include <asm/cpu-features.h> #include <asm/sgidefs.h> diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 285a41fa0b18..eee10dc07ac1 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -8,6 +8,7 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H +#include <linux/bug.h> #include <linux/irqflags.h> #include <asm/war.h> diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index f9fa2a479dd0..95e40c1e8ed1 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -94,6 +94,7 @@ #define PRID_IMP_24KE 0x9600 #define PRID_IMP_74K 0x9700 #define PRID_IMP_1004K 0x9900 +#define PRID_IMP_M14KC 0x9c00 /* * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE @@ -260,12 +261,12 @@ enum cpu_type_enum { */ CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K, CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350, - CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, + CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_M14KC, /* * MIPS64 class processors */ - CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, + CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, CPU_XLR, CPU_XLP, @@ -288,7 +289,7 @@ enum cpu_type_enum { #define MIPS_CPU_ISA_M64R2 0x00000100 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_I | MIPS_CPU_ISA_II | \ - MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 ) + MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2) #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \ MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2) diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h index 86548da650e7..991b659e2548 100644 --- a/arch/mips/include/asm/gic.h +++ b/arch/mips/include/asm/gic.h @@ -206,7 +206,7 @@ #define GIC_VPE_EIC_SHADOW_SET_BASE 0x0100 #define GIC_VPE_EIC_SS(intr) \ - (GIC_EIC_SHADOW_SET_BASE + (4 * intr)) + (GIC_VPE_EIC_SHADOW_SET_BASE + (4 * intr)) #define GIC_VPE_EIC_VEC_BASE 0x0800 #define GIC_VPE_EIC_VEC(intr) \ @@ -330,6 +330,17 @@ struct gic_intr_map { #define GIC_FLAG_TRANSPARENT 0x02 }; +/* + * This is only used in EIC mode. This helps to figure out which + * shared interrupts we need to process when we get a vector interrupt. + */ +#define GIC_MAX_SHARED_INTR 0x5 +struct gic_shared_intr_map { + unsigned int num_shared_intr; + unsigned int intr_list[GIC_MAX_SHARED_INTR]; + unsigned int local_intr_mask; +}; + extern void gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, struct gic_intr_map *intrmap, unsigned int intrmap_size, unsigned int irqbase); @@ -338,5 +349,7 @@ extern unsigned int gic_get_int(void); extern void gic_send_ipi(unsigned int intr); extern unsigned int plat_ipi_call_int_xlate(unsigned int); extern unsigned int plat_ipi_resched_int_xlate(unsigned int); +extern void gic_bind_eic_interrupt(int irq, int set); +extern unsigned int gic_get_timer_pending(void); #endif /* _ASM_GICREGS_H */ diff --git a/arch/mips/include/asm/inst.h b/arch/mips/include/asm/inst.h index 7ebfc392e58d..ab84064283db 100644 --- a/arch/mips/include/asm/inst.h +++ b/arch/mips/include/asm/inst.h @@ -251,7 +251,7 @@ struct f_format { /* FPU register format */ unsigned int func : 6; }; -struct ma_format { /* FPU multipy and add format (MIPS IV) */ +struct ma_format { /* FPU multiply and add format (MIPS IV) */ unsigned int opcode : 6; unsigned int fr : 5; unsigned int ft : 5; @@ -324,7 +324,7 @@ struct f_format { /* FPU register format */ unsigned int opcode : 6; }; -struct ma_format { /* FPU multipy and add format (MIPS IV) */ +struct ma_format { /* FPU multiply and add format (MIPS IV) */ unsigned int fmt : 2; unsigned int func : 4; unsigned int fd : 5; diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index a58f22998a86..29d9c23c20c7 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -17,6 +17,7 @@ #include <linux/types.h> #include <asm/addrspace.h> +#include <asm/bug.h> #include <asm/byteorder.h> #include <asm/cpu.h> #include <asm/cpu-features.h> diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index fb698dc09bc9..78dbb8a86da2 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -136,6 +136,7 @@ extern void free_irqno(unsigned int irq); * IE7. Since R2 their number has to be read from the c0_intctl register. */ #define CP0_LEGACY_COMPARE_IRQ 7 +#define CP0_LEGACY_PERFCNT_IRQ 7 extern int cp0_compare_irq; extern int cp0_compare_irq_shift; diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 94d4faad29a1..fdcd78ca1b03 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -99,7 +99,7 @@ #define CKCTL_6368_USBH_CLK_EN (1 << 15) #define CKCTL_6368_DISABLE_GLESS_EN (1 << 16) #define CKCTL_6368_NAND_CLK_EN (1 << 17) -#define CKCTL_6368_IPSEC_CLK_EN (1 << 17) +#define CKCTL_6368_IPSEC_CLK_EN (1 << 18) #define CKCTL_6368_ALL_SAFE_EN (CKCTL_6368_SWPKT_USB_EN | \ CKCTL_6368_SWPKT_SAR_EN | \ diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h index d11aa02a956a..5447d9fc4219 100644 --- a/arch/mips/include/asm/mips-boards/maltaint.h +++ b/arch/mips/include/asm/mips-boards/maltaint.h @@ -86,6 +86,16 @@ #define GIC_CPU_INT4 4 /* . */ #define GIC_CPU_INT5 5 /* Core Interrupt 5 */ +/* MALTA GIC local interrupts */ +#define GIC_INT_TMR (GIC_CPU_INT5) +#define GIC_INT_PERFCTR (GIC_CPU_INT5) + +/* GIC constants */ +/* Add 2 to convert non-eic hw int # to eic vector # */ +#define GIC_CPU_TO_VEC_OFFSET (2) +/* If we map an intr to pin X, GIC will actually generate vector X+1 */ +#define GIC_PIN_TO_VEC_OFFSET (1) + #define GIC_EXT_INTR(x) x /* External Interrupts used for IPI */ diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index c9420aa97e32..e71ff4c317f2 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -48,7 +48,7 @@ #define CP0_VPECONF0 $1, 2 #define CP0_VPECONF1 $1, 3 #define CP0_YQMASK $1, 4 -#define CP0_VPESCHEDULE $1, 5 +#define CP0_VPESCHEDULE $1, 5 #define CP0_VPESCHEFBK $1, 6 #define CP0_TCSTATUS $2, 1 #define CP0_TCBIND $2, 2 diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 5d33621b5658..4f8ddba8c360 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -22,7 +22,7 @@ struct task_struct; * switch_to(n) should switch tasks to task nr n, first * checking that n isn't the current task, in which case it does nothing. */ -extern asmlinkage void *resume(void *last, void *next, void *next_ti); +extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu); extern unsigned int ll_bit; extern struct task_struct *ll_task; @@ -66,11 +66,13 @@ do { \ #define switch_to(prev, next, last) \ do { \ + u32 __usedfpu; \ __mips_mt_fpaff_switch_to(prev); \ if (cpu_has_dsp) \ __save_dsp(prev); \ __clear_software_ll_bit(); \ - (last) = resume(prev, next, task_thread_info(next)); \ + __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \ + (last) = resume(prev, next, task_thread_info(next), __usedfpu); \ } while (0) #define finish_arch_switch(prev) \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index e2eca7d10598..ca97e0ecb64b 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -85,8 +87,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define STACK_WARN (THREAD_SIZE / 8) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6ae7ce4ac63e..f4630e1082ab 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -4,7 +4,7 @@ * Copyright (C) xxxx the Anonymous * Copyright (C) 1994 - 2006 Ralf Baechle * Copyright (C) 2003, 2004 Maciej W. Rozycki - * Copyright (C) 2001, 2004 MIPS Inc. + * Copyright (C) 2001, 2004, 2011, 2012 MIPS Technologies, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -199,6 +199,7 @@ void __init check_wait(void) cpu_wait = rm7k_wait_irqoff; break; + case CPU_M14KC: case CPU_24K: case CPU_34K: case CPU_1004K: @@ -810,6 +811,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_5KC; __cpu_name[cpu] = "MIPS 5Kc"; break; + case PRID_IMP_5KE: + c->cputype = CPU_5KE; + __cpu_name[cpu] = "MIPS 5KE"; + break; case PRID_IMP_20KC: c->cputype = CPU_20KC; __cpu_name[cpu] = "MIPS 20Kc"; @@ -831,6 +836,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_74K; __cpu_name[cpu] = "MIPS 74Kc"; break; + case PRID_IMP_M14KC: + c->cputype = CPU_M14KC; + __cpu_name[cpu] = "MIPS M14Kc"; + break; case PRID_IMP_1004K: c->cputype = CPU_1004K; __cpu_name[cpu] = "MIPS 1004Kc"; diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c index 57ba13edb03a..3fc1691110dc 100644 --- a/arch/mips/kernel/mips_ksyms.c +++ b/arch/mips/kernel/mips_ksyms.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05 by Ralf Baechle + * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05, 12 by Ralf Baechle * Copyright (C) 1999, 2000, 01 Silicon Graphics, Inc. */ #include <linux/interrupt.h> @@ -35,6 +35,12 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(kernel_thread); /* + * Functions that operate on entire pages. Mostly used by memory management. + */ +EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); + +/* * Userspace access stuff. */ EXPORT_SYMBOL(__copy_user); diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index ce89c8061708..0441f54b2a6a 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -31,7 +31,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, int usedfpu) */ .align 7 LEAF(resume) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index f29099b104c4..eb5e394a4650 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -162,11 +162,6 @@ static unsigned int counters_total_to_per_cpu(unsigned int counters) return counters >> vpe_shift(); } -static unsigned int counters_per_cpu_to_total(unsigned int counters) -{ - return counters << vpe_shift(); -} - #else /* !CONFIG_MIPS_MT_SMP */ #define vpe_id() 0 diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 293898391e67..9c51be5a163a 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -43,7 +43,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) ) + * struct thread_info *next_ti, int usedfpu) */ LEAF(resume) mfc0 t1, CP0_STATUS @@ -51,18 +51,9 @@ LEAF(resume) cpu_save_nonscratch a0 sw ra, THREAD_REG31(a0) - /* - * check if we need to save FPU registers - */ - lw t3, TASK_THREAD_INFO(a0) - lw t0, TI_FLAGS(t3) - li t1, _TIF_USEDFPU - and t2, t0, t1 - beqz t2, 1f - nor t1, zero, t1 + beqz a3, 1f - and t0, t0, t1 - sw t0, TI_FLAGS(t3) + PTR_L t3, TASK_THREAD_INFO(a0) /* * clear saved user stack CU1 bit diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index 9414f9354469..42d2a3938420 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -41,7 +41,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, int usedfpu) */ .align 5 LEAF(resume) @@ -53,16 +53,10 @@ /* * check if we need to save FPU registers */ - PTR_L t3, TASK_THREAD_INFO(a0) - LONG_L t0, TI_FLAGS(t3) - li t1, _TIF_USEDFPU - and t2, t0, t1 - beqz t2, 1f - nor t1, zero, t1 - and t0, t0, t1 - LONG_S t0, TI_FLAGS(t3) + beqz a3, 1f + PTR_L t3, TASK_THREAD_INFO(a0) /* * clear saved user stack CU1 bit */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3046e2986006..8e393b8443f7 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -15,7 +15,6 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/spinlock.h> -#include <linux/init.h> #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/reboot.h> @@ -197,13 +196,6 @@ static void bmips_init_secondary(void) write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0)); #endif - - /* make sure there won't be a timer interrupt for a little while */ - write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); - - irq_enable_hazard(); - set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE); - irq_enable_hazard(); } /* @@ -212,6 +204,13 @@ static void bmips_init_secondary(void) static void bmips_smp_finish(void) { pr_info("SMP: CPU%d is running\n", smp_processor_id()); + + /* make sure there won't be a timer interrupt for a little while */ + write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); + + irq_enable_hazard(); + set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE); + irq_enable_hazard(); } /* diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 48650c818040..1268392f1d27 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -122,13 +122,21 @@ asmlinkage __cpuinit void start_secondary(void) notify_cpu_starting(cpu); - mp_ops->smp_finish(); + set_cpu_online(cpu, true); + set_cpu_sibling_map(cpu); cpu_set(cpu, cpu_callin_map); synchronise_count_slave(); + /* + * irq will be enabled in ->smp_finish(), enabling it too early + * is dangerous. + */ + WARN_ON_ONCE(!irqs_disabled()); + mp_ops->smp_finish(); + cpu_idle(); } @@ -196,8 +204,6 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) while (!cpu_isset(cpu, cpu_callin_map)) udelay(100); - set_cpu_online(cpu, true); - return 0; } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index f5dd38f1d015..15b5f3cfd20c 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -322,7 +322,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) /* * Common setup before any secondaries are started - * Make sure all CPU's are in a sensible state before we boot any of the + * Make sure all CPUs are in a sensible state before we boot any of the * secondaries. * * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly @@ -340,12 +340,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) /* * TCContext gets an offset from the base of the IPIQ array * to be used in low-level code to detect the presence of - * an active IPI queue + * an active IPI queue. */ write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16); /* Bind tc to vpe */ write_tc_c0_tcbind(vpe); - /* In general, all TCs should have the same cpu_data indications */ + /* In general, all TCs should have the same cpu_data indications. */ memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips)); /* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */ if (cpu_data[0].cputype == CPU_34K || @@ -358,8 +358,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) } /* - * Tweak to get Count registes in as close a sync as possible. - * Value seems good for 34K-class cores. + * Tweak to get Count registes in as close a sync as possible. The + * value seems good for 34K-class cores. */ #define CP0_SKEW 8 @@ -615,7 +615,6 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) void smtc_init_secondary(void) { - local_irq_enable(); } void smtc_smp_finish(void) @@ -631,6 +630,8 @@ void smtc_smp_finish(void) if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id)) write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); + local_irq_enable(); + printk("TC %d going on-line as CPU %d\n", cpu_data[smp_processor_id()].tc_id, smp_processor_id()); } diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index 99f913c8d7a6..842d55e411fd 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -111,7 +111,6 @@ void __cpuinit synchronise_count_master(void) void __cpuinit synchronise_count_slave(void) { int i; - unsigned long flags; unsigned int initcount; int ncpus; @@ -123,8 +122,6 @@ void __cpuinit synchronise_count_slave(void) return; #endif - local_irq_save(flags); - /* * Not every cpu is online at the time this gets called, * so we first wait for the master to say everyone is ready @@ -154,7 +151,5 @@ void __cpuinit synchronise_count_slave(void) } /* Arrange for an interrupt in a short while */ write_c0_compare(read_c0_count() + COUNTON); - - local_irq_restore(flags); } #undef NR_LOOPS diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2d0c2a277f52..c3c293543703 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -132,6 +132,9 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) unsigned long ra = regs->regs[31]; unsigned long pc = regs->cp0_epc; + if (!task) + task = current; + if (raw_show_trace || !__kernel_text_address(pc)) { show_raw_backtrace(sp); return; @@ -1249,6 +1252,7 @@ static inline void parity_protection_init(void) break; case CPU_5KC: + case CPU_5KE: write_c0_ecc(0x80000000); back_to_back_c0_hazard(); /* Set the PE bit (bit 31) in the c0_errctl register. */ @@ -1498,6 +1502,7 @@ extern void flush_tlb_handlers(void); * Timer interrupt */ int cp0_compare_irq; +EXPORT_SYMBOL_GPL(cp0_compare_irq); int cp0_compare_irq_shift; /* @@ -1597,7 +1602,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu) cp0_perfcount_irq = -1; } else { cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ; - cp0_compare_irq_shift = cp0_compare_irq; + cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ; cp0_perfcount_irq = -1; } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 924da5eb7031..df243a64f430 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/thread_info.h> #include <asm-generic/vmlinux.lds.h> #undef mips @@ -72,7 +73,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4aa20280613e..fd6203f14f1f 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -3,8 +3,8 @@ # obj-y += cache.o dma-default.o extable.o fault.o \ - gup.o init.o mmap.o page.o tlbex.o \ - tlbex-fault.o uasm.o + gup.o init.o mmap.o page.o page-funcs.o \ + tlbex.o tlbex-fault.o uasm.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 5109be96d98d..f092c265dc63 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -977,7 +977,7 @@ static void __cpuinit probe_pcache(void) c->icache.linesz = 2 << lsize; else c->icache.linesz = lsize; - c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.sets = 32 << (((config1 >> 22) + 1) & 7); c->icache.ways = 1 + ((config1 >> 16) & 7); icache_size = c->icache.sets * @@ -997,7 +997,7 @@ static void __cpuinit probe_pcache(void) c->dcache.linesz = 2 << lsize; else c->dcache.linesz= lsize; - c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7); c->dcache.ways = 1 + ((config1 >> 7) & 7); dcache_size = c->dcache.sets * @@ -1051,6 +1051,7 @@ static void __cpuinit probe_pcache(void) case CPU_R14000: break; + case CPU_M14KC: case CPU_24K: case CPU_34K: case CPU_74K: diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S new file mode 100644 index 000000000000..48a6b38ff13e --- /dev/null +++ b/arch/mips/mm/page-funcs.S @@ -0,0 +1,50 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated clear_page/copy_page functions. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/regdef.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#define cpu_clear_page_function_name clear_page_cpu +#define cpu_copy_page_function_name copy_page_cpu +#else +#define cpu_clear_page_function_name clear_page +#define cpu_copy_page_function_name copy_page +#endif + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x058 bytes + * R4600 v1.7: 0x05c bytes + * R4600 v2.0: 0x060 bytes + * With prefetching, 16 word strides 0x120 bytes + */ +EXPORT(__clear_page_start) +LEAF(cpu_clear_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 288 +END(cpu_clear_page_function_name) +EXPORT(__clear_page_end) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 word strides 0x540 bytes + */ +EXPORT(__copy_page_start) +LEAF(cpu_copy_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 1344 +END(cpu_copy_page_function_name) +EXPORT(__copy_page_end) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index cc0b626858b3..98f530e18216 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -6,6 +6,7 @@ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2007 Maciej W. Rozycki * Copyright (C) 2008 Thiemo Seufer + * Copyright (C) 2012 MIPS Technologies, Inc. */ #include <linux/init.h> #include <linux/kernel.h> @@ -71,45 +72,6 @@ static struct uasm_reloc __cpuinitdata relocs[5]; #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) #define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x058 bytes - * R4600 v1.7: 0x05c bytes - * R4600 v2.0: 0x060 bytes - * With prefetching, 16 word strides 0x120 bytes - */ - -static u32 clear_page_array[0x120 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void clear_page_cpu(void *page) __attribute__((alias("clear_page_array"))); -#else -void clear_page(void *page) __attribute__((alias("clear_page_array"))); -#endif - -EXPORT_SYMBOL(clear_page); - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x11c bytes - * R4600 v1.7: 0x080 bytes - * R4600 v2.0: 0x07c bytes - * With prefetching, 16 word strides 0x540 bytes - */ -static u32 copy_page_array[0x540 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void -copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array"))); -#else -void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); -#endif - -EXPORT_SYMBOL(copy_page); - - static int pref_bias_clear_store __cpuinitdata; static int pref_bias_copy_load __cpuinitdata; static int pref_bias_copy_store __cpuinitdata; @@ -282,10 +244,15 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off) } } +extern u32 __clear_page_start; +extern u32 __clear_page_end; +extern u32 __copy_page_start; +extern u32 __copy_page_end; + void __cpuinit build_clear_page(void) { int off; - u32 *buf = (u32 *)&clear_page_array; + u32 *buf = &__clear_page_start; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; int i; @@ -356,17 +323,17 @@ void __cpuinit build_clear_page(void) uasm_i_jr(&buf, RA); uasm_i_nop(&buf); - BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array)); + BUG_ON(buf > &__clear_page_end); uasm_resolve_relocs(relocs, labels); pr_debug("Synthesized clear page handler (%u instructions).\n", - (u32)(buf - clear_page_array)); + (u32)(buf - &__clear_page_start)); pr_debug("\t.set push\n"); pr_debug("\t.set noreorder\n"); - for (i = 0; i < (buf - clear_page_array); i++) - pr_debug("\t.word 0x%08x\n", clear_page_array[i]); + for (i = 0; i < (buf - &__clear_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]); pr_debug("\t.set pop\n"); } @@ -427,7 +394,7 @@ static inline void build_copy_store_pref(u32 **buf, int off) void __cpuinit build_copy_page(void) { int off; - u32 *buf = (u32 *)©_page_array; + u32 *buf = &__copy_page_start; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; int i; @@ -595,21 +562,23 @@ void __cpuinit build_copy_page(void) uasm_i_jr(&buf, RA); uasm_i_nop(&buf); - BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); + BUG_ON(buf > &__copy_page_end); uasm_resolve_relocs(relocs, labels); pr_debug("Synthesized copy page handler (%u instructions).\n", - (u32)(buf - copy_page_array)); + (u32)(buf - &__copy_page_start)); pr_debug("\t.set push\n"); pr_debug("\t.set noreorder\n"); - for (i = 0; i < (buf - copy_page_array); i++) - pr_debug("\t.word 0x%08x\n", copy_page_array[i]); + for (i = 0; i < (buf - &__copy_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]); pr_debug("\t.set pop\n"); } #ifdef CONFIG_SIBYTE_DMA_PAGEOPS +extern void clear_page_cpu(void *page); +extern void copy_page_cpu(void *to, void *from); /* * Pad descriptors to cacheline, since each is exclusively owned by a diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 0bc485b3cd60..03eb0ef91580 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -9,6 +9,7 @@ * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2008, 2009 Cavium Networks, Inc. + * Copyright (C) 2011 MIPS Technologies, Inc. * * ... and the days got worse and worse and now you see * I've gone completly out of my mind. @@ -494,6 +495,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_R14000: case CPU_4KC: case CPU_4KEC: + case CPU_M14KC: case CPU_SB1: case CPU_SB1A: case CPU_4KSC: diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c index bf80921f2f56..284dea54faf5 100644 --- a/arch/mips/mti-malta/malta-pci.c +++ b/arch/mips/mti-malta/malta-pci.c @@ -241,8 +241,9 @@ void __init mips_pcibios_init(void) return; } - if (controller->io_resource->start < 0x00001000UL) /* FIXME */ - controller->io_resource->start = 0x00001000UL; + /* Change start address to avoid conflicts with ACPI and SMB devices */ + if (controller->io_resource->start < 0x00002000UL) + controller->io_resource->start = 0x00002000UL; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; @@ -253,7 +254,7 @@ void __init mips_pcibios_init(void) } /* Enable PCI 2.1 compatibility in PIIX4 */ -static void __init quirk_dlcsetup(struct pci_dev *dev) +static void __devinit quirk_dlcsetup(struct pci_dev *dev) { u8 odlc, ndlc; (void) pci_read_config_byte(dev, 0x82, &odlc); diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index b7f37d4982fa..2e28f653f66d 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -111,7 +111,7 @@ static void __init pci_clock_check(void) unsigned int __iomem *jmpr_p = (unsigned int *) ioremap(MALTA_JMPRS_REG, sizeof(unsigned int)); int jmpr = (__raw_readl(jmpr_p) >> 2) & 0x07; - static const int pciclocks[] __initdata = { + static const int pciclocks[] __initconst = { 33, 20, 25, 30, 12, 16, 37, 10 }; int pciclock = pciclocks[jmpr]; diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index acb677a1227c..b3df7c2aad1e 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -82,8 +82,10 @@ void __init prom_free_prom_memory(void) void xlp_mmu_init(void) { + /* enable extended TLB and Large Fixed TLB */ write_c0_config6(read_c0_config6() | 0x24); - current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1; + + /* set page mask of Fixed TLB in config7 */ write_c0_config7(PM_DEFAULT_MASK >> (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2))); } @@ -100,6 +102,10 @@ void __init prom_init(void) nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1)); #ifdef CONFIG_SMP nlm_wakeup_secondary_cpus(0xffffffff); + + /* update TLB size after waking up threads */ + current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1; + register_smp_ops(&nlm_smp_ops); #endif } diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index d1f2d4c52d42..b6e378211a2c 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -78,6 +78,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) switch (current_cpu_type()) { case CPU_5KC: + case CPU_M14KC: case CPU_20KC: case CPU_24K: case CPU_25KF: diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index baba3bcaa3c2..4d80a856048d 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -322,6 +322,10 @@ static int __init mipsxx_init(void) op_model_mipsxx_ops.num_counters = counters; switch (current_cpu_type()) { + case CPU_M14KC: + op_model_mipsxx_ops.cpu_type = "mips/M14Kc"; + break; + case CPU_20KC: op_model_mipsxx_ops.cpu_type = "mips/20K"; break; diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c index d5d4c018fb04..0857ab8c3919 100644 --- a/arch/mips/pci/fixup-fuloong2e.c +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -48,7 +48,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -static void __init loongson2e_nec_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_nec_fixup(struct pci_dev *pdev) { unsigned int val; @@ -60,7 +60,7 @@ static void __init loongson2e_nec_fixup(struct pci_dev *pdev) pci_write_config_dword(pdev, 0xe4, 1 << 5); } -static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func0_fixup(struct pci_dev *pdev) { unsigned char c; @@ -135,7 +135,7 @@ static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev) printk(KERN_INFO"via686b fix: ISA bridge done\n"); } -static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func1_fixup(struct pci_dev *pdev) { printk(KERN_INFO"via686b fix: IDE\n"); @@ -168,19 +168,19 @@ static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev) printk(KERN_INFO"via686b fix: IDE done\n"); } -static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func2_fixup(struct pci_dev *pdev) { /* irq routing */ pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10); } -static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func3_fixup(struct pci_dev *pdev) { /* irq routing */ pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11); } -static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func5_fixup(struct pci_dev *pdev) { unsigned int val; unsigned char c; diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c index 4b9768d5d729..a7b917dcf604 100644 --- a/arch/mips/pci/fixup-lemote2f.c +++ b/arch/mips/pci/fixup-lemote2f.c @@ -96,21 +96,21 @@ int pcibios_plat_dev_init(struct pci_dev *dev) } /* CS5536 SPEC. fixup */ -static void __init loongson_cs5536_isa_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_isa_fixup(struct pci_dev *pdev) { /* the uart1 and uart2 interrupt in PIC is enabled as default */ pci_write_config_dword(pdev, PCI_UART1_INT_REG, 1); pci_write_config_dword(pdev, PCI_UART2_INT_REG, 1); } -static void __init loongson_cs5536_ide_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ide_fixup(struct pci_dev *pdev) { /* setting the mutex pin as IDE function */ pci_write_config_dword(pdev, PCI_IDE_CFG_REG, CS5536_IDE_FLASH_SIGNATURE); } -static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_acc_fixup(struct pci_dev *pdev) { /* enable the AUDIO interrupt in PIC */ pci_write_config_dword(pdev, PCI_ACC_INT_REG, 1); @@ -118,14 +118,14 @@ static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev) pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xc0); } -static void __init loongson_cs5536_ohci_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ohci_fixup(struct pci_dev *pdev) { /* enable the OHCI interrupt in PIC */ /* THE OHCI, EHCI, UDC, OTG are shared with interrupt in PIC */ pci_write_config_dword(pdev, PCI_OHCI_INT_REG, 1); } -static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ehci_fixup(struct pci_dev *pdev) { u32 hi, lo; @@ -137,7 +137,7 @@ static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev) pci_write_config_dword(pdev, PCI_EHCI_FLADJ_REG, 0x2000); } -static void __init loongson_nec_fixup(struct pci_dev *pdev) +static void __devinit loongson_nec_fixup(struct pci_dev *pdev) { unsigned int val; diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c index 0f48498bc231..70073c98ed32 100644 --- a/arch/mips/pci/fixup-malta.c +++ b/arch/mips/pci/fixup-malta.c @@ -49,10 +49,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -static void __init malta_piix_func0_fixup(struct pci_dev *pdev) +static void __devinit malta_piix_func0_fixup(struct pci_dev *pdev) { unsigned char reg_val; - static int piixirqmap[16] __initdata = { /* PIIX PIRQC[A:D] irq mappings */ + static int piixirqmap[16] __devinitdata = { /* PIIX PIRQC[A:D] irq mappings */ 0, 0, 0, 3, 4, 5, 6, 7, 0, 9, 10, 11, @@ -83,7 +83,7 @@ static void __init malta_piix_func0_fixup(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, malta_piix_func0_fixup); -static void __init malta_piix_func1_fixup(struct pci_dev *pdev) +static void __devinit malta_piix_func1_fixup(struct pci_dev *pdev) { unsigned char reg_val; diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c index e08f49cb6875..8e4f8288eca2 100644 --- a/arch/mips/pci/fixup-mpc30x.c +++ b/arch/mips/pci/fixup-mpc30x.c @@ -22,13 +22,13 @@ #include <asm/vr41xx/mpc30x.h> -static const int internal_func_irqs[] __initdata = { +static const int internal_func_irqs[] __initconst = { VRC4173_CASCADE_IRQ, VRC4173_AC97_IRQ, VRC4173_USB_IRQ, }; -static const int irq_tab_mpc30x[] __initdata = { +static const int irq_tab_mpc30x[] __initconst = { [12] = VRC4173_PCMCIA1_IRQ, [13] = VRC4173_PCMCIA2_IRQ, [29] = MQ200_IRQ, diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c index f0bb9146e6c0..d02900a72916 100644 --- a/arch/mips/pci/fixup-sb1250.c +++ b/arch/mips/pci/fixup-sb1250.c @@ -15,7 +15,7 @@ * Set the BCM1250, etc. PCI host bridge's TRDY timeout * to the finite max. */ -static void __init quirk_sb1250_pci(struct pci_dev *dev) +static void __devinit quirk_sb1250_pci(struct pci_dev *dev) { pci_write_config_byte(dev, 0x40, 0xff); } @@ -25,7 +25,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, /* * The BCM1250, etc. PCI/HT bridge reports as a host bridge. */ -static void __init quirk_sb1250_ht(struct pci_dev *dev) +static void __devinit quirk_sb1250_ht(struct pci_dev *dev) { dev->class = PCI_CLASS_BRIDGE_PCI << 8; } @@ -35,7 +35,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT, /* * Set the SP1011 HT/PCI bridge's TRDY timeout to the finite max. */ -static void __init quirk_sp1011(struct pci_dev *dev) +static void __devinit quirk_sp1011(struct pci_dev *dev) { pci_write_config_byte(dev, 0x64, 0xff); } diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c index a1e7e6d80c8c..bc13e29d2bb3 100644 --- a/arch/mips/pci/ops-tx4927.c +++ b/arch/mips/pci/ops-tx4927.c @@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id) } #ifdef CONFIG_TOSHIBA_FPCIB0 -static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) { struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus); diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 0fbe4c0c170a..fdc24440294c 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -212,7 +212,7 @@ static inline void pci_enable_swapping(struct pci_dev *dev) bridge->b_widget.w_tflush; /* Flush */ } -static void __init pci_fixup_ioc3(struct pci_dev *d) +static void __devinit pci_fixup_ioc3(struct pci_dev *d) { pci_disable_swapping(d); } diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 1644805a6730..172af1cd5867 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -41,6 +41,7 @@ #include <linux/irq.h> #include <linux/irqdesc.h> #include <linux/console.h> +#include <linux/pci_regs.h> #include <asm/io.h> @@ -156,35 +157,55 @@ struct pci_controller nlm_pci_controller = { .io_offset = 0x00000000UL, }; +/* + * The top level PCIe links on the XLS PCIe controller appear as + * bridges. Given a device, this function finds which link it is + * on. + */ +static struct pci_dev *xls_get_pcie_link(const struct pci_dev *dev) +{ + struct pci_bus *bus, *p; + + /* Find the bridge on bus 0 */ + bus = dev->bus; + for (p = bus->parent; p && p->number != 0; p = p->parent) + bus = p; + + return p ? bus->self : NULL; +} + static int get_irq_vector(const struct pci_dev *dev) { + struct pci_dev *lnk; + if (!nlm_chip_is_xls()) - return PIC_PCIX_IRQ; /* for XLR just one IRQ*/ + return PIC_PCIX_IRQ; /* for XLR just one IRQ */ /* * For XLS PCIe, there is an IRQ per Link, find out which * link the device is on to assign interrupts - */ - if (dev->bus->self == NULL) + */ + lnk = xls_get_pcie_link(dev); + if (lnk == NULL) return 0; - switch (dev->bus->self->devfn) { - case 0x0: + switch (PCI_SLOT(lnk->devfn)) { + case 0: return PIC_PCIE_LINK0_IRQ; - case 0x8: + case 1: return PIC_PCIE_LINK1_IRQ; - case 0x10: + case 2: if (nlm_chip_is_xls_b()) return PIC_PCIE_XLSB0_LINK2_IRQ; else return PIC_PCIE_LINK2_IRQ; - case 0x18: + case 3: if (nlm_chip_is_xls_b()) return PIC_PCIE_XLSB0_LINK3_IRQ; else return PIC_PCIE_LINK3_IRQ; } - WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn); + WARN(1, "Unexpected devfn %d\n", lnk->devfn); return 0; } @@ -202,7 +223,27 @@ void arch_teardown_msi_irq(unsigned int irq) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; + struct pci_dev *lnk; int irq, ret; + u16 val; + + /* MSI not supported on XLR */ + if (!nlm_chip_is_xls()) + return 1; + + /* + * Enable MSI on the XLS PCIe controller bridge which was disabled + * at enumeration, the bridge MSI capability is at 0x50 + */ + lnk = xls_get_pcie_link(dev); + if (lnk == NULL) + return 1; + + pci_read_config_word(lnk, 0x50 + PCI_MSI_FLAGS, &val); + if ((val & PCI_MSI_FLAGS_ENABLE) == 0) { + val |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(lnk, 0x50 + PCI_MSI_FLAGS, val); + } irq = get_irq_vector(dev); if (irq <= 0) @@ -327,7 +368,7 @@ static int __init pcibios_init(void) } } else { /* XLR PCI controller ACK */ - irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack); + irq_set_handler_data(PIC_PCIX_IRQ, xlr_pci_ack); } return 0; diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index b71fae231049..5edab2bc6fc0 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -115,11 +115,11 @@ static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action) */ static void __cpuinit yos_init_secondary(void) { - set_c0_status(ST0_CO | ST0_IE | ST0_IM); } static void __cpuinit yos_smp_finish(void) { + set_c0_status(ST0_CO | ST0_IM | ST0_IE); } /* Hook for after all CPUs are online */ diff --git a/arch/mips/powertv/asic/asic-calliope.c b/arch/mips/powertv/asic/asic-calliope.c index 0a170e0ffeaa..7773f3d956b0 100644 --- a/arch/mips/powertv/asic/asic-calliope.c +++ b/arch/mips/powertv/asic/asic-calliope.c @@ -28,7 +28,7 @@ #define CALLIOPE_ADDR(x) (CALLIOPE_IO_BASE + (x)) -const struct register_map calliope_register_map __initdata = { +const struct register_map calliope_register_map __initconst = { .eic_slow0_strt_add = {.phys = CALLIOPE_ADDR(0x800000)}, .eic_cfg_bits = {.phys = CALLIOPE_ADDR(0x800038)}, .eic_ready_status = {.phys = CALLIOPE_ADDR(0x80004c)}, diff --git a/arch/mips/powertv/asic/asic-cronus.c b/arch/mips/powertv/asic/asic-cronus.c index bbc0c122be5e..da076db7b7ed 100644 --- a/arch/mips/powertv/asic/asic-cronus.c +++ b/arch/mips/powertv/asic/asic-cronus.c @@ -28,7 +28,7 @@ #define CRONUS_ADDR(x) (CRONUS_IO_BASE + (x)) -const struct register_map cronus_register_map __initdata = { +const struct register_map cronus_register_map __initconst = { .eic_slow0_strt_add = {.phys = CRONUS_ADDR(0x000000)}, .eic_cfg_bits = {.phys = CRONUS_ADDR(0x000038)}, .eic_ready_status = {.phys = CRONUS_ADDR(0x00004C)}, diff --git a/arch/mips/powertv/asic/asic-gaia.c b/arch/mips/powertv/asic/asic-gaia.c index 91dda682752c..47683b370e74 100644 --- a/arch/mips/powertv/asic/asic-gaia.c +++ b/arch/mips/powertv/asic/asic-gaia.c @@ -23,7 +23,7 @@ #include <linux/init.h> #include <asm/mach-powertv/asic.h> -const struct register_map gaia_register_map __initdata = { +const struct register_map gaia_register_map __initconst = { .eic_slow0_strt_add = {.phys = GAIA_IO_BASE + 0x000000}, .eic_cfg_bits = {.phys = GAIA_IO_BASE + 0x000038}, .eic_ready_status = {.phys = GAIA_IO_BASE + 0x00004C}, diff --git a/arch/mips/powertv/asic/asic-zeus.c b/arch/mips/powertv/asic/asic-zeus.c index 4a05bb096476..6ff4b10f09da 100644 --- a/arch/mips/powertv/asic/asic-zeus.c +++ b/arch/mips/powertv/asic/asic-zeus.c @@ -28,7 +28,7 @@ #define ZEUS_ADDR(x) (ZEUS_IO_BASE + (x)) -const struct register_map zeus_register_map __initdata = { +const struct register_map zeus_register_map __initconst = { .eic_slow0_strt_add = {.phys = ZEUS_ADDR(0x000000)}, .eic_cfg_bits = {.phys = ZEUS_ADDR(0x000038)}, .eic_ready_status = {.phys = ZEUS_ADDR(0x00004c)}, diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index 682efb0c108d..64eb71b15280 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq) return err; } -static void __init quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev) { int irq; /* PCI/ISA Bridge interrupt */ u8 reg_64; diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 6ab0bee2a54f..4d584ae29ae1 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -459,10 +459,11 @@ static int handle_signal(int sig, else ret = setup_frame(sig, ka, oldset, regs); if (ret) - return; + return ret; signal_delivered(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); + test_thread_flag(TIF_SINGLESTEP)); + return 0; } /* diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 090d35d36973..e62c223e4c45 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -876,9 +876,7 @@ static void __init smp_online(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); local_irq_enable(); } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index a47828d31fe6..6266730efd61 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -300,9 +300,7 @@ smp_cpu_init(int cpunum) notify_cpu_starting(cpunum); - ipi_call_lock(); set_cpu_online(cpunum, true); - ipi_call_unlock(); /* Initialise the idle task for this CPU */ atomic_inc(&init_mm.mm_count); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb34322de4..e1417c42155c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused) if (system_state == SYSTEM_RUNNING) vdso_data->processorCount++; #endif - ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ @@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused) of_node_put(np); } of_node_put(l2_cache); - ipi_call_unlock(); local_irq_enable(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 15cca26ccb6c..8dca9c248ac7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -717,9 +717,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) init_cpu_vtimer(); pfault_init(); notify_cpu_starting(smp_processor_id()); - ipi_call_lock(); set_cpu_online(smp_processor_id(), true); - ipi_call_unlock(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f591598d92f6..781bcb10b8bd 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -103,8 +103,6 @@ void __cpuinit smp_callin(void) if (cheetah_pcache_forced_on) cheetah_enable_pcache(); - local_irq_enable(); - callin_flag = 1; __asm__ __volatile__("membar #Sync\n\t" "flush %%g6" : : : "memory"); @@ -124,9 +122,8 @@ void __cpuinit smp_callin(void) while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) rmb(); - ipi_call_lock_irq(); set_cpu_online(cpuid, true); - ipi_call_unlock_irq(); + local_irq_enable(); /* idle thread is expected to have preempt disabled */ preempt_disable(); @@ -1308,9 +1305,7 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); - ipi_call_lock(); set_cpu_online(cpu, false); - ipi_call_unlock(); cpu_map_rebuild(); diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 84873fbe8f27..e686c5ac90be 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -198,17 +198,7 @@ void __cpuinit online_secondary(void) notify_cpu_starting(smp_processor_id()); - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - */ - ipi_call_lock(); set_cpu_online(smp_processor_id(), 1); - ipi_call_unlock(); __get_cpu_var(cpu_state) = CPU_ONLINE; /* Set up tile-specific state for this cpu. */ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f2521434554..b0c5276861ec 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -49,6 +49,9 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 + # Use -mpreferred-stack-boundary=3 if supported. + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 49331bedc158..70780689599a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ +#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" + +#define b_replacement(number) "663"#number +#define e_replacement(number) "664"#number + +#define alt_slen "662b-661b" +#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" + +#define ALTINSTR_ENTRY(feature, number) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(number)"f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_slen "\n" /* source len */ \ + " .byte " alt_rlen(number) "\n" /* replacement len */ + +#define DISCARD_ENTRY(number) /* rlen <= slen */ \ + " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" + +#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ + b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" + /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ - \ - "661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .long 661b - .\n" /* label */ \ - " .long 663f - .\n" /* new instruction */ \ - " .word " __stringify(feature) "\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ - " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" + OLDINSTR(oldinstr) \ + ".section .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + DISCARD_ENTRY(1) \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR(oldinstr) \ + ".section .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + DISCARD_ENTRY(1) \ + DISCARD_ENTRY(2) \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous" /* * This must be included *after* the definition of ALTERNATIVE due to @@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end) : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* + * Like alternative_call, but there are two features and respective functions. + * If CPU has feature2, function2 is used. + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ + output, input...) \ + asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ + "call %P[new2]", feature2) \ + : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input) + +/* * use this macro(s) if you need more than one output parameter * in alternative_io */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed96..88093c1d44fd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -306,7 +306,8 @@ struct apic { unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); @@ -331,9 +332,9 @@ struct apic { unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid); /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); @@ -537,6 +538,11 @@ static inline const struct cpumask *default_target_cpus(void) #endif } +static inline const struct cpumask *online_target_cpus(void) +{ + return cpu_online_mask; +} + DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); @@ -586,21 +592,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif -static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) { - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; + unsigned long cpu_mask = cpumask_bits(cpumask)[0] & + cpumask_bits(andmask)[0] & + cpumask_bits(cpu_online_mask)[0] & + APIC_ALL_CPUS; + + if (likely(cpu_mask)) { + *apicid = (unsigned int)cpu_mask; + return 0; + } else { + return -EINVAL; + } } -static inline unsigned int +extern int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid); + +static inline void +flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; +} - return (unsigned int)(mask1 & mask2 & mask3); +static inline void +default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + cpumask_copy(retmask, cpumask_of(cpu)); } static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca4..75ce3f47d204 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -4,9 +4,7 @@ enum reboot_type { BOOT_TRIPLE = 't', BOOT_KBD = 'k', -#ifdef CONFIG_X86_32 BOOT_BIOS = 'b', -#endif BOOT_ACPI = 'a', BOOT_EFI = 'e', BOOT_CF9 = 'p', diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5c5eac..d3d74698dce9 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT - printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", virtual_dma_count, virtual_dma_residue, calls, bytes, dma_wait); calls = 0; diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index dc580c42851c..c0fa356e90de 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -44,28 +44,14 @@ struct nmiaction { const char *name; }; -#define register_nmi_handler(t, fn, fg, n) \ +#define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ - static struct nmiaction fn##_na = { \ + static struct nmiaction init fn##_na = { \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ }; \ - __register_nmi_handler((t), &fn##_na); \ -}) - -/* - * For special handlers that register/unregister in the - * init section only. This should be considered rare. - */ -#define register_nmi_handler_initonly(t, fn, fg, n) \ -({ \ - static struct nmiaction fn##_na __initdata = { \ - .handler = (fn), \ - .name = (n), \ - .flags = (fg), \ - }; \ - __register_nmi_handler((t), &fn##_na); \ + __register_nmi_handler((t), &fn##_na); \ }) int __register_nmi_handler(unsigned int, struct nmiaction *); diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b3a531746026..5ad24a89b19b 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -7,9 +7,13 @@ #undef DEBUG #ifdef DEBUG -#define DBG(x...) printk(x) +#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__) #else -#define DBG(x...) +#define DBG(fmt, ...) \ +do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) #endif #define PCI_PROBE_BIOS 0x0001 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index c78f14a0df00..dab39350e51e 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -234,7 +234,7 @@ extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); #else -static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { *nr = 0; return NULL; diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db840c6..f2b489cf1602 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -2,9 +2,9 @@ #define _ASM_X86_PGTABLE_2LEVEL_H #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) + pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e)) /* * Certain architectures need to do special things when PTEs diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cb00ccc7d571..4cc9f2b7cdc3 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -9,13 +9,13 @@ */ #define pte_ERROR(e) \ - printk("%s:%d: bad pte %p(%08lx%08lx).\n", \ + pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \ __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p(%016Lx).\n", \ + pr_err("%s:%d: bad pmd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p(%016Lx).\n", \ + pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) /* Rules for using set_pte: the pte being assigned *must* be diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709e09ae..8251be02301e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[]; extern void paging_init(void); #define pte_ERROR(e) \ - printk("%s:%d: bad pte %p(%016lx).\n", \ + pr_err("%s:%d: bad pte %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p(%016lx).\n", \ + pr_err("%s:%d: bad pmd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %p(%016lx).\n", \ + pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p(%016lx).\n", \ + pr_err("%s:%d: bad pgd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) struct mm_struct; diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fce3f4ae5bd6..fe1ec5bcd846 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -21,8 +21,9 @@ struct real_mode_header { u32 wakeup_header; #endif /* APM/BIOS reboot */ -#ifdef CONFIG_X86_32 u32 machine_real_restart_asm; +#ifdef CONFIG_X86_64 + u32 machine_real_restart_seg; #endif }; diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f297069e87..a82c4f1b4d83 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -18,8 +18,8 @@ extern struct machine_ops machine_ops; void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); -void machine_real_restart(unsigned int type); -/* These must match dispatch_table in reboot_32.S */ +void __noreturn machine_real_restart(unsigned int type); +/* These must match dispatch in arch/x86/realmore/rm/reboot.S */ #define MRR_BIOS 0 #define MRR_APM 1 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c37..2ffa95dc2333 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) -/* We don't mark CPUs online until __cpu_up(), so we need another measure */ -static inline int num_booting_cpus(void) -{ - return cpumask_weight(cpu_callout_mask); -} #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8e796fbbf9c6..d8def8b3dba0 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -17,6 +17,8 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long +copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); +__must_check unsigned long copy_user_generic_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_unrolled(void *to, const void *from, unsigned len); @@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len) { unsigned ret; - alternative_call(copy_user_generic_unrolled, + /* + * If CPU has ERMS feature, use copy_user_enhanced_fast_string. + * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. + * Otherwise, use copy_user_generic_unrolled. + */ + alternative_call_2(copy_user_generic_unrolled, copy_user_generic_string, X86_FEATURE_REP_GOOD, + copy_user_enhanced_fast_string, + X86_FEATURE_ERMS, ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), "=d" (len)), "1" (to), "2" (from), "3" (len) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 6149b476d9df..a06983cdc125 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -140,6 +140,9 @@ #define IPI_RESET_LIMIT 1 /* after this # consecutive successes, bump up the throttle if it was lowered */ #define COMPLETE_THRESHOLD 5 +/* after this # of giveups (fall back to kernel IPI's) disable the use of + the BAU for a period of time */ +#define GIVEUP_LIMIT 100 #define UV_LB_SUBNODEID 0x10 @@ -166,7 +169,6 @@ #define FLUSH_RETRY_TIMEOUT 2 #define FLUSH_GIVEUP 3 #define FLUSH_COMPLETE 4 -#define FLUSH_RETRY_BUSYBUG 5 /* * tuning the action when the numalink network is extremely delayed @@ -175,7 +177,7 @@ microseconds */ #define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ -#define CONGESTED_PERIOD 30 /* time for the bau to be +#define DISABLED_PERIOD 10 /* time for the bau to be disabled, in seconds */ /* see msg_type: */ #define MSG_NOOP 0 @@ -520,6 +522,12 @@ struct ptc_stats { unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ + unsigned long s_overipilimit; /* over the ipi reset limit */ + unsigned long s_giveuplimit; /* disables, over giveup limit*/ + unsigned long s_enters; /* entries to the driver */ + unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ + unsigned long s_plugged; /* plugged by h/w bug*/ + unsigned long s_congested; /* giveup on long wait */ /* destination statistics */ unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ @@ -586,8 +594,8 @@ struct bau_control { int timeout_tries; int ipi_attempts; int conseccompletes; - int baudisabled; - int set_bau_off; + short nobau; + short baudisabled; short cpu; short osnode; short uvhub_cpu; @@ -596,14 +604,16 @@ struct bau_control { short cpus_in_socket; short cpus_in_uvhub; short partition_base_pnode; - short using_desc; /* an index, like uvhub_cpu */ - unsigned int inuse_map; + short busy; /* all were busy (war) */ unsigned short message_number; unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; cycles_t send_message; + cycles_t period_end; + cycles_t period_time; spinlock_t uvhub_lock; spinlock_t queue_lock; + spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -614,9 +624,9 @@ struct bau_control { int complete_threshold; int cong_response_us; int cong_reps; - int cong_period; - unsigned long clocks_per_100_usec; - cycles_t period_time; + cycles_t disabled_period; + int period_giveups; + int giveup_limit; long period_requests; struct hub_and_pnode *thp; }; diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 92e54abf89e0..f90f0a587c66 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -9,15 +9,6 @@ #include <asm/ipi.h> #include <linux/cpumask.h> -/* - * Need to use more than cpu 0, because we need more vectors - * when MSI-X are used. - */ -static const struct cpumask *x2apic_target_cpus(void) -{ - return cpu_online_mask; -} - static int x2apic_apic_id_valid(int apicid) { return 1; @@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void) return 1; } -/* - * For now each logical cpu is in its own vector allocation domain. - */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) { diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c090af10ac7d..38155f667144 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -156,7 +156,6 @@ struct x86_cpuinit_ops { /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC - * @wallclock_init: init the wallclock device * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic @@ -164,10 +163,10 @@ struct x86_cpuinit_ops { * @i8042_detect pre-detect if i8042 controller exists * @save_sched_clock_state: save state for sched_clock() on suspend * @restore_sched_clock_state: restore state for sched_clock() on resume + * @apic_post_init: adjust apic if neeeded */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - void (*wallclock_init)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); void (*iommu_shutdown)(void); @@ -177,6 +176,7 @@ struct x86_platform_ops { int (*i8042_detect)(void); void (*save_sched_clock_state)(void); void (*restore_sched_clock_state)(void); + void (*apic_post_init)(void); }; struct pci_dev; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f0759..931280ff8299 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "SMP alternatives: " fmt + #include <linux/module.h> #include <linux/sched.h> #include <linux/mutex.h> @@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str) __setup("noreplace-paravirt", setup_noreplace_paravirt); #endif -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) +#define DPRINTK(fmt, ...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) /* * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes @@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp) * If this still occurs then you should see a hang * or crash shortly after this line: */ - printk("lockdep: fixing up alternatives.\n"); + pr_info("lockdep: fixing up alternatives\n"); #endif if (noreplace_smp || smp_alt_once || skip_smp_alternatives) @@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp) if (smp == smp_mode) { /* nothing */ } else if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + pr_info("switching to SMP code\n"); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + pr_info("switching to UP code\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) @@ -546,7 +551,7 @@ void __init alternative_instructions(void) #ifdef CONFIG_SMP if (smp_alt_once) { if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + pr_info("switching to UP code\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); @@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data) struct text_poke_param *p; int i; - if (atomic_dec_and_test(&stop_machine_first)) { + if (atomic_xchg(&stop_machine_first, 0)) { for (i = 0; i < tpp->nparams; i++) { p = &tpp->params[i]; text_poke(p->addr, p->opcode, p->len); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591cc..f29f6dd6bc08 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -2,6 +2,9 @@ * Shared support code for AMD K8 northbridges and derivates. * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/slab.h> #include <linux/init.h> @@ -258,7 +261,7 @@ void amd_flush_garts(void) } spin_unlock_irqrestore(&gart_lock, flags); if (!flushed) - printk("nothing to flush?\n"); + pr_notice("nothing to flush?\n"); } EXPORT_SYMBOL_GPL(amd_flush_garts); @@ -269,11 +272,10 @@ static __init int init_amd_nbs(void) err = amd_cache_northbridges(); if (err < 0) - printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); + pr_notice("Cannot enumerate AMD northbridges\n"); if (amd_cache_gart() < 0) - printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " - "GART support disabled.\n"); + pr_notice("Cannot initialize GART flush words, GART support disabled\n"); return err; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094af..c421512ca5eb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,6 +2123,25 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } +int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) +{ + unsigned int cpu; + + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + + if (likely(cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + return 0; + } + + return -EINVAL; +} + /* * Power management */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c8..00c77cf78e9e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static const struct cpumask *flat_target_cpus(void) -{ - return cpu_online_mask; -} - -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - /* * Set up the logical destination ID. * @@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) } static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) +flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); @@ -186,7 +167,7 @@ static struct apic apic_flat = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = flat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, @@ -210,8 +191,7 @@ static struct apic apic_flat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, @@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) -{ - return cpu_online_mask; -} - -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { default_send_IPI_mask_sequence_phys(cpumask, vector); @@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector) physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - static int physflat_probe(void) { if (apic == &apic_physflat || num_possible_cpus() > 8) @@ -345,13 +282,13 @@ static struct apic apic_physflat = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = physflat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = physflat_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, @@ -370,8 +307,7 @@ static struct apic apic_physflat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = physflat_send_IPI_mask, .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c08..e145f28b4099 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_copy(retmask, cpumask_of(cpu)); } static u32 noop_apic_read(u32 reg) @@ -159,8 +159,7 @@ struct apic apic_noop = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = noop_send_IPI_mask, .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c3..bc552cff2578 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static const struct cpumask *numachip_target_cpus(void) -{ - return cpu_online_mask; -} - -static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) { union numachip_csr_g3_ext_irq_gen int_gen; @@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector) __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } -static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if (likely((unsigned)cpu < nr_cpu_ids)) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; -} - -static unsigned int -numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - static int __init numachip_probe(void) { return apic == &apic_numachip; @@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = numachip_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = numachip_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = { .set_apic_id = set_apic_id, .apic_id_mask = 0xffU << 24, - .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf960..d50e3640d5ae 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void) return 1; } -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; @@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) return 1; } -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu = cpumask_first(cpumask); - - if (cpu < nr_cpu_ids) - return cpu_physical_id(cpu); - return BAD_APICID; -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - return cpu_physical_id(cpu); - } - return BAD_APICID; -} - static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; @@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } /* NULL entry stops DMI scanning */ }; -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int probe_bigsmp(void) { if (def_to_bigsmp) @@ -205,13 +164,13 @@ static struct apic apic_bigsmp = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = bigsmp_target_cpus, + .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .vector_allocation_domain = bigsmp_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, @@ -229,8 +188,7 @@ static struct apic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c79..0874799a98c6 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void) WARN(1, "Command failed, status = %x\n", mip_status); } -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - - static void es7000_wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) @@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) return 1; } -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; - int cpu, uninitialized_var(apicid); + unsigned int cpu, uninitialized_var(apicid); /* * The cpus in the mask must all be on the apic cluster. */ - for_each_cpu(cpu, cpumask) { + for_each_cpu_and(cpu, cpumask, cpu_online_mask) { int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { WARN(1, "Not a valid mask!"); - return BAD_APICID; + return -EINVAL; } - apicid = new_apicid; + apicid |= new_apicid; round++; } - return apicid; + if (!round) + return -EINVAL; + *dest_id = apicid; + return 0; } -static unsigned int +static int es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; + return 0; cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); + es7000_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); - return apicid; + return 0; } static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) @@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = es7000_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr_cluster, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, @@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = es7000_send_IPI_mask, @@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = es7000_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, @@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = es7000_send_IPI_mask, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5f0ff597437c..406eee784684 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi entry = alloc_irq_pin_list(node); if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", - node, apic, pin); + pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); return -ENOMEM; } entry->apic = apic; @@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ioapic_mask_entry(apic, pin); entry = ioapic_read_entry(apic, pin); if (entry.irr) - printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", + pr_err("Unable to reset IRR for apic: %d, pin :%d\n", mpc_ioapic_id(apic), pin); } @@ -895,7 +895,7 @@ static int irq_polarity(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -906,7 +906,7 @@ static int irq_polarity(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -948,7 +948,7 @@ static int irq_trigger(int idx) } default: { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -962,7 +962,7 @@ static int irq_trigger(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -973,7 +973,7 @@ static int irq_trigger(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 0; break; } @@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin) * Debugging check, we are in big trouble if this message pops up! */ if (mp_irqs[idx].dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; @@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 8; - unsigned int old_vector; + static int current_offset = VECTOR_OFFSET_START % 16; int cpu, err; cpumask_var_t tmp_mask; @@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) return -ENOMEM; - old_vector = cfg->vector; - if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); - return 0; - } - } - /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int new_cpu; - int vector, offset; + cpumask_clear(cfg->old_domain); + cpu = cpumask_first_and(mask, cpu_online_mask); + while (cpu < nr_cpu_ids) { + int new_cpu, vector, offset; - apic->vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask, mask); + + if (cpumask_subset(tmp_mask, cfg->domain)) { + err = 0; + if (cpumask_equal(tmp_mask, cfg->domain)) + break; + /* + * New cpumask using the vector is a proper subset of + * the current in use mask. So cleanup the vector + * allocation for the members that are not used anymore. + */ + cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); + cfg->move_in_progress = 1; + cpumask_and(cfg->domain, cfg->domain, tmp_mask); + break; + } vector = current_vector; offset = current_offset; next: - vector += 8; + vector += 16; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; + offset = (offset + 1) % 16; vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) + + if (unlikely(current_vector == vector)) { + cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpu = cpumask_first_and(tmp_mask, cpu_online_mask); continue; + } if (test_bit(vector, used_vectors)) goto next; @@ -1162,7 +1171,7 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (old_vector) { + if (cfg->vector) { cfg->move_in_progress = 1; cpumask_copy(cfg->old_domain, cfg->domain); } @@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (!IO_APIC_IRQ(irq)) return; - /* - * For legacy irqs, cfg->domain starts with cpu 0 for legacy - * controllers like 8259. Now that IO-APIC can handle this irq, update - * the cfg->domain. - */ - if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) - apic->vector_allocation_domain(0, cfg->domain); if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), + &dest)) { + pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", + mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); + __clear_irq_vector(irq, cfg); + + return; + } apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, cfg->vector, irq, attr->trigger, attr->polarity, dest); if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); @@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi) * Set up the timer pin, possibly with the 8259A-master behind. */ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) + unsigned int pin, int vector) { struct IO_APIC_route_entry entry; + unsigned int dest; if (irq_remapping_enabled) return; @@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, * We use logical delivery to get the timer IRQ * to the first CPU. */ + if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), + apic->target_cpus(), &dest))) + dest = BAD_APICID; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = dest; entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) reg_03.raw = io_apic_read(ioapic_idx, 3); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) i, ir_entry->index ); - printk("%1d %1d %1d %1d %1d " + pr_cont("%1d %1d %1d %1d %1d " "%1d %1d %X %02X\n", ir_entry->format, ir_entry->mask, @@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) i, entry.dest ); - printk("%1d %1d %1d %1d %1d " + pr_cont("%1d %1d %1d %1d %1d " "%1d %1d %02X\n", entry.mask, entry.trigger, @@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void) continue; printk(KERN_DEBUG "IRQ%d ", irq); for_each_irq_pin(entry, cfg->irq_2_pin) - printk("-> %d:%d", entry->apic, entry->pin); - printk("\n"); + pr_cont("-> %d:%d", entry->apic, entry->pin); + pr_cont("\n"); } printk(KERN_INFO ".................................... done.\n"); @@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base) printk(KERN_DEBUG); for (i = 0; i < 8; i++) - printk(KERN_CONT "%08x", apic_read(base + i*0x10)); + pr_cont("%08x", apic_read(base + i*0x10)); - printk(KERN_CONT "\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); } } - printk("\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APICs(int maxcpu) @@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) - printk("could not set ID!\n"); + pr_cont("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); } @@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg) cfg->move_in_progress = 0; } -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -1; - - if (assign_irq_vector(data->irq, data->chip_data, mask)) - return -1; - - cpumask_copy(data->affinity, mask); - - *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); - return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq) static inline void irq_complete_move(struct irq_cfg *cfg) { } #endif +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(cfg)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + } +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int irq = data->irq; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int +ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + unsigned int dest, irq = data->irq; + unsigned long flags; + int ret; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + raw_spin_lock_irqsave(&ioapic_lock, flags); + ret = __ioapic_set_affinity(data, mask, &dest); + if (!ret) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, data->chip_data); + ret = IRQ_SET_MASK_OK_NOCOPY; + } + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return ret; +} + static void ack_apic_edge(struct irq_data *data) { irq_complete_move(data->chip_data); @@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_ack = ir_ack_apic_edge; chip->irq_eoi = ir_ack_apic_level; -#ifdef CONFIG_SMP chip->irq_set_affinity = set_remapped_irq_affinity; -#endif } #endif /* CONFIG_IRQ_REMAP */ @@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, -#ifdef CONFIG_SMP .irq_set_affinity = ioapic_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, if (err) return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; if (irq_remapped(cfg)) { compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, return err; } -#ifdef CONFIG_SMP static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) __write_msi_msg(data->msi_desc, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, @@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = { .irq_unmask = unmask_msi_irq, .irq_mask = mask_msi_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq) } #ifdef CONFIG_DMAR_TABLE -#ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, dmar_msi_write(irq, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - static struct irq_chip dmar_msi_type = { .name = "DMAR_MSI", .irq_unmask = dmar_msi_unmask, .irq_mask = dmar_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = dmar_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER -#ifdef CONFIG_SMP static int hpet_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data, hpet_msi_write(data->handler_data, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = hpet_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) */ #ifdef CONFIG_HT_IRQ -#ifdef CONFIG_SMP - static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { struct ht_irq_msg msg; @@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return -1; target_ht_irq(data->irq, dest, cfg->vector); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif - static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = ht_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; + struct ht_irq_msg msg; + unsigned dest; int err; if (disable_apic) @@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus()); + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((apic->irq_dest_mode == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; + write_ht_irq_msg(irq, &msg); - write_ht_irq_msg(irq, &msg); + irq_set_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - } - return err; + return 0; } #endif /* CONFIG_HT_IRQ */ @@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) /* Sanity check */ if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + pr_err("IOAPIC[%d]: Unable to change apic_id!\n", + ioapic); return -1; } } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37a..d661ee95cabf 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return 0x0F; -} - -static inline unsigned int +static int numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - return 0x0F; + *apicid = 0x0F; + return 0; } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ @@ -441,20 +438,6 @@ static int probe_numaq(void) return found_numaq; } -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - static void numaq_setup_portio_remap(void) { int num_quads = num_online_nodes(); @@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .vector_allocation_domain = numaq_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = numaq_ioapic_phys_id_map, @@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, .send_IPI_mask = numaq_send_IPI_mask, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e60..eb35ef9ee63f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void) #endif } -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - /* should be called last. */ static int probe_default(void) { @@ -105,7 +90,7 @@ static struct apic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .vector_allocation_domain = default_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -123,8 +108,7 @@ static struct apic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask_logical, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, @@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void) if (apic->setup_apic_routing) apic->setup_apic_routing(); + + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } void __init generic_apic_probe(void) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe986698929..1793dba7a741 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -23,11 +23,6 @@ #include <asm/ipi.h> #include <asm/setup.h> -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ @@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void) } } - if (is_vsmp_box()) { - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; - } + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c00755..77c95c0e1bf7 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) "summit: %s: " fmt, __func__ + #include <linux/mm.h> #include <linux/init.h> #include <asm/io.h> @@ -235,8 +237,8 @@ static int summit_apic_id_registered(void) static void summit_setup_apic_routing(void) { - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); + pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); } static int summit_cpu_present_to_apicid(int mps_cpu) @@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid) return 1; } -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; - int cpu, apicid = 0; + unsigned int cpu, apicid = 0; /* * The cpus in the mask must all be on the apic cluster. */ - for_each_cpu(cpu, cpumask) { + for_each_cpu_and(cpu, cpumask, cpu_online_mask) { int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); - return BAD_APICID; + pr_err("Not a valid mask!\n"); + return -EINVAL; } apicid |= new_apicid; round++; } - return apicid; + if (!round) + return -EINVAL; + *dest_id = apicid; + return 0; } -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask, + unsigned int *apicid) { - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; + return 0; cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); + summit_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); - return apicid; + return 0; } /* @@ -320,20 +327,6 @@ static int probe_summit(void) return 0; } -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - #ifdef CONFIG_X86_SUMMIT_NUMA static struct rio_table_hdr *rio_table_hdr; static struct scal_detail *scal_devs[MAX_NUMNODES]; @@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_rio_dev) { - printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); + pr_err("Couldn't find owner Cyclone for Winnipeg!\n"); return last_bus; } @@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_scal_dev) { - printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); + pr_err("Couldn't find owner Twister for Cyclone!\n"); return last_bus; } @@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) num_buses = 9; break; default: - printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); + pr_info("Unsupported Winnipeg type!\n"); return last_bus; } @@ -411,13 +404,15 @@ static int build_detail_arrays(void) int i, scal_detail_size, rio_detail_size; if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { - printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); + pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); return 0; } switch (rio_table_hdr->version) { default: - printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); + pr_warn("Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); return 0; case 2: scal_detail_size = 11; @@ -462,7 +457,7 @@ void setup_summit(void) offset = *((unsigned short *)(ptr + offset)); } if (!rio_table_hdr) { - printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); + pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n"); return; } @@ -509,7 +504,7 @@ static struct apic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .vector_allocation_domain = summit_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = summit_ioapic_phys_id_map, @@ -527,7 +522,6 @@ static struct apic apic_summit = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, .send_IPI_mask = summit_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a7..c88baa4ff0e5 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) } static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } @@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) { - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); + u32 dest = 0; + u16 cluster; + int i; - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - else - return BAD_APICID; -} + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + dest = per_cpu(x86_cpu_to_logical_apicid, i); + cluster = x2apic_cluster(i); + break; + } -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; + if (!dest) + return -EINVAL; - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + if (cluster != x2apic_cluster(i)) + continue; + dest |= per_cpu(x86_cpu_to_logical_apicid, i); } - return per_cpu(x86_cpu_to_logical_apicid, cpu); + *apicid = dest; + + return 0; } static void init_x2apic_ldr(void) @@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void) return 0; } +static const struct cpumask *x2apic_cluster_target_cpus(void) +{ + return cpu_all_mask; +} + +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + /* + * To minimize vector pressure, default case of boot, device bringup + * etc will use a single cpu for the interrupt destination. + * + * On explicit migration requests coming from irqbalance etc, + * interrupts will be routed to the x2apic cluster (cluster-id + * derived from the first cpu in the mask) members specified + * in the mask. + */ + if (mask == x2apic_cluster_target_cpus()) + cpumask_copy(retmask, cpumask_of(cpu)); + else + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); +} + static struct apic apic_x2apic_cluster = { .name = "cluster x2apic", @@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = { .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db275..e03a1e180e81 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - return per_cpu(x86_cpu_to_apicid, cpu); -} - static void init_x2apic_ldr(void) { } @@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = { .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a4401..8cfade9510a4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static const struct cpumask *uv_target_cpus(void) -{ - return cpu_online_mask; -} - -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP @@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; - else - return BAD_APICID; -} - -static unsigned int +static int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - int cpu; + int unsigned cpu; /* * We're using fixed IRQ delivery, can only return one phys APIC ID. @@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + + if (likely(cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + return 0; + } + + return -EINVAL; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = uv_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = uv_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466c..d65464e43503 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -201,6 +201,8 @@ * http://www.microsoft.com/whdc/archive/amp_12.mspx] */ +#define pr_fmt(fmt) "apm: " fmt + #include <linux/module.h> #include <linux/poll.h> @@ -485,11 +487,11 @@ static void apm_error(char *str, int err) if (error_table[i].key == err) break; if (i < ERROR_COUNT) - printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); + pr_notice("%s: %s\n", str, error_table[i].msg); else if (err < 0) - printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); + pr_notice("%s: linux error code %i\n", str, err); else - printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", + pr_notice("%s: unknown error code %#2.2x\n", str, err); } @@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) static int notified; if (notified++ == 0) - printk(KERN_ERR "apm: an event queue overflowed\n"); + pr_err("an event queue overflowed\n"); if (++as->event_tail >= APM_MAX_EVENTS) as->event_tail = 0; } @@ -1447,7 +1449,7 @@ static void apm_mainloop(void) static int check_apm_user(struct apm_user *as, const char *func) { if (as == NULL || as->magic != APM_BIOS_MAGIC) { - printk(KERN_ERR "apm: %s passed bad filp\n", func); + pr_err("%s passed bad filp\n", func); return 1; } return 0; @@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp) as1 = as1->next) ; if (as1 == NULL) - printk(KERN_ERR "apm: filp not in user list\n"); + pr_err("filp not in user list\n"); else as1->next = as->next; } @@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp) struct apm_user *as; as = kmalloc(sizeof(*as), GFP_KERNEL); - if (as == NULL) { - printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", - sizeof(*as)); + if (as == NULL) return -ENOMEM; - } + as->magic = APM_BIOS_MAGIC; as->event_tail = as->event_head = 0; as->suspends_pending = as->standbys_pending = 0; @@ -2313,16 +2313,16 @@ static int __init apm_init(void) } if (apm_info.disabled) { - printk(KERN_NOTICE "apm: disabled on user request.\n"); + pr_notice("disabled on user request.\n"); return -ENODEV; } if ((num_online_cpus() > 1) && !power_off && !smp) { - printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); + pr_notice("disabled - APM is not SMP safe.\n"); apm_info.disabled = 1; return -ENODEV; } if (!acpi_disabled) { - printk(KERN_NOTICE "apm: overridden by ACPI.\n"); + pr_notice("overridden by ACPI.\n"); apm_info.disabled = 1; return -ENODEV; } @@ -2356,8 +2356,7 @@ static int __init apm_init(void) kapmd_task = kthread_create(apm, NULL, "kapmd"); if (IS_ERR(kapmd_task)) { - printk(KERN_ERR "apm: disabled - Unable to start kernel " - "thread.\n"); + pr_err("disabled - Unable to start kernel thread\n"); err = PTR_ERR(kapmd_task); kapmd_task = NULL; remove_proc_entry("apm", NULL); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62ba..c97bb7b5a9f8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,8 +55,8 @@ static void __init check_fpu(void) if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); + pr_emerg("No coprocessor found and no math emulation present\n"); + pr_emerg("Giving up\n"); for (;;) ; #endif return; @@ -86,7 +86,7 @@ static void __init check_fpu(void) boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) - printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); + pr_warn("Hmm, FPU with FDIV bug\n"); } static void __init check_hlt(void) @@ -94,16 +94,16 @@ static void __init check_hlt(void) if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) return; - printk(KERN_INFO "Checking 'hlt' instruction... "); + pr_info("Checking 'hlt' instruction... "); if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); + pr_cont("disabled\n"); return; } halt(); halt(); halt(); halt(); - printk(KERN_CONT "OK.\n"); + pr_cont("OK\n"); } /* @@ -116,7 +116,7 @@ static void __init check_popad(void) #ifndef CONFIG_X86_POPAD_OK int res, inp = (int) &res; - printk(KERN_INFO "Checking for popad bug... "); + pr_info("Checking for popad bug... "); __asm__ __volatile__( "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) @@ -127,9 +127,9 @@ static void __init check_popad(void) * CPU hard. Too bad. */ if (res != 12345678) - printk(KERN_CONT "Buggy.\n"); + pr_cont("Buggy\n"); else - printk(KERN_CONT "OK.\n"); + pr_cont("OK\n"); #endif } @@ -161,7 +161,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #ifndef CONFIG_SMP - printk(KERN_INFO "CPU: "); + pr_info("CPU: "); print_cpu_info(&boot_cpu_data); #endif check_config(); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index da27c5d2168a..5a5a5dc1ff15 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -7,6 +7,9 @@ * Copyright 2008 Intel Corporation * Author: Andi Kleen */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/thread_info.h> #include <linux/capability.h> #include <linux/miscdevice.h> @@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void) cpu_relax(); if (!m->finished && retries >= 4) { - pr_err("MCE: skipping error being logged currently!\n"); + pr_err("skipping error being logged currently!\n"); break; } } @@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags) { /* mce_severity() should not hand us an ACTION_REQUIRED error */ BUG_ON(flags & MF_ACTION_REQUIRED); - printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" - "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); + pr_err("Uncorrected memory error in page 0x%lx ignored\n" + "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", + pfn); return 0; } @@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void) b = cap & MCG_BANKCNT_MASK; if (!banks) - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + pr_info("CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", + pr_warn("Using only %u machine check banks out of %u\n", MAX_NR_BANKS, b); b = MAX_NR_BANKS; } @@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void) static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { - pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); + pr_info("unknown CPU type - not enabling MCE support\n"); return -EOPNOTSUPP; } @@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void) /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", smp_processor_id()); } @@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str) get_option(&str, &monarch_timeout); } } else { - printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", - str); + pr_info("mce argument %s ignored. Please use /sys\n", str); return 0; } return 1; diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index bdda2e6c673b..35ffda5d0727 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, /* Compute the maximum size with which we can make a range: */ if (range_startk) - max_align = ffs(range_startk) - 1; + max_align = __ffs(range_startk); else - max_align = 32; + max_align = BITS_PER_LONG - 1; - align = fls(range_sizek) - 1; + align = __fls(range_sizek); if (align > max_align) align = max_align; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65f..e9fe907cd249 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -361,11 +361,7 @@ static void __init print_mtrr_state(void) } pr_debug("MTRR variable ranges %sabled:\n", mtrr_state.enabled & 2 ? "en" : "dis"); - if (size_or_mask & 0xffffffffUL) - high_width = ffs(size_or_mask & 0xffffffffUL) - 1; - else - high_width = ffs(size_or_mask>>32) + 32 - 1; - high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; + high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a15df4be151f..821d53b696d1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -374,7 +374,7 @@ struct x86_pmu { /* * Intel DebugStore bits */ - int bts :1, + unsigned int bts :1, bts_active :1, pebs :1, pebs_active :1, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1f4c8add6759..382366977d4c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -5,6 +5,8 @@ * among events on a single PMU. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/stddef.h> #include <linux/types.h> #include <linux/init.h> @@ -136,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +#define SNB_DMND_DATA_RD (1ULL << 0) +#define SNB_DMND_RFO (1ULL << 1) +#define SNB_DMND_IFETCH (1ULL << 2) +#define SNB_DMND_WB (1ULL << 3) +#define SNB_PF_DATA_RD (1ULL << 4) +#define SNB_PF_RFO (1ULL << 5) +#define SNB_PF_IFETCH (1ULL << 6) +#define SNB_LLC_DATA_RD (1ULL << 7) +#define SNB_LLC_RFO (1ULL << 8) +#define SNB_LLC_IFETCH (1ULL << 9) +#define SNB_BUS_LOCKS (1ULL << 10) +#define SNB_STRM_ST (1ULL << 11) +#define SNB_OTHER (1ULL << 15) +#define SNB_RESP_ANY (1ULL << 16) +#define SNB_NO_SUPP (1ULL << 17) +#define SNB_LLC_HITM (1ULL << 18) +#define SNB_LLC_HITE (1ULL << 19) +#define SNB_LLC_HITS (1ULL << 20) +#define SNB_LLC_HITF (1ULL << 21) +#define SNB_LOCAL (1ULL << 22) +#define SNB_REMOTE (0xffULL << 23) +#define SNB_SNP_NONE (1ULL << 31) +#define SNB_SNP_NOT_NEEDED (1ULL << 32) +#define SNB_SNP_MISS (1ULL << 33) +#define SNB_NO_FWD (1ULL << 34) +#define SNB_SNP_FWD (1ULL << 35) +#define SNB_HITM (1ULL << 36) +#define SNB_NON_DRAM (1ULL << 37) + +#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) +#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) +#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ + SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ + SNB_HITM) + +#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) +#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) + +#define SNB_L3_ACCESS SNB_RESP_ANY +#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) + +static __initconst const u64 snb_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, + }, + }, +}; + static __initconst const u64 snb_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -233,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids }, [ C(NODE) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, }, @@ -1000,7 +1080,7 @@ static void intel_pmu_reset(void) local_irq_save(flags); - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); @@ -1707,7 +1787,7 @@ static __init void intel_clovertown_quirk(void) * But taken together it might just make sense to not enable PEBS on * these chips. */ - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + pr_warn("PEBS disabled due to CPU errata\n"); x86_pmu.pebs = 0; x86_pmu.pebs_constraints = NULL; } @@ -1781,8 +1861,8 @@ static __init void intel_arch_events_quirk(void) /* disable event that reported as not presend by cpuid */ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; - printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", - intel_arch_events_map[bit].name); + pr_warn("CPUID marked event: \'%s\' unavailable\n", + intel_arch_events_map[bit].name); } } @@ -1801,7 +1881,7 @@ static __init void intel_nehalem_quirk(void) intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; ebx.split.no_branch_misses_retired = 0; x86_pmu.events_maskl = ebx.full; - printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); + pr_info("CPU erratum AAJ80 worked around\n"); } } @@ -1962,6 +2042,8 @@ __init int intel_pmu_init(void) case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_snb(); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 19faffc60886..7563fda9f033 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -18,6 +18,7 @@ static struct event_constraint constraint_empty = EVENT_CONSTRAINT(0, 0, 0); DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -33,10 +34,81 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); + +static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + u64 count; + + rdmsrl(event->hw.event_base, count); + + return count; +} + +/* + * generic get constraint function for shared match/mask registers. + */ +static struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + unsigned long flags; + bool ok = false; + + /* + * reg->alloc can be set due to existing state, so for fake box we + * need to ignore this, otherwise we might fail to allocate proper + * fake state for this extra reg constraint. + */ + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; + + er = &box->shared_regs[reg1->idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || + (er->config1 == reg1->config && er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (ok) { + if (!uncore_box_is_fake(box)) + reg1->alloc = 1; + return NULL; + } + + return &constraint_empty; +} + +static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + /* + * Only put constraint if extra reg was actually allocated. Also + * takes care of event which do not use an extra shared reg. + * + * Also, if this is a fake box we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent box + * state either since it will be thrown out. + */ + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + er = &box->shared_regs[reg1->idx]; + atomic_dec(&er->ref); + reg1->alloc = 0; +} /* Sandy Bridge-EP uncore support */ static struct intel_uncore_type snbep_uncore_cbox; @@ -64,18 +136,15 @@ static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) pci_write_config_dword(pdev, box_ctl, config); } -static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; - pci_write_config_dword(pdev, hwc->config_base, hwc->config | - SNBEP_PMON_CTL_EN); + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } -static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -83,8 +152,7 @@ static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, pci_write_config_dword(pdev, hwc->config_base, hwc->config); } -static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, - struct perf_event *event) +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -92,14 +160,15 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + return count; } static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, - SNBEP_PMON_BOX_CTL_INT); + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); } static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) @@ -112,7 +181,6 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) rdmsrl(msr, config); config |= SNBEP_PMON_BOX_CTL_FRZ; wrmsrl(msr, config); - return; } } @@ -126,12 +194,10 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) rdmsrl(msr, config); config &= ~SNBEP_PMON_BOX_CTL_FRZ; wrmsrl(msr, config); - return; } } -static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -150,68 +216,15 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, wrmsrl(hwc->config_base, hwc->config); } -static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 count; - - rdmsrl(hwc->event_base, count); - return count; -} - static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); + if (msr) wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); } -static struct event_constraint * -snbep_uncore_get_constraint(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - unsigned long flags; - bool ok = false; - - if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) - return NULL; - - er = &box->shared_regs[reg1->idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (!atomic_read(&er->ref) || er->config1 == reg1->config) { - atomic_inc(&er->ref); - er->config1 = reg1->config; - ok = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (ok) { - if (box->phys_id >= 0) - reg1->alloc = 1; - return NULL; - } - return &constraint_empty; -} - -static void snbep_uncore_put_constraint(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - - if (box->phys_id < 0 || !reg1->alloc) - return; - - er = &box->shared_regs[reg1->idx]; - atomic_dec(&er->ref); - reg1->alloc = 0; -} - -static int snbep_uncore_hw_config(struct intel_uncore_box *box, - struct perf_event *event) +static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -221,14 +234,16 @@ static int snbep_uncore_hw_config(struct intel_uncore_box *box, SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; reg1->config = event->attr.config1 & SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; - } else if (box->pmu->type == &snbep_uncore_pcu) { - reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; - reg1->config = event->attr.config1 & - SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; } else { - return 0; + if (box->pmu->type == &snbep_uncore_pcu) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; + } else { + return 0; + } } reg1->idx = 0; + return 0; } @@ -272,10 +287,19 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = { &format_attr_thresh5.attr, &format_attr_occ_invert.attr, &format_attr_occ_edge.attr, - &format_attr_filter_brand0.attr, - &format_attr_filter_brand1.attr, - &format_attr_filter_brand2.attr, - &format_attr_filter_brand3.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *snbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, NULL, }; @@ -314,15 +338,20 @@ static struct attribute_group snbep_uncore_pcu_format_group = { .attrs = snbep_uncore_pcu_formats_attr, }; +static struct attribute_group snbep_uncore_qpi_format_group = { + .name = "format", + .attrs = snbep_uncore_qpi_formats_attr, +}; + static struct intel_uncore_ops snbep_uncore_msr_ops = { .init_box = snbep_uncore_msr_init_box, .disable_box = snbep_uncore_msr_disable_box, .enable_box = snbep_uncore_msr_enable_box, .disable_event = snbep_uncore_msr_disable_event, .enable_event = snbep_uncore_msr_enable_event, - .read_counter = snbep_uncore_msr_read_counter, - .get_constraint = snbep_uncore_get_constraint, - .put_constraint = snbep_uncore_put_constraint, + .read_counter = uncore_msr_read_counter, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, .hw_config = snbep_uncore_hw_config, }; @@ -485,8 +514,13 @@ static struct intel_uncore_type snbep_uncore_qpi = { .num_counters = 4, .num_boxes = 2, .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &snbep_uncore_pci_ops, .event_descs = snbep_uncore_qpi_events, - SNBEP_UNCORE_PCI_COMMON_INIT(), + .format_group = &snbep_uncore_qpi_format_group, }; @@ -603,10 +637,8 @@ static void snbep_pci2phy_map_init(void) } /* end of Sandy Bridge-EP uncore support */ - /* Sandy Bridge uncore support */ -static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -616,20 +648,11 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); } -static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { wrmsrl(event->hw.config_base, 0); } -static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, - struct perf_event *event) -{ - u64 count; - rdmsrl(event->hw.event_base, count); - return count; -} - static void snb_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { @@ -648,15 +671,15 @@ static struct attribute *snb_uncore_formats_attr[] = { }; static struct attribute_group snb_uncore_format_group = { - .name = "format", - .attrs = snb_uncore_formats_attr, + .name = "format", + .attrs = snb_uncore_formats_attr, }; static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, - .read_counter = snb_uncore_msr_read_counter, + .read_counter = uncore_msr_read_counter, }; static struct event_constraint snb_uncore_cbox_constraints[] = { @@ -697,12 +720,10 @@ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, - NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); } -static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -744,7 +765,7 @@ static struct intel_uncore_ops nhm_uncore_msr_ops = { .enable_box = nhm_uncore_msr_enable_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = nhm_uncore_msr_enable_event, - .read_counter = snb_uncore_msr_read_counter, + .read_counter = uncore_msr_read_counter, }; static struct intel_uncore_type nhm_uncore = { @@ -769,8 +790,1041 @@ static struct intel_uncore_type *nhm_msr_uncores[] = { }; /* end of Nehalem uncore support */ -static void uncore_assign_hw_event(struct intel_uncore_box *box, - struct perf_event *event, int idx) +/* Nehalem-EX uncore support */ +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); +DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); +DEFINE_UNCORE_FORMAT_ATTR(mm_cfg, mm_cfg, "config:63"); +DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); + +static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); +} + +static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config &= ~((1ULL << uncore_num_counters(box)) - 1); + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config |= (1ULL << uncore_num_counters(box)) - 1; + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + else + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +#define NHMEX_UNCORE_OPS_COMMON_INIT() \ + .init_box = nhmex_uncore_msr_init_box, \ + .disable_box = nhmex_uncore_msr_disable_box, \ + .enable_box = nhmex_uncore_msr_enable_box, \ + .disable_event = nhmex_uncore_msr_disable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops nhmex_uncore_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_uncore_msr_enable_event, +}; + +static struct attribute *nhmex_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_edge.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_ubox_format_group = { + .name = "format", + .attrs = nhmex_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_ubox = { + .name = "ubox", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, + .perf_ctr = NHMEX_U_MSR_PMON_CTR, + .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_ubox_format_group +}; + +static struct attribute *nhmex_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_cbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_cbox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_cbox = { + .name = "cbox", + .num_counters = 6, + .num_boxes = 8, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, + .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_C_MSR_OFFSET, + .pair_ctr_ctl = 1, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static struct uncore_event_desc nhmex_uncore_wbox_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type nhmex_uncore_wbox = { + .name = "wbox", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_W_MSR_PMON_CNT0, + .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, + .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, + .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, + .pair_ctr_ctl = 1, + .event_descs = nhmex_uncore_wbox_events, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int ctr, ev_sel; + + ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> + NHMEX_B_PMON_CTR_SHIFT; + ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> + NHMEX_B_PMON_CTL_EV_SEL_SHIFT; + + /* events that do not use the match/mask registers */ + if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || + (ctr == 2 && ev_sel != 0x4) || ctr == 3) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_B0_MSR_MATCH; + else + reg1->reg = NHMEX_B1_MSR_MATCH; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, reg2->config); + } + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); +} + +/* + * The Bbox has 4 counters, but each counter monitors different events. + * Use bits 6-7 in the event config to select counter. + */ +static struct event_constraint nhmex_uncore_bbox_constraints[] = { + EVENT_CONSTRAINT(0 , 1, 0xc0), + EVENT_CONSTRAINT(0x40, 2, 0xc0), + EVENT_CONSTRAINT(0x80, 4, 0xc0), + EVENT_CONSTRAINT(0xc0, 8, 0xc0), + EVENT_CONSTRAINT_END, +}; + +static struct attribute *nhmex_uncore_bbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_counter.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_bbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_bbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_bbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_bbox_msr_enable_event, + .hw_config = nhmex_bbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_bbox = { + .name = "bbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_B0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, + .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_B_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .constraints = nhmex_uncore_bbox_constraints, + .ops = &nhmex_uncore_bbox_ops, + .format_group = &nhmex_uncore_bbox_format_group +}; + +static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (event->attr.config & NHMEX_S_PMON_MM_CFG_EN) { + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + } else { + reg1->config = ~0ULL; + reg2->config = ~0ULL; + } + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_S0_MSR_MM_CFG; + else + reg1->reg = NHMEX_S1_MSR_MM_CFG; + + reg1->idx = 0; + + return 0; +} + +static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + wrmsrl(reg1->reg, 0); + if (reg1->config != ~0ULL || reg2->config != ~0ULL) { + wrmsrl(reg1->reg + 1, reg1->config); + wrmsrl(reg1->reg + 2, reg2->config); + wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + } + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); +} + +static struct attribute *nhmex_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_mm_cfg.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_sbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_sbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_sbox_msr_enable_event, + .hw_config = nhmex_sbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_S0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_S_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .ops = &nhmex_uncore_sbox_ops, + .format_group = &nhmex_uncore_sbox_format_group +}; + +enum { + EXTRA_REG_NHMEX_M_FILTER, + EXTRA_REG_NHMEX_M_DSP, + EXTRA_REG_NHMEX_M_ISS, + EXTRA_REG_NHMEX_M_MAP, + EXTRA_REG_NHMEX_M_MSC_THR, + EXTRA_REG_NHMEX_M_PGT, + EXTRA_REG_NHMEX_M_PLD, + EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, +}; + +static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { + MBOX_INC_SEL_EXTAR_REG(0x0, DSP), + MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x9, ISS), + /* event 0xa uses two extra registers */ + MBOX_INC_SEL_EXTAR_REG(0xa, ISS), + MBOX_INC_SEL_EXTAR_REG(0xa, PLD), + MBOX_INC_SEL_EXTAR_REG(0xb, PLD), + /* events 0xd ~ 0x10 use the same extra register */ + MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x16, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), + EVENT_EXTRA_END +}; + +static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + bool ret = false; + u64 mask; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config == config) { + atomic_inc(&er->ref); + er->config = config; + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; + } + /* + * The ZDP_CTL_FVC MSR has 4 fields which are used to control + * events 0xd ~ 0x10. Besides these 4 fields, there are additional + * fields which are shared. + */ + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (WARN_ON_ONCE(idx >= 4)) + return false; + + /* mask of the shared fields */ + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + + raw_spin_lock_irqsave(&er->lock, flags); + /* add mask of the non-shared field if it's in use */ + if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) + mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + + if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | + NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + er->config &= ~mask; + er->config |= (config & mask); + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; +} + +static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + atomic_dec(&er->ref); + return; + } + + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + atomic_sub(1 << (idx * 8), &er->ref); +} + +u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 config = reg1->config; + + /* get the non-shared control bits and shift them */ + idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + if (new_idx > orig_idx) { + idx = new_idx - orig_idx; + config <<= 3 * idx; + } else { + idx = orig_idx - new_idx; + config >>= 3 * idx; + } + + /* add the shared control bits back */ + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + if (modify) { + /* adjust the main event selector */ + if (new_idx > orig_idx) + hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + else + hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + reg1->config = config; + reg1->idx = ~0xff | new_idx; + } + return config; +} + +static struct event_constraint * +nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int i, idx[2], alloc = 0; + u64 config1 = reg1->config; + + idx[0] = __BITS_VALUE(reg1->idx, 0, 8); + idx[1] = __BITS_VALUE(reg1->idx, 1, 8); +again: + for (i = 0; i < 2; i++) { + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + idx[i] = 0xff; + + if (idx[i] == 0xff) + continue; + + if (!nhmex_mbox_get_shared_reg(box, idx[i], + __BITS_VALUE(config1, i, 32))) + goto fail; + alloc |= (0x1 << i); + } + + /* for the match/mask registers */ + if ((uncore_box_is_fake(box) || !reg2->alloc) && + !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) + goto fail; + + /* + * If it's a fake box -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + */ + if (!uncore_box_is_fake(box)) { + if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) + nhmex_mbox_alter_er(event, idx[0], true); + reg1->alloc |= alloc; + reg2->alloc = 1; + } + return NULL; +fail: + if (idx[0] != 0xff && !(alloc & 0x1) && + idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + /* + * events 0xd ~ 0x10 are functional identical, but are + * controlled by different fields in the ZDP_CTL_FVC + * register. If we failed to take one field, try the + * rest 3 choices. + */ + BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); + idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + idx[0] = (idx[0] + 1) % 4; + idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { + config1 = nhmex_mbox_alter_er(event, idx[0], false); + goto again; + } + } + + if (alloc & 0x1) + nhmex_mbox_put_shared_reg(box, idx[0]); + if (alloc & 0x2) + nhmex_mbox_put_shared_reg(box, idx[1]); + return &constraint_empty; +} + +static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (uncore_box_is_fake(box)) + return; + + if (reg1->alloc & 0x1) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); + if (reg1->alloc & 0x2) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); + reg1->alloc = 0; + + if (reg2->alloc) { + nhmex_mbox_put_shared_reg(box, reg2->idx); + reg2->alloc = 0; + } +} + +static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) +{ + if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return er->idx; + return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; +} + +static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + struct extra_reg *er; + unsigned msr; + int reg_idx = 0; + + if (WARN_ON_ONCE(reg1->idx != -1)) + return -EINVAL; + /* + * The mbox events may require 2 extra MSRs at the most. But only + * the lower 32 bits in these MSRs are significant, so we can use + * config1 to pass two MSRs' config. + */ + for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + if (er->idx == __BITS_VALUE(reg1->idx, 0, 8) || + er->idx == __BITS_VALUE(reg1->idx, 1, 8)) + continue; + if (WARN_ON_ONCE(reg_idx >= 2)) + return -EINVAL; + + msr = er->msr + type->msr_offset * box->pmu->pmu_idx; + if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) + return -EINVAL; + + /* always use the 32~63 bits to pass the PLD config */ + if (er->idx == EXTRA_REG_NHMEX_M_PLD) + reg_idx = 1; + + reg1->idx &= ~(0xff << (reg_idx * 8)); + reg1->reg &= ~(0xffff << (reg_idx * 16)); + reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); + reg1->reg |= msr << (reg_idx * 16); + reg1->config = event->attr.config1; + reg_idx++; + } + /* use config2 to pass the filter config */ + reg2->idx = EXTRA_REG_NHMEX_M_FILTER; + if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) + reg2->config = event->attr.config2; + else + reg2->config = ~0ULL; + if (box->pmu->pmu_idx == 0) + reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; + else + reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; + + return 0; +} + +static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return box->shared_regs[idx].config; + + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + return config; +} + +static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx; + + idx = __BITS_VALUE(reg1->idx, 0, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + nhmex_mbox_shared_reg_config(box, idx)); + idx = __BITS_VALUE(reg1->idx, 1, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + nhmex_mbox_shared_reg_config(box, idx)); + + wrmsrl(reg2->reg, 0); + if (reg2->config != ~0ULL) { + wrmsrl(reg2->reg + 1, + reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); + wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); + wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + } + + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); +DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); +DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); +DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); +DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); +DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cfg, filter_cfg, "config2:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); +DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); + +static struct attribute *nhmex_uncore_mbox_formats_attr[] = { + &format_attr_count_mode.attr, + &format_attr_storage_mode.attr, + &format_attr_wrap_mode.attr, + &format_attr_flag_mode.attr, + &format_attr_inc_sel.attr, + &format_attr_set_flag_sel.attr, + &format_attr_filter_cfg.attr, + &format_attr_filter_match.attr, + &format_attr_filter_mask.attr, + &format_attr_dsp.attr, + &format_attr_thr.attr, + &format_attr_fvc.attr, + &format_attr_pgt.attr, + &format_attr_map.attr, + &format_attr_iss.attr, + &format_attr_pld.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_mbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_mbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_mbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_mbox_msr_enable_event, + .hw_config = nhmex_mbox_hw_config, + .get_constraint = nhmex_mbox_get_constraint, + .put_constraint = nhmex_mbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_mbox = { + .name = "mbox", + .num_counters = 6, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_M0_MSR_PMU_CTL0, + .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, + .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_M_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 8, + .event_descs = nhmex_uncore_mbox_events, + .ops = &nhmex_uncore_mbox_ops, + .format_group = &nhmex_uncore_mbox_format_group, +}; + +void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int port; + + /* adjust the main event selector */ + if (reg1->idx % 2) { + reg1->idx--; + hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } else { + reg1->idx++; + hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } + + /* adjust address or config of extra register */ + port = reg1->idx / 6 + box->pmu->pmu_idx * 4; + switch (reg1->idx % 6) { + case 0: + reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port); + break; + case 1: + reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port); + break; + case 2: + /* the 8~15 bits to the 0~7 bits */ + reg1->config >>= 8; + break; + case 3: + /* the 0~7 bits to the 8~15 bits */ + reg1->config <<= 8; + break; + case 4: + reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port); + break; + case 5: + reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port); + break; + }; +} + +/* + * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. + * An event set consists of 6 events, the 3rd and 4th events in + * an event set use the same extra register. So an event set uses + * 5 extra registers. + */ +static struct event_constraint * +nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + struct intel_uncore_extra_reg *er; + unsigned long flags; + int idx, er_idx; + u64 config1; + bool ok = false; + + if (!uncore_box_is_fake(box) && reg1->alloc) + return NULL; + + idx = reg1->idx % 6; + config1 = reg1->config; +again: + er_idx = idx; + /* the 3rd and 4th events use the same extra register */ + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (idx < 2) { + if (!atomic_read(&er->ref) || er->config == reg1->config) { + atomic_inc(&er->ref); + er->config = reg1->config; + ok = true; + } + } else if (idx == 2 || idx == 3) { + /* + * these two events use different fields in a extra register, + * the 0~7 bits and the 8~15 bits respectively. + */ + u64 mask = 0xff << ((idx - 2) * 8); + if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || + !((er->config ^ config1) & mask)) { + atomic_add(1 << ((idx - 2) * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + } else { + if (!atomic_read(&er->ref) || + (er->config == (hwc->config >> 32) && + er->config1 == reg1->config && + er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config = (hwc->config >> 32); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + /* + * The Rbox events are always in pairs. The paired + * events are functional identical, but use different + * extra registers. If we failed to take an extra + * register, try the alternative. + */ + if (idx % 2) + idx--; + else + idx++; + if (idx != reg1->idx % 6) { + if (idx == 2) + config1 >>= 8; + else if (idx == 3) + config1 <<= 8; + goto again; + } + } else { + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx % 6) + nhmex_rbox_alter_er(box, event); + reg1->alloc = 1; + } + return NULL; + } + return &constraint_empty; +} + +static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + int idx, er_idx; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + if (idx == 2 || idx == 3) + atomic_sub(1 << ((idx - 2) * 8), &er->ref); + else + atomic_dec(&er->ref); + + reg1->alloc = 0; +} + +static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int port, idx; + + idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> + NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + if (idx >= 0x18) + return -EINVAL; + + reg1->idx = idx; + reg1->config = event->attr.config1; + + port = idx / 6 + box->pmu->pmu_idx * 4; + idx %= 6; + switch (idx) { + case 0: + reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port); + break; + case 1: + reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port); + break; + case 2: + case 3: + reg1->reg = NHMEX_R_MSR_PORTN_QLX_CFG(port); + break; + case 4: + case 5: + if (idx == 4) + reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port); + else + reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port); + reg2->config = event->attr.config2; + hwc->config |= event->attr.config & (~0ULL << 32); + break; + }; + return 0; +} + +static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + er = &box->shared_regs[idx]; + + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + + return config; +} + +static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx, er_idx; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + switch (idx) { + case 0: + case 1: + wrmsrl(reg1->reg, reg1->config); + break; + case 2: + case 3: + wrmsrl(reg1->reg, nhmex_rbox_shared_reg_config(box, er_idx)); + break; + case 4: + case 5: + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, hwc->config >> 32); + wrmsrl(reg1->reg + 2, reg2->config); + break; + }; + + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); +} + +DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); +DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); + +static struct attribute *nhmex_uncore_rbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_xbr_mm_cfg.attr, + &format_attr_xbr_match.attr, + &format_attr_xbr_mask.attr, + &format_attr_qlx_cfg.attr, + &format_attr_iperf_cfg.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_rbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_rbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_rbox_events[] = { + INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), + INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_rbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_rbox_msr_enable_event, + .hw_config = nhmex_rbox_hw_config, + .get_constraint = nhmex_rbox_get_constraint, + .put_constraint = nhmex_rbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_rbox = { + .name = "rbox", + .num_counters = 8, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_R_MSR_PMON_CTL0, + .perf_ctr = NHMEX_R_MSR_PMON_CNT0, + .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_R_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 20, + .event_descs = nhmex_uncore_rbox_events, + .ops = &nhmex_uncore_rbox_ops, + .format_group = &nhmex_uncore_rbox_format_group +}; + +static struct intel_uncore_type *nhmex_msr_uncores[] = { + &nhmex_uncore_ubox, + &nhmex_uncore_cbox, + &nhmex_uncore_bbox, + &nhmex_uncore_sbox, + &nhmex_uncore_mbox, + &nhmex_uncore_rbox, + &nhmex_uncore_wbox, + NULL, +}; +/* end of Nehalem-EX uncore support */ + +static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -787,8 +1841,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, hwc->event_base = uncore_perf_ctr(box, hwc->idx); } -static void uncore_perf_event_update(struct intel_uncore_box *box, - struct perf_event *event) +static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) { u64 prev_count, new_count, delta; int shift; @@ -858,14 +1911,12 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, - int cpu) +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) { struct intel_uncore_box *box; int i, size; - size = sizeof(*box) + type->num_shared_regs * - sizeof(struct intel_uncore_extra_reg); + size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); if (!box) @@ -915,12 +1966,11 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) * perf core schedules event on the basis of cpu, uncore events are * collected by one of the cpus inside a physical package. */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), - smp_processor_id()); + return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } -static int uncore_collect_events(struct intel_uncore_box *box, - struct perf_event *leader, bool dogrp) +static int +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { struct perf_event *event; int n, max_count; @@ -952,8 +2002,7 @@ static int uncore_collect_events(struct intel_uncore_box *box, } static struct event_constraint * -uncore_get_event_constraint(struct intel_uncore_box *box, - struct perf_event *event) +uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_type *type = box->pmu->type; struct event_constraint *c; @@ -977,15 +2026,13 @@ uncore_get_event_constraint(struct intel_uncore_box *box, return &type->unconstrainted; } -static void uncore_put_event_constraint(struct intel_uncore_box *box, - struct perf_event *event) +static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); } -static int uncore_assign_events(struct intel_uncore_box *box, - int assign[], int n) +static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) { unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; @@ -1407,8 +2454,7 @@ static bool pcidrv_registered; /* * add a pci uncore device */ -static int __devinit uncore_pci_add(struct intel_uncore_type *type, - struct pci_dev *pdev) +static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) { struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; @@ -1485,6 +2531,7 @@ static int __devinit uncore_pci_probe(struct pci_dev *pdev, struct intel_uncore_type *type; type = (struct intel_uncore_type *)id->driver_data; + return uncore_pci_add(type, pdev); } @@ -1612,8 +2659,8 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) return 0; } -static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, - int old_cpu, int new_cpu) +static void __cpuinit +uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) { struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; @@ -1694,8 +2741,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu) uncore_change_context(pci_uncores, -1, cpu); } -static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int + __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; @@ -1732,12 +2779,12 @@ static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, } static struct notifier_block uncore_cpu_nb __cpuinitdata = { - .notifier_call = uncore_cpu_notifier, + .notifier_call = uncore_cpu_notifier, /* * to migrate uncore events, our notifier should be executed * before perf core's notifier. */ - .priority = CPU_PRI_PERF + 1, + .priority = CPU_PRI_PERF + 1, }; static void __init uncore_cpu_setup(void *dummy) @@ -1767,6 +2814,9 @@ static int __init uncore_cpu_init(void) snbep_uncore_cbox.num_boxes = max_cores; msr_uncores = snbep_msr_uncores; break; + case 46: + msr_uncores = nhmex_msr_uncores; + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index b13e9ea81def..f3851892e077 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -5,8 +5,6 @@ #include "perf_event.h" #define UNCORE_PMU_NAME_LEN 32 -#define UNCORE_BOX_HASH_SIZE 8 - #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) #define UNCORE_FIXED_EVENT 0xff @@ -115,6 +113,10 @@ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + /* SNB-EP pci control register */ #define SNBEP_PCI_PMON_BOX_CTL 0xf4 #define SNBEP_PCI_PMON_CTL0 0xd8 @@ -158,6 +160,193 @@ #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd +/* NHM-EX event control */ +#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff +#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 +#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) +#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) +#define NHMEX_PMON_CTL_PMI_EN (1 << 20) +#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) +#define NHMEX_PMON_CTL_INVERT (1 << 23) +#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 +#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_UMASK_MASK | \ + NHMEX_PMON_CTL_EDGE_DET | \ + NHMEX_PMON_CTL_INVERT | \ + NHMEX_PMON_CTL_TRESH_MASK) + +/* NHM-EX Ubox */ +#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define NHMEX_U_MSR_PMON_CTR 0xc11 +#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 + +#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) +#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e +#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) +#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) +#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) + +#define NHMEX_U_PMON_RAW_EVENT_MASK \ + (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_EDGE_DET) + +/* NHM-EX Cbox */ +#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 +#define NHMEX_C0_MSR_PMON_CTR0 0xd11 +#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 +#define NHMEX_C_MSR_OFFSET 0x20 + +/* NHM-EX Bbox */ +#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 +#define NHMEX_B0_MSR_PMON_CTR0 0xc31 +#define NHMEX_B0_MSR_PMON_CTL0 0xc30 +#define NHMEX_B_MSR_OFFSET 0x40 +#define NHMEX_B0_MSR_MATCH 0xe45 +#define NHMEX_B0_MSR_MASK 0xe46 +#define NHMEX_B1_MSR_MATCH 0xe4d +#define NHMEX_B1_MSR_MASK 0xe4e + +#define NHMEX_B_PMON_CTL_EN (1 << 0) +#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_B_PMON_CTR_SHIFT 6 +#define NHMEX_B_PMON_CTR_MASK \ + (0x3 << NHMEX_B_PMON_CTR_SHIFT) +#define NHMEX_B_PMON_RAW_EVENT_MASK \ + (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ + NHMEX_B_PMON_CTR_MASK) + +/* NHM-EX Sbox */ +#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 +#define NHMEX_S0_MSR_PMON_CTR0 0xc51 +#define NHMEX_S0_MSR_PMON_CTL0 0xc50 +#define NHMEX_S_MSR_OFFSET 0x80 +#define NHMEX_S0_MSR_MM_CFG 0xe48 +#define NHMEX_S0_MSR_MATCH 0xe49 +#define NHMEX_S0_MSR_MASK 0xe4a +#define NHMEX_S1_MSR_MM_CFG 0xe58 +#define NHMEX_S1_MSR_MATCH 0xe59 +#define NHMEX_S1_MSR_MASK 0xe5a + +#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) + +/* NHM-EX Mbox */ +#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 +#define NHMEX_M0_MSR_PMU_DSP 0xca5 +#define NHMEX_M0_MSR_PMU_ISS 0xca6 +#define NHMEX_M0_MSR_PMU_MAP 0xca7 +#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 +#define NHMEX_M0_MSR_PMU_PGT 0xca9 +#define NHMEX_M0_MSR_PMU_PLD 0xcaa +#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab +#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 +#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 +#define NHMEX_M_MSR_OFFSET 0x40 +#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 +#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c + +#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) +#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL +#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL +#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 + +#define NHMEX_M_PMON_CTL_EN (1 << 0) +#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) +#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 +#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 +#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) +#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) +#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 +#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ + (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ + (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) +#define NHMEX_M_PMON_RAW_EVENT_MASK \ + (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ + NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ + NHMEX_M_PMON_CTL_WRAP_MODE | \ + NHMEX_M_PMON_CTL_FLAG_MODE | \ + NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) + + +#define NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK 0x1f +#define NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK (0x7 << 5) +#define NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK (0x7 << 8) +#define NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR (1 << 23) +#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK \ + (NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK | \ + NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK | \ + NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK | \ + NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) + +/* + * use the 9~13 bits to select event If the 7th bit is not set, + * otherwise use the 19~21 bits to select event. + */ +#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_EXTAR_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) +#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_SET_FLAG_SEL_MASK, \ + (u64)-1, NHMEX_M_##r) + +/* NHM-EX Rbox */ +#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 +#define NHMEX_R_MSR_PMON_CTL0 0xe10 +#define NHMEX_R_MSR_PMON_CNT0 0xe11 +#define NHMEX_R_MSR_OFFSET 0x20 + +#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ + ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) +#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ + (((n) < 4 ? 0 : 0x10) + (n) * 4) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ + (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ + (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) + +#define NHMEX_R_PMON_CTL_EN (1 << 0) +#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) +#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK + +/* NHM-EX Wbox */ +#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 +#define NHMEX_W_MSR_PMON_CNT0 0xc90 +#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 +#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 +#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 + +#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -178,6 +367,7 @@ struct intel_uncore_type { unsigned msr_offset; unsigned num_shared_regs:8; unsigned single_fixed:1; + unsigned pair_ctr_ctl:1; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -213,7 +403,7 @@ struct intel_uncore_pmu { struct intel_uncore_extra_reg { raw_spinlock_t lock; - u64 config1; + u64 config, config1, config2; atomic_t ref; }; @@ -323,14 +513,16 @@ unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return idx + box->pmu->type->event_ctl + + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + box->pmu->type->msr_offset * box->pmu->pmu_idx; } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return idx + box->pmu->type->perf_ctr + + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + box->pmu->type->msr_offset * box->pmu->pmu_idx; } @@ -422,3 +614,8 @@ static inline void uncore_box_init(struct intel_uncore_box *box) box->pmu->type->ops->init_box(box); } } + +static inline bool uncore_box_is_fake(struct intel_uncore_box *box) +{ + return (box->phys_id < 0); +} diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81edf..ae42418bc50f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -27,8 +27,8 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { - printk(" [<%p>] %s%pB\n", (void *) address, - reliable ? "" : "? ", (void *) address); + pr_cont(" [<%p>] %s%pB\n", + (void *)address, reliable ? "" : "? ", (void *)address); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; + print_modules(); show_regs(regs); #ifdef CONFIG_X86_32 if (user_mode_vm(regs)) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daab..1038a417ea53 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %08lx", *stack++); + pr_cont("\n"); + pr_cont(" %08lx", *stack++); touch_nmi_watchdog(); } - printk(KERN_CONT "\n"); + pr_cont("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs) { int i; - print_modules(); __show_regs(regs, !user_mode_vm(regs)); - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); + pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", + TASK_COMM_LEN, current->comm, task_pid_nr(current), + current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. @@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs) unsigned char c; u8 *ip; - printk(KERN_EMERG "Stack:\n"); + pr_emerg("Stack:\n"); show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); - printk(KERN_EMERG "Code: "); + pr_emerg("Code:"); ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { @@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad EIP value."); + pr_cont(" Bad EIP value."); break; } if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); + pr_cont(" <%02x>", c); else - printk(KERN_CONT "%02x ", c); + pr_cont(" %02x", c); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa8..b653675d5288 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); - printk(KERN_CONT " <EOI> "); + pr_cont(" <EOI> "); } } else { if (((long) stack & (THREAD_SIZE-1)) == 0) break; } if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %016lx", *stack++); + pr_cont("\n"); + pr_cont(" %016lx", *stack++); touch_nmi_watchdog(); } preempt_enable(); - printk(KERN_CONT "\n"); + pr_cont("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs) sp = regs->sp; printk("CPU %d ", cpu); - print_modules(); __show_regs(regs, 1); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); + printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", + cur->comm, cur->pid, task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the @@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad RIP value."); + pr_cont(" Bad RIP value."); break; } if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); + pr_cont("<%02x> ", c); else - printk(KERN_CONT "%02x ", c); + pr_cont("%02x ", c); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38308fa7d7e5..459d4a0dca8d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1834,10 +1834,30 @@ end_repeat_nmi: */ call save_paranoid DEFAULT_FRAME 0 + + /* + * Save off the CR2 register. If we take a page fault in the NMI then + * it could corrupt the CR2 value. If the NMI preempts a page fault + * handler before it was able to read the CR2 register, and then the + * NMI itself takes a page fault, the page fault that was preempted + * will read the information from the NMI page fault and not the + * origin fault. Save it off and restore it if it changes. + * Use the r12 callee-saved register. + */ + movq %cr2, %r12 + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi + + /* Did the NMI take a page fault? Restore cr2 if it did */ + movq %cr2, %rcx + cmpq %rcx, %r12 + je 1f + movq %r12, %cr2 +1: + testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7c..1f5f1d5d2a02 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -294,9 +294,9 @@ void fixup_irqs(void) raw_spin_unlock(&desc->lock); if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); + pr_notice("Broke affinity for irq %i\n", irq); else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); + pr_notice("Cannot set affinity for irq %i\n", irq); } /* diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 1649cf899ad6..4873e62db6a1 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -514,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { #ifdef MODULE /* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id microcode_id[] = { +static const struct x86_cpu_id __initconst microcode_id[] = { #ifdef CONFIG_MICROCODE_INTEL { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, #endif diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac897..202494d2ec6e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/moduleloader.h> #include <linux/elf.h> #include <linux/vmalloc.h> @@ -30,9 +33,14 @@ #include <asm/pgtable.h> #if 0 -#define DEBUGP printk +#define DEBUGP(fmt, ...) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__) #else -#define DEBUGP(fmt...) +#define DEBUGP(fmt, ...) \ +do { \ + if (0) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) #endif void *module_alloc(unsigned long size) @@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, Elf32_Sym *sym; uint32_t *location; - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value - (uint32_t)location; break; default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", + pr_err("%s: Unknown relocation: %u\n", me->name, ELF32_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, void *loc; u64 val; - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, + ELF64_R_SYM(rel[i].r_info); DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); val = sym->st_value + rel[i].r_addend; @@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", + pr_err("%s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); - printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", + pr_err("`%s' likely not compiled with -mcmodel=kernel\n", me->name); return -ENOEXEC; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457be..f84f5c57de35 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) #ifdef CONFIG_X86_32 /* * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. + * add a workaround to the iret problem in C (preventing nested + * NMIs if an NMI takes a trap). Simply have 3 states the NMI + * can be in: * * 1) not running * 2) executing @@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) * If an NMI hits a breakpoint that executes an iret, another * NMI can preempt it. We do not want to allow this new NMI * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. + * We set the state to "latched", and the exit of the first NMI will + * perform a dec_return, if the result is zero (NOT_RUNNING), then + * it will simply exit the NMI handler. If not, the dec_return + * would have set the state to NMI_EXECUTING (what we want it to + * be when we are running). In this case, we simply jump back + * to rerun the NMI handler again, and restart the 'latched' NMI. + * + * No trap (breakpoint or page fault) should be hit before nmi_restart, + * thus there is no race between the first check of state for NOT_RUNNING + * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs + * at this point. + * + * In case the NMI takes a page fault, we need to save off the CR2 + * because the NMI could have preempted another page fault and corrupt + * the CR2 that is about to be read. As nested NMIs must be restarted + * and they can not take breakpoints or page faults, the update of the + * CR2 must be done before converting the nmi state back to NOT_RUNNING. + * Otherwise, there would be a race of another nested NMI coming in + * after setting state to NOT_RUNNING but before updating the nmi_cr2. */ enum nmi_states { - NMI_NOT_RUNNING, + NMI_NOT_RUNNING = 0, NMI_EXECUTING, NMI_LATCHED, }; static DEFINE_PER_CPU(enum nmi_states, nmi_state); +static DEFINE_PER_CPU(unsigned long, nmi_cr2); #define nmi_nesting_preprocess(regs) \ do { \ - if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ - __get_cpu_var(nmi_state) = NMI_LATCHED; \ + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ + this_cpu_write(nmi_state, NMI_LATCHED); \ return; \ } \ - nmi_restart: \ - __get_cpu_var(nmi_state) = NMI_EXECUTING; \ - } while (0) + this_cpu_write(nmi_state, NMI_EXECUTING); \ + this_cpu_write(nmi_cr2, read_cr2()); \ + } while (0); \ + nmi_restart: #define nmi_nesting_postprocess() \ do { \ - if (cmpxchg(&__get_cpu_var(nmi_state), \ - NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ + write_cr2(this_cpu_read(nmi_cr2)); \ + if (this_cpu_dec_return(nmi_state)) \ goto nmi_restart; \ } while (0) #else /* x86_64 */ diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 149b8d9c6ad4..6d9582ec0324 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", + __initdata); } static void __init cleanup_nmi_testsuite(void) @@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, - NMI_FLAG_FIRST, "nmi_selftest")) { + if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + NMI_FLAG_FIRST, "nmi_selftest", __initdata)) { nmi_fail = FAILURE; return; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64a..299d49302e7d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -22,6 +22,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "Calgary: " fmt + #include <linux/kernel.h> #include <linux/init.h> #include <linux/types.h> @@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev, offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, npages, 0, boundary_size, 0); if (offset == ~0UL) { - printk(KERN_WARNING "Calgary: IOMMU full.\n"); + pr_warn("IOMMU full\n"); spin_unlock_irqrestore(&tbl->it_lock, flags); if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); @@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, entry = iommu_range_alloc(dev, tbl, npages); if (unlikely(entry == DMA_ERROR_CODE)) { - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); + pr_warn("failed to allocate %u pages in iommu %p\n", + npages, tbl); return DMA_ERROR_CODE; } @@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl) i++; } while ((val & 0xff) != 0xff && i < 100); if (i == 100) - printk(KERN_WARNING "Calgary: PCI bus not quiesced, " - "continuing anyway\n"); + pr_warn("PCI bus not quiesced, continuing anyway\n"); /* invalidate TCE cache */ target = calgary_reg(bbar, tar_offset(tbl->it_busno)); @@ -604,8 +605,7 @@ begin: i++; } while ((val64 & 0xff) != 0xff && i < 100); if (i == 100) - printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " - "continuing anyway\n"); + pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n"); /* 3. poll Page Migration DEBUG for SoftStopFault */ target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); @@ -617,8 +617,7 @@ begin: if (++count < 100) goto begin; else { - printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " - "aborting TCE cache flush sequence!\n"); + pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n"); return; /* pray for the best */ } } @@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl) plssr = be32_to_cpu(readl(target)); /* If no error, the agent ID in the CSR is not valid */ - printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " - "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); + pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", + tbl->it_busno, csr, plssr); } static void calioc2_dump_error_regs(struct iommu_table *tbl) @@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) target = calgary_reg(bbar, phboff | 0x800); mck = be32_to_cpu(readl(target)); - printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", - tbl->it_busno); + pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); - printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", - csr, plssr, csmr, mck); + pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", + csr, plssr, csmr, mck); /* dump rest of error regs */ - printk(KERN_EMERG "Calgary: "); + pr_emerg(""); for (i = 0; i < ARRAY_SIZE(errregs); i++) { /* err regs are at 0x810 - 0x870 */ erroff = (0x810 + (i * 0x10)); target = calgary_reg(bbar, phboff | erroff); errregs[i] = be32_to_cpu(readl(target)); - printk("0x%08x@0x%lx ", errregs[i], erroff); + pr_cont("0x%08x@0x%lx ", errregs[i], erroff); } - printk("\n"); + pr_cont("\n"); /* root complex status */ target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e59..ef6a8456f719 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/errno.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -145,16 +147,14 @@ void show_regs_common(void) /* Board Name is optional */ board = dmi_get_system_info(DMI_BOARD_NAME); - printk(KERN_CONT "\n"); - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk(KERN_CONT " %s %s", vendor, product); - if (board) - printk(KERN_CONT "/%s", board); - printk(KERN_CONT "\n"); + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, + vendor, product, + board ? "/" : "", + board ? board : ""); } void flush_thread(void) @@ -645,7 +645,7 @@ static void amd_e400_idle(void) amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); - printk(KERN_INFO "System has AMD C1E enabled\n"); + pr_info("System has AMD C1E enabled\n"); } } @@ -659,8 +659,7 @@ static void amd_e400_idle(void) */ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &cpu); - printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", - cpu); + pr_info("Switch to broadcast mode on CPU%d\n", cpu); } clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); @@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); + pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); } #endif if (pm_idle) @@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) /* * One CPU supports mwait => All CPUs supports mwait */ - printk(KERN_INFO "using mwait in idle threads.\n"); + pr_info("using mwait in idle threads\n"); pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using AMD E400 aware idle routine\n"); + pr_info("using AMD E400 aware idle routine\n"); pm_idle = amd_e400_idle; } else pm_idle = default_idle; @@ -715,7 +713,7 @@ static int __init idle_setup(char *str) return -EINVAL; if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); + pr_info("using polling idle threads\n"); pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3e215ba68766..0a980c9d7cb8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); + pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", + dead_task->comm, + dead_task->mm->context.ldt, + dead_task->mm->context.size); BUG(); } } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5de92f1abd76..52190a938b4a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/reboot.h> #include <linux/init.h> @@ -20,14 +22,12 @@ #include <asm/virtext.h> #include <asm/cpu.h> #include <asm/nmi.h> +#include <asm/smp.h> -#ifdef CONFIG_X86_32 -# include <linux/ctype.h> -# include <linux/mc146818rtc.h> -# include <asm/realmode.h> -#else -# include <asm/x86_init.h> -#endif +#include <linux/ctype.h> +#include <linux/mc146818rtc.h> +#include <asm/realmode.h> +#include <asm/x86_init.h> /* * Power off function, if any @@ -49,7 +49,7 @@ int reboot_force; */ static int reboot_default = 1; -#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP static int reboot_cpu = -1; #endif @@ -67,8 +67,8 @@ bool port_cf9_safe = false; * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] * warm Don't set the cold reboot flag * cold Set the cold reboot flag - * bios Reboot by jumping through the BIOS (only for X86_32) - * smp Reboot by executing reset on BSP or other CPU (only for X86_32) + * bios Reboot by jumping through the BIOS + * smp Reboot by executing reset on BSP or other CPU * triple Force a triple fault (init) * kbd Use the keyboard controller. cold reset (default) * acpi Use the RESET_REG in the FADT @@ -95,7 +95,6 @@ static int __init reboot_setup(char *str) reboot_mode = 0; break; -#ifdef CONFIG_X86_32 #ifdef CONFIG_SMP case 's': if (isdigit(*(str+1))) { @@ -112,7 +111,6 @@ static int __init reboot_setup(char *str) #endif /* CONFIG_SMP */ case 'b': -#endif case 'a': case 'k': case 't': @@ -138,7 +136,6 @@ static int __init reboot_setup(char *str) __setup("reboot=", reboot_setup); -#ifdef CONFIG_X86_32 /* * Reboot options and system auto-detection code provided by * Dell Inc. so their systems "just work". :-) @@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_BIOS) { reboot_type = BOOT_BIOS; - printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + "BIOS", d->ident); } return 0; } -void machine_real_restart(unsigned int type) +void __noreturn machine_real_restart(unsigned int type) { - void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) - real_mode_header->machine_real_restart_asm; - local_irq_disable(); /* @@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type) /* * Switch back to the initial page table. */ +#ifdef CONFIG_X86_32 load_cr3(initial_page_table); - - /* - * Write 0x1234 to absolute memory location 0x472. The BIOS reads - * this on booting to tell it to "Bypass memory test (also warm - * boot)". This seems like a fairly standard thing that gets set by - * REBOOT.COM programs, and the previous reset routine did this - * too. */ - *((unsigned short *)0x472) = reboot_mode; +#else + write_cr3(real_mode_header->trampoline_pgd); +#endif /* Jump to the identity-mapped low memory code */ - restart_lowmem(type); +#ifdef CONFIG_X86_32 + asm volatile("jmpl *%0" : : + "rm" (real_mode_header->machine_real_restart_asm), + "a" (type)); +#else + asm volatile("ljmpl *%0" : : + "m" (real_mode_header->machine_real_restart_asm), + "D" (type)); +#endif + unreachable(); } #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); #endif -#endif /* CONFIG_X86_32 */ - /* * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot */ @@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_CF9) { reboot_type = BOOT_CF9; - printk(KERN_INFO "%s series board detected. " - "Selecting PCI-method for reboots.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + "PCI", d->ident); } return 0; } @@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_KBD) { reboot_type = BOOT_KBD; - printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboot.\n", + "KBD", d->ident); } return 0; } /* - * This is a single dmi_table handling all reboot quirks. Note that - * REBOOT_BIOS is only available for 32bit + * This is a single dmi_table handling all reboot quirks. */ static struct dmi_system_id __initdata reboot_dmi_table[] = { -#ifdef CONFIG_X86_32 { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, .ident = "Dell E520", @@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, -#endif /* CONFIG_X86_32 */ { /* Handle reboot issue on Acer Aspire one */ .callback = set_kbd_reboot, @@ -584,13 +580,11 @@ static void native_machine_emergency_restart(void) reboot_type = BOOT_KBD; break; -#ifdef CONFIG_X86_32 case BOOT_BIOS: machine_real_restart(MRR_BIOS); reboot_type = BOOT_KBD; break; -#endif case BOOT_ACPI: acpi_reboot(); @@ -632,12 +626,10 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; -#ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; -#endif /* Make certain the cpu I'm about to reboot on is online */ if (!cpu_online(reboot_cpu_id)) @@ -678,7 +670,7 @@ static void __machine_emergency_restart(int emergency) static void native_machine_restart(char *__unused) { - printk("machine restart\n"); + pr_notice("machine restart\n"); if (!reboot_force) machine_shutdown(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16be6dc14db1..f4b9b80e1b95 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p) x86_init.timers.wallclock_init(); - x86_platform.wallclock_init(); - mcheck_init(); arch_init_ideal_nops(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 21af737053aa..b280908a376e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,6 +6,9 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> @@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) me->comm, me->pid, where, frame, regs->ip, regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } force_sig(SIGSEGV, me); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7bd8a0823654..c1a310fb8309 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,4 +1,4 @@ -/* + /* * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> @@ -39,6 +39,8 @@ * Glauber Costa : i386 and x86_64 integration */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/smp.h> #include <linux/module.h> @@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void) * boards) */ - pr_debug("CALLIN, before setup_local_APIC().\n"); + pr_debug("CALLIN, before setup_local_APIC()\n"); if (apic->smp_callin_clear_local_apic) apic->smp_callin_clear_local_apic(); setup_local_APIC(); @@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused) check_tsc_sync_target(); /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - * * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. */ - ipi_call_lock(); lock_vector_lock(); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); - ipi_call_unlock(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; x86_platform.nmi_init(); @@ -432,17 +425,16 @@ static void impress_friends(void) /* * Allow the user to impress friends. */ - pr_debug("Before bogomips.\n"); + pr_debug("Before bogomips\n"); for_each_possible_cpu(cpu) if (cpumask_test_cpu(cpu, cpu_callout_mask)) bogosum += cpu_data(cpu).loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); - pr_debug("Before bogocount - setting activated=1.\n"); + pr_debug("Before bogocount - setting activated=1\n"); } void __inquire_remote_apic(int apicid) @@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); + pr_info("Inquiring remote APIC 0x%x...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); + pr_info("... APIC 0x%x %s: ", apicid, names[i]); /* * Wait for idle. */ status = safe_apic_wait_icr_idle(); if (status) - printk(KERN_CONT - "a previous APIC delivery may have failed\n"); + pr_cont("a previous APIC delivery may have failed\n"); apic_icr_write(APIC_DM_REMRD | regs[i], apicid); @@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid) switch (status) { case APIC_ICR_RR_VALID: status = apic_read(APIC_RRR); - printk(KERN_CONT "%08x\n", status); + pr_cont("%08x\n", status); break; default: - printk(KERN_CONT "failed\n"); + pr_cont("failed\n"); } } } @@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); } - pr_debug("NMI sent.\n"); + pr_debug("NMI sent\n"); if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); + pr_err("APIC never delivered???\n"); if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + pr_err("APIC delivery error (%lx)\n", accept_status); return (send_status | accept_status); } @@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) apic_read(APIC_ESR); } - pr_debug("Asserting INIT.\n"); + pr_debug("Asserting INIT\n"); /* * Turn INIT on target chip @@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) mdelay(10); - pr_debug("Deasserting INIT.\n"); + pr_debug("Deasserting INIT\n"); /* Target chip */ /* Send IPI */ @@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Run STARTUP IPI loop. */ - pr_debug("#startup loops: %d.\n", num_starts); + pr_debug("#startup loops: %d\n", num_starts); for (j = 1; j <= num_starts; j++) { - pr_debug("Sending STARTUP #%d.\n", j); + pr_debug("Sending STARTUP #%d\n", j); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - pr_debug("After apic_write.\n"); + pr_debug("After apic_write\n"); /* * STARTUP IPI @@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) */ udelay(300); - pr_debug("Startup point 1.\n"); + pr_debug("Startup point 1\n"); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) if (send_status || accept_status) break; } - pr_debug("After Startup.\n"); + pr_debug("After Startup\n"); if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); + pr_err("APIC never delivered???\n"); if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + pr_err("APIC delivery error (%lx)\n", accept_status); return (send_status | accept_status); } @@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid) if (system_state == SYSTEM_BOOTING) { if (node != current_node) { if (current_node > (-1)) - pr_cont(" Ok.\n"); + pr_cont(" OK\n"); current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); + pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", @@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) /* * allow APs to start initializing. */ - pr_debug("Before Callout %d.\n", cpu); + pr_debug("Before Callout %d\n", cpu); cpumask_set_cpu(cpu, cpu_callout_mask); - pr_debug("After Callout %d.\n", cpu); + pr_debug("After Callout %d\n", cpu); /* * Wait 5s total for a response @@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) pr_err("CPU%d: Stuck ??\n", cpu); else /* trampoline code not run */ - pr_err("CPU%d: Not responding.\n", cpu); + pr_err("CPU%d: Not responding\n", cpu); if (apic->inquire_remote_apic) apic->inquire_remote_apic(apicid); } @@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || !physid_isset(apicid, phys_cpu_present_map) || !apic->apic_id_valid(apicid)) { - printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); + pr_err("%s: bad cpu %d\n", __func__, cpu); return -EINVAL; } @@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus) unsigned int cpu; unsigned nr; - printk(KERN_WARNING - "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_BIGSMP.\n"); + pr_warn("More than 8 CPUs detected - skipping them\n" + "Use CONFIG_X86_BIGSMP\n"); nr = 0; for_each_present_cpu(cpu) { @@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus) #endif if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk(KERN_WARNING - "weird, boot CPU (#%d) not listed by the BIOS.\n", + pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", hard_smp_processor_id()); physid_set(hard_smp_processor_id(), phys_cpu_present_map); @@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!smp_found_config && !acpi_lapic) { preempt_enable(); - printk(KERN_NOTICE "SMP motherboard not detected.\n"); + pr_notice("SMP motherboard not detected\n"); disable_smp(); if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); + pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); return -1; } @@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus) * CPU too, but we do it for the sake of robustness anyway. */ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk(KERN_NOTICE - "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); + pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", + boot_cpu_physical_apicid); physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); @@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!disable_apic) { pr_err("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation." - "(tell your hw vendor)\n"); + pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); } smpboot_clear_io_apic(); disable_ioapic_support(); @@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If SMP should be disabled, then really disable it! */ if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated.\n"); + pr_info("SMP mode deactivated\n"); smpboot_clear_io_apic(); connect_bsp_APIC(); @@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); + pr_info("SMP disabled\n"); disable_smp(); goto out; } @@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Set up local APIC timer on boot CPU. */ - printk(KERN_INFO "CPU%d: ", 0); + pr_info("CPU%d: ", 0); print_cpu_info(&cpu_data(0)); x86_init.timers.setup_percpu_clockev(); @@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { - pr_debug("Boot done.\n"); + pr_debug("Boot done\n"); nmi_selftest(); impress_friends(); @@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void) /* nr_cpu_ids could be reduced via nr_cpus= */ if (possible > nr_cpu_ids) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", + pr_warn("%d Processors exceeds NR_CPUS limit of %d\n", possible, nr_cpu_ids); possible = nr_cpu_ids; } @@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void) if (!setup_max_cpus) #endif if (possible > i) { - printk(KERN_WARNING - "%d Processors exceeds max_cpus limit of %u\n", + pr_warn("%d Processors exceeds max_cpus limit of %u\n", possible, setup_max_cpus); possible = i; } - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + pr_info("Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); for (i = 0; i < possible; i++) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05b31d92f69c..b481341c9369 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -9,6 +9,9 @@ /* * Handle hardware traps and faults. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/spinlock.h> @@ -143,12 +146,11 @@ trap_signal: #ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); + pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, str, + regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); - printk("\n"); + pr_cont("\n"); } #endif @@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code) if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", + pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", tsk->comm, task_pid_nr(tsk), regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); - printk("\n"); + pr_cont("\n"); } force_sig(SIGSEGV, tsk); @@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) conditional_sti(regs); #if 0 /* No need to warn about this any longer. */ - printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); + pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); #endif } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e3727..cfa5d4f7ca56 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/sched.h> #include <linux/init.h> @@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); + pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); tsc_disabled = 1; return 1; } @@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - printk("Fast TSC calibration failed\n"); + pr_err("Fast TSC calibration failed\n"); return 0; success: @@ -392,7 +393,7 @@ success: */ delta *= PIT_TICK_RATE; do_div(delta, i*256*1000); - printk("Fast TSC calibration using PIT\n"); + pr_info("Fast TSC calibration using PIT\n"); return delta; } @@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void) * use the reference value, as it is more precise. */ if (delta >= 90 && delta <= 110) { - printk(KERN_INFO - "TSC: PIT calibration matches %s. %d loops\n", - hpet ? "HPET" : "PMTIMER", i + 1); + pr_info("PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); return tsc_ref_min; } @@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void) */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); + pr_warn("Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !ref1 && !ref2) { - printk("TSC: No reference (HPET/PMTIMER) available\n"); + pr_notice("No reference (HPET/PMTIMER) available\n"); return 0; } /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed.\n"); + pr_warn("HPET/PMTIMER calibration failed\n"); return 0; } /* Use the alternative source */ - printk(KERN_INFO "TSC: using %s reference calibration\n", - hpet ? "HPET" : "PMTIMER"); + pr_info("using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); return tsc_ref_min; } /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !ref1 && !ref2) { - printk(KERN_INFO "TSC: Using PIT calibration value\n"); + pr_info("Using PIT calibration value\n"); return tsc_pit_min; } /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " - "Using PIT calibration\n"); + pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); return tsc_pit_min; } @@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void) * the PIT value as we know that there are PMTIMERs around * running at double speed. At least we let the user know: */ - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - printk(KERN_INFO "TSC: Using PIT calibration value\n"); + pr_warn("PIT calibration deviates from %s: %lu %lu\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); + pr_info("Using PIT calibration value\n"); return tsc_pit_min; } @@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason) tsc_unstable = 1; sched_clock_stable = 0; disable_sched_clock_irqtime(); - printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); + pr_info("Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) clocksource_mark_unstable(&clocksource_tsc); @@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) goto out; tsc_khz = freq; - printk(KERN_INFO "Refined TSC clocksource calibration: " - "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, - (unsigned long)tsc_khz % 1000); + pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", + (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); out: clocksource_register_khz(&clocksource_tsc, tsc_khz); @@ -970,9 +968,9 @@ void __init tsc_init(void) return; } - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); + pr_info("Detected %lu.%03lu MHz processor\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); /* * Secondary CPUs do not run through tsc_init(), so set up diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e8..54abcc0baf23 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -28,6 +28,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/capability.h> #include <linux/errno.h> #include <linux/interrupt.h> @@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) local_irq_enable(); if (!current->thread.vm86_info) { - printk("no vm86_info: BAD\n"); + pr_alert("no vm86_info: BAD\n"); do_exit(SIGSEGV); } set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); if (tmp) { - printk("vm86: could not access userspace vm86_info\n"); + pr_alert("could not access userspace vm86_info\n"); do_exit(SIGSEGV); } diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b4..992f890283e9 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,6 +16,7 @@ #include <linux/pci_ids.h> #include <linux/pci_regs.h> #include <linux/smp.h> +#include <linux/irq.h> #include <asm/apic.h> #include <asm/pci-direct.h> @@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void) ctl = readl(address + 4); printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl); + + /* If possible, let the vSMP foundation route the interrupt optimally */ +#ifdef CONFIG_SMP + if (cap & ctl & BIT(8)) { + ctl &= ~BIT(8); +#ifdef CONFIG_PROC_FS + /* Don't let users change irq affinity via procfs */ + no_irq_affinity = 1; +#endif + } +#endif + if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); @@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void) pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; - ctl &= ~(1 << 4); - writel(ctl, address + 4); - ctl = readl(address + 4); - printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); } + writel(ctl, address + 4); + ctl = readl(address + 4); + pr_info("vSMP CTL: control set to:0x%08x\n", ctl); early_iounmap(address, 8); } @@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void) #endif } +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +/* + * In vSMP, all cpus should be capable of handling interrupts, regardless of + * the APIC used. + */ +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + cpumask_setall(retmask); +} + +static void vsmp_apic_post_init(void) +{ + /* need to update phys_pkg_id */ + apic->phys_pkg_id = apicid_phys_pkg_id; + apic->vector_allocation_domain = fill_vector_allocation_domain; +} + void __init vsmp_init(void) { detect_vsmp_box(); if (!is_vsmp_box()) return; + x86_platform.apic_post_init = vsmp_apic_post_init; + vsmp_cap_cpus(); set_vsmp_pv_ops(); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 5db36caf4289..8d141b309046 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,6 +18,8 @@ * use the vDSO. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> @@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - struct task_struct *tsk; - - if (!show_unhandled_signals || !__ratelimit(&rs)) + if (!show_unhandled_signals) return; - tsk = current; - - printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, tsk->comm, task_pid_nr(tsk), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f3d074..6020f6f5927c 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic_string); EXPORT_SYMBOL(copy_user_generic_unrolled); +EXPORT_SYMBOL(copy_user_enhanced_fast_string); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 35c5e543f550..9f3167e891ef 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } -void wallclock_init_noop(void) { } /* * The platform setup functions are preset with the default functions @@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, - .wallclock_init = wallclock_init_noop, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, .iommu_shutdown = iommu_shutdown_noop, diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bd18149b2b0f..3d3e20709119 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -3,6 +3,9 @@ * * Author: Suresh Siddha <suresh.b.siddha@intel.com> */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/bootmem.h> #include <linux/compat.h> #include <asm/i387.h> @@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf) BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) - printk("save_i387_xstate: bad fpstate %p\n", buf); + pr_err("%s: bad fpstate %p\n", __func__, buf); if (!used_math()) return 0; @@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void) pcntxt_mask = eax + ((u64)edx << 32); if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pr_err("FP/SSE not shown under xsave features 0x%llx\n", pcntxt_mask); BUG(); } @@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void) setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " - "cntxt size 0x%x\n", - pcntxt_mask, xstate_size); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", + pcntxt_mask, xstate_size); } /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bc4e9d84157f..e0e6990723e9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -385,7 +385,7 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { /* * end could be not aligned, and We can not align that, diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 59880afa851f..71b5d5a07d7b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1,7 +1,7 @@ /* * SGI UltraViolet TLB flush routines. * - * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. + * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI. * * This code is released under the GNU General Public License version 2 or * later. @@ -38,8 +38,7 @@ static int timeout_base_ns[] = { static int timeout_us; static int nobau; -static int baudisabled; -static spinlock_t disable_lock; +static int nobau_perm; static cycles_t congested_cycles; /* tunables: */ @@ -47,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT; static int max_concurr_const = MAX_BAU_CONCURRENT; static int plugged_delay = PLUGGED_DELAY; static int plugsb4reset = PLUGSB4RESET; +static int giveup_limit = GIVEUP_LIMIT; static int timeoutsb4reset = TIMEOUTSB4RESET; static int ipi_reset_limit = IPI_RESET_LIMIT; static int complete_threshold = COMPLETE_THRESHOLD; static int congested_respns_us = CONGESTED_RESPONSE_US; static int congested_reps = CONGESTED_REPS; -static int congested_period = CONGESTED_PERIOD; +static int disabled_period = DISABLED_PERIOD; static struct tunables tunables[] = { {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ @@ -63,7 +63,8 @@ static struct tunables tunables[] = { {&complete_threshold, COMPLETE_THRESHOLD}, {&congested_respns_us, CONGESTED_RESPONSE_US}, {&congested_reps, CONGESTED_REPS}, - {&congested_period, CONGESTED_PERIOD} + {&disabled_period, DISABLED_PERIOD}, + {&giveup_limit, GIVEUP_LIMIT} }; static struct dentry *tunables_dir; @@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); static DEFINE_PER_CPU(struct bau_control, bau_control); static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); +static void +set_bau_on(void) +{ + int cpu; + struct bau_control *bcp; + + if (nobau_perm) { + pr_info("BAU not initialized; cannot be turned on\n"); + return; + } + nobau = 0; + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->nobau = 0; + } + pr_info("BAU turned on\n"); + return; +} + +static void +set_bau_off(void) +{ + int cpu; + struct bau_control *bcp; + + nobau = 1; + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->nobau = 1; + } + pr_info("BAU turned off\n"); + return; +} + /* * Determine the first node on a uvhub. 'Nodes' are used for kernel * memory allocation. @@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, * Both sockets dump their completed count total into * the message's count. */ - smaster->socket_acknowledge_count[mdp->msg_slot] = 0; + *sp = 0; asp = (struct atomic_short *)&msg->acknowledge_count; msg_ack_count = atom_asr(socket_ack_count, asp); @@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, } /* - * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. + * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. + * But not currently used. */ static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) { unsigned long descriptor_status; - unsigned long descriptor_status2; - descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); - descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; - descriptor_status = (descriptor_status << 1) | descriptor_status2; + descriptor_status = + ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; return descriptor_status; } @@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp) */ int handle_uv2_busy(struct bau_control *bcp) { - int busy_one = bcp->using_desc; - int normal = bcp->uvhub_cpu; - int selected = -1; - int i; - unsigned long descriptor_status; - unsigned long status; - int mmr_offset; - struct bau_desc *bau_desc_old; - struct bau_desc *bau_desc_new; - struct bau_control *hmaster = bcp->uvhub_master; struct ptc_stats *stat = bcp->statp; - cycles_t ttm; stat->s_uv2_wars++; - spin_lock(&hmaster->uvhub_lock); - /* try for the original first */ - if (busy_one != normal) { - if (!normal_busy(bcp)) - selected = normal; - } - if (selected < 0) { - /* can't use the normal, select an alternate */ - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - descriptor_status = read_lmmr(mmr_offset); - - /* scan available descriptors 32-63 */ - for (i = 0; i < UV_CPUS_PER_AS; i++) { - if ((hmaster->inuse_map & (1 << i)) == 0) { - status = ((descriptor_status >> - (i * UV_ACT_STATUS_SIZE)) & - UV_ACT_STATUS_MASK) << 1; - if (status != UV2H_DESC_BUSY) { - selected = i + UV_CPUS_PER_AS; - break; - } - } - } - } - - if (busy_one != normal) - /* mark the busy alternate as not in-use */ - hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); - - if (selected >= 0) { - /* switch to the selected descriptor */ - if (selected != normal) { - /* set the selected alternate as in-use */ - hmaster->inuse_map |= - (1 << (selected - UV_CPUS_PER_AS)); - if (selected > stat->s_uv2_wars_hw) - stat->s_uv2_wars_hw = selected; - } - bau_desc_old = bcp->descriptor_base; - bau_desc_old += (ITEMS_PER_DESC * busy_one); - bcp->using_desc = selected; - bau_desc_new = bcp->descriptor_base; - bau_desc_new += (ITEMS_PER_DESC * selected); - *bau_desc_new = *bau_desc_old; - } else { - /* - * All are busy. Wait for the normal one for this cpu to - * free up. - */ - stat->s_uv2_war_waits++; - spin_unlock(&hmaster->uvhub_lock); - ttm = get_cycles(); - do { - cpu_relax(); - } while (normal_busy(bcp)); - spin_lock(&hmaster->uvhub_lock); - /* switch to the original descriptor */ - bcp->using_desc = normal; - bau_desc_old = bcp->descriptor_base; - bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc); - bcp->using_desc = (ITEMS_PER_DESC * normal); - bau_desc_new = bcp->descriptor_base; - bau_desc_new += (ITEMS_PER_DESC * normal); - *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ - } - spin_unlock(&hmaster->uvhub_lock); - return FLUSH_RETRY_BUSYBUG; + bcp->busy = 1; + return FLUSH_GIVEUP; } static int uv2_wait_completion(struct bau_desc *bau_desc, @@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, { unsigned long descriptor_stat; cycles_t ttm; - int desc = bcp->using_desc; + int desc = bcp->uvhub_cpu; long busy_reps = 0; struct ptc_stats *stat = bcp->statp; @@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, /* spin on the status MMR, waiting for it to go idle */ while (descriptor_stat != UV2H_DESC_IDLE) { - /* - * Our software ack messages may be blocked because - * there are no swack resources available. As long - * as none of them has timed out hardware will NACK - * our message and its state will stay IDLE. - */ - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || - (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { + /* + * A h/w bug on the destination side may + * have prevented the message being marked + * pending, thus it doesn't get replied to + * and gets continually nacked until it times + * out with a SOURCE_TIMEOUT. + */ stat->s_stimeout++; return FLUSH_GIVEUP; - } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { - stat->s_strongnacks++; - bcp->conseccompletes = 0; - return FLUSH_GIVEUP; } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { + ttm = get_cycles(); + + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. + * Without using the extended status we have to + * deduce from the short time that this was a + * strong nack. + */ + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { + bcp->conseccompletes = 0; + stat->s_plugged++; + /* FLUSH_RETRY_PLUGGED causes hang on boot */ + return FLUSH_GIVEUP; + } stat->s_dtimeout++; bcp->conseccompletes = 0; - return FLUSH_RETRY_TIMEOUT; + /* FLUSH_RETRY_TIMEOUT causes hang on boot */ + return FLUSH_GIVEUP; } else { busy_reps++; if (busy_reps > 1000000) { @@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, busy_reps = 0; ttm = get_cycles(); if ((ttm - bcp->send_message) > - (bcp->clocks_per_100_usec)) { + bcp->timeout_interval) return handle_uv2_busy(bcp); - } } /* * descriptor_stat is still BUSY @@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc, { int right_shift; unsigned long mmr_offset; - int desc = bcp->using_desc; + int desc = bcp->uvhub_cpu; if (desc < UV_CPUS_PER_AS) { mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; @@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc, } /* - * Completions are taking a very long time due to a congested numalink - * network. + * Stop all cpus on a uvhub from using the BAU for a period of time. + * This is reversed by check_enable. */ -static void disable_for_congestion(struct bau_control *bcp, - struct ptc_stats *stat) +static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) { - /* let only one cpu do this disabling */ - spin_lock(&disable_lock); - - if (!baudisabled && bcp->period_requests && - ((bcp->period_time / bcp->period_requests) > congested_cycles)) { - int tcpu; - struct bau_control *tbcp; - /* it becomes this cpu's job to turn on the use of the - BAU again */ - baudisabled = 1; - bcp->set_bau_off = 1; - bcp->set_bau_on_time = get_cycles(); - bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); + int tcpu; + struct bau_control *tbcp; + struct bau_control *hmaster; + cycles_t tm1; + + hmaster = bcp->uvhub_master; + spin_lock(&hmaster->disable_lock); + if (!bcp->baudisabled) { stat->s_bau_disabled++; + tm1 = get_cycles(); for_each_present_cpu(tcpu) { tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 1; + if (tbcp->uvhub_master == hmaster) { + tbcp->baudisabled = 1; + tbcp->set_bau_on_time = + tm1 + bcp->disabled_period; + } } } - - spin_unlock(&disable_lock); + spin_unlock(&hmaster->disable_lock); } static void count_max_concurr(int stat, struct bau_control *bcp, @@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2, bcp->period_requests++; bcp->period_time += elapsed; if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->cong_reps)) - disable_for_congestion(bcp, stat); + (bcp->period_requests > bcp->cong_reps) && + ((bcp->period_time / bcp->period_requests) > + congested_cycles)) { + stat->s_congested++; + disable_for_period(bcp, stat); + } } } else stat->s_requestor--; if (completion_status == FLUSH_COMPLETE && try > 1) stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) + else if (completion_status == FLUSH_GIVEUP) { stat->s_giveup++; + if (get_cycles() > bcp->period_end) + bcp->period_giveups = 0; + bcp->period_giveups++; + if (bcp->period_giveups == 1) + bcp->period_end = get_cycles() + bcp->disabled_period; + if (bcp->period_giveups > bcp->giveup_limit) { + disable_for_period(bcp, stat); + stat->s_giveuplimit++; + } + } } /* @@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, * Returns 1 if it gives up entirely and the original cpu mask is to be * returned to the kernel. */ -int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) +int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, + struct bau_desc *bau_desc) { int seq_number = 0; int completion_stat = 0; @@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) struct bau_control *hmaster = bcp->uvhub_master; struct uv1_bau_msg_header *uv1_hdr = NULL; struct uv2_bau_msg_header *uv2_hdr = NULL; - struct bau_desc *bau_desc; - if (bcp->uvhub_version == 1) + if (bcp->uvhub_version == 1) { + uv1 = 1; uv1_throttle(hmaster, stat); + } while (hmaster->uvhub_quiesce) cpu_relax(); time1 = get_cycles(); + if (uv1) + uv1_hdr = &bau_desc->header.uv1_hdr; + else + uv2_hdr = &bau_desc->header.uv2_hdr; + do { - bau_desc = bcp->descriptor_base; - bau_desc += (ITEMS_PER_DESC * bcp->using_desc); - if (bcp->uvhub_version == 1) { - uv1 = 1; - uv1_hdr = &bau_desc->header.uv1_hdr; - } else - uv2_hdr = &bau_desc->header.uv2_hdr; - if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { + if (try == 0) { if (uv1) uv1_hdr->msg_type = MSG_REGULAR; else @@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) uv1_hdr->sequence = seq_number; else uv2_hdr->sequence = seq_number; - index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; + index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; bcp->send_message = get_cycles(); write_mmr_activation(index); try++; completion_stat = wait_completion(bau_desc, bcp, try); - /* UV2: wait_completion() may change the bcp->using_desc */ handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { bcp->ipi_attempts = 0; + stat->s_overipilimit++; completion_stat = FLUSH_GIVEUP; break; } cpu_relax(); } while ((completion_stat == FLUSH_RETRY_PLUGGED) || - (completion_stat == FLUSH_RETRY_BUSYBUG) || (completion_stat == FLUSH_RETRY_TIMEOUT)); time2 = get_cycles(); @@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) } /* - * The BAU is disabled. When the disabled time period has expired, the cpu - * that disabled it must re-enable it. - * Return 0 if it is re-enabled for all cpus. + * The BAU is disabled for this uvhub. When the disabled time period has + * expired re-enable it. + * Return 0 if it is re-enabled for all cpus on this uvhub. */ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) { int tcpu; struct bau_control *tbcp; + struct bau_control *hmaster; - if (bcp->set_bau_off) { - if (get_cycles() >= bcp->set_bau_on_time) { - stat->s_bau_reenabled++; - baudisabled = 0; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); + hmaster = bcp->uvhub_master; + spin_lock(&hmaster->disable_lock); + if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { + stat->s_bau_reenabled++; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + if (tbcp->uvhub_master == hmaster) { tbcp->baudisabled = 0; tbcp->period_requests = 0; tbcp->period_time = 0; + tbcp->period_giveups = 0; } - return 0; } + spin_unlock(&hmaster->disable_lock); + return 0; } + spin_unlock(&hmaster->disable_lock); return -1; } @@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; - - /* kernel was booted 'nobau' */ - if (nobau) - return cpumask; + unsigned long descriptor_status; + unsigned long status; bcp = &per_cpu(bau_control, cpu); stat = bcp->statp; + stat->s_enters++; + + if (bcp->nobau) + return cpumask; + + if (bcp->busy) { + descriptor_status = + read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); + status = ((descriptor_status >> (bcp->uvhub_cpu * + UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; + if (status == UV2H_DESC_BUSY) + return cpumask; + bcp->busy = 0; + } /* bau was disabled due to slow response */ if (bcp->baudisabled) { - if (check_enable(bcp, stat)) + if (check_enable(bcp, stat)) { + stat->s_ipifordisabled++; return cpumask; + } } /* @@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, stat->s_ntargself++; bau_desc = bcp->descriptor_base; - bau_desc += (ITEMS_PER_DESC * bcp->using_desc); + bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu); bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) return NULL; @@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, * uv_flush_send_and_wait returns 0 if all cpu's were messaged, * or 1 if it gave up and the original cpumask should be returned. */ - if (!uv_flush_send_and_wait(flush_mask, bcp)) + if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc)) return NULL; else return cpumask; } /* - * Search the message queue for any 'other' message with the same software - * acknowledge resource bit vector. + * Search the message queue for any 'other' unprocessed message with the + * same software acknowledge resource bit vector as the 'msg' message. */ struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, - struct bau_control *bcp, unsigned char swack_vec) + struct bau_control *bcp) { struct bau_pq_entry *msg_next = msg + 1; + unsigned char swack_vec = msg->swack_vec; if (msg_next > bcp->queue_last) msg_next = bcp->queue_first; - while ((msg_next->swack_vec != 0) && (msg_next != msg)) { - if (msg_next->swack_vec == swack_vec) + while (msg_next != msg) { + if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && + (msg_next->swack_vec == swack_vec)) return msg_next; msg_next++; if (msg_next > bcp->queue_last) @@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) * This message was assigned a swack resource, but no * reserved acknowlegment is pending. * The bug has prevented this message from setting the MMR. - * And no other message has used the same sw_ack resource. - * Do the requested shootdown but do not reply to the msg. - * (the 0 means make no acknowledge) */ - bau_process_message(mdp, bcp, 0); - return; - } - - /* - * Some message has set the MMR 'pending' bit; it might have been - * another message. Look for that message. - */ - other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); - if (other_msg) { - /* There is another. Do not ack the current one. */ - bau_process_message(mdp, bcp, 0); /* - * Let the natural processing of that message acknowledge - * it. Don't get the processing of sw_ack's out of order. + * Some message has set the MMR 'pending' bit; it might have + * been another message. Look for that message. */ - return; + other_msg = find_another_by_swack(msg, bcp); + if (other_msg) { + /* + * There is another. Process this one but do not + * ack it. + */ + bau_process_message(mdp, bcp, 0); + /* + * Let the natural processing of that other message + * acknowledge it. Don't get the processing of sw_ack's + * out of order. + */ + return; + } } /* - * There is no other message using this sw_ack, so it is safe to - * acknowledge it. + * Either the MMR shows this one pending a reply or there is no + * other message using this sw_ack, so it is safe to acknowledge it. */ bau_process_message(mdp, bcp, 1); @@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image |= (1L << UV2_EXT_SHFT); + /* hw bug workaround; do not use extended status */ + mmr_image &= ~(1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); } @@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec) static int ptc_seq_show(struct seq_file *file, void *data) { struct ptc_stats *stat; + struct bau_control *bcp; int cpu; cpu = *(loff_t *)data; if (!cpu) { seq_printf(file, - "# cpu sent stime self locals remotes ncpus localhub "); + "# cpu bauoff sent stime self locals remotes ncpus localhub "); seq_printf(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 "); seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); + "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); + seq_printf(file, + "rok resetp resett giveup sto bz throt disable "); seq_printf(file, - "resetp resett giveup sto bz throt swack recv rtime "); + "enable wars warshw warwaits enters ipidis plugged "); seq_printf(file, - "all one mult none retry canc nocan reset rcan "); + "ipiover glim cong swack recv rtime all one mult "); seq_printf(file, - "disable enable wars warshw warwaits\n"); + "none retry canc nocan reset rcan\n"); } if (cpu < num_possible_cpus() && cpu_online(cpu)) { - stat = &per_cpu(ptcstats, cpu); + bcp = &per_cpu(bau_control, cpu); + stat = bcp->statp; /* source side statistics */ seq_printf(file, - "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - cpu, stat->s_requestor, cycles_2_us(stat->s_time), + "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + cpu, bcp->nobau, stat->s_requestor, + cycles_2_us(stat->s_time), stat->s_ntargself, stat->s_ntarglocals, stat->s_ntargremotes, stat->s_ntargcpu, stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, @@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data) stat->s_resets_plug, stat->s_resets_timeout, stat->s_giveup, stat->s_stimeout, stat->s_busy, stat->s_throttles); + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + stat->s_bau_disabled, stat->s_bau_reenabled, + stat->s_uv2_wars, stat->s_uv2_wars_hw, + stat->s_uv2_war_waits, stat->s_enters, + stat->s_ipifordisabled, stat->s_plugged, + stat->s_overipilimit, stat->s_giveuplimit, + stat->s_congested); /* destination side statistics */ seq_printf(file, - "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), stat->d_requestee, cycles_2_us(stat->d_time), stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_nomsg, stat->d_retries, stat->d_canceled, stat->d_nocanceled, stat->d_resets, stat->d_rcanceled); - seq_printf(file, "%ld %ld %ld %ld %ld\n", - stat->s_bau_disabled, stat->s_bau_reenabled, - stat->s_uv2_wars, stat->s_uv2_wars_hw, - stat->s_uv2_war_waits); } return 0; } @@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, char *buf; int ret; - buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", - "max_concur plugged_delay plugsb4reset", - "timeoutsb4reset ipi_reset_limit complete_threshold", - "congested_response_us congested_reps congested_period", + buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", + "max_concur plugged_delay plugsb4reset timeoutsb4reset", + "ipi_reset_limit complete_threshold congested_response_us", + "congested_reps disabled_period giveup_limit", max_concurr, plugged_delay, plugsb4reset, timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_respns_us, congested_reps, congested_period); + congested_respns_us, congested_reps, disabled_period, + giveup_limit); if (!buf) return -ENOMEM; @@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, return -EFAULT; optstr[count - 1] = '\0'; + if (!strcmp(optstr, "on")) { + set_bau_on(); + return count; + } else if (!strcmp(optstr, "off")) { + set_bau_off(); + return count; + } + if (strict_strtol(optstr, 10, &input_arg) < 0) { printk(KERN_DEBUG "%s is invalid\n", optstr); return -EINVAL; @@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, bcp->complete_threshold = complete_threshold; bcp->cong_response_us = congested_respns_us; bcp->cong_reps = congested_reps; - bcp->cong_period = congested_period; + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; } return count; } @@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) * fairness chaining multilevel count replied_to */ } else { + /* + * BIOS uses legacy mode, but UV2 hardware always + * uses native mode for selective broadcasts. + */ uv2_hdr = &bd2->header.uv2_hdr; uv2_hdr->swack_flag = 1; uv2_hdr->base_dest_nasid = @@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void) index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - base = timeout_base_ns[index]; - ts_ns = base * mult1 * mult2; + ts_ns = timeout_base_ns[index]; + ts_ns *= (mult1 * mult2); ret = ts_ns / 1000; } else { /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ @@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void) for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); bcp->baudisabled = 0; + if (nobau) + bcp->nobau = 1; bcp->statp = &per_cpu(ptcstats, cpu); /* time interval to catch a hardware stay-busy bug */ bcp->timeout_interval = usec_2_cycles(2*timeout_us); @@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void) bcp->complete_threshold = complete_threshold; bcp->cong_response_us = congested_respns_us; bcp->cong_reps = congested_reps; - bcp->cong_period = congested_period; - bcp->clocks_per_100_usec = usec_2_cycles(100); + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; spin_lock_init(&bcp->queue_lock); spin_lock_init(&bcp->uvhub_lock); + spin_lock_init(&bcp->disable_lock); } } @@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, } bcp->uvhub_master = *hmasterp; bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; - bcp->using_desc = bcp->uvhub_cpu; if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { printk(KERN_EMERG "%d cpus per uvhub invalid\n", bcp->uvhub_cpu); @@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void) if (!is_uv_system()) return 0; - if (nobau) - return 0; - for_each_possible_cpu(cur_cpu) { mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); } nuvhubs = uv_num_possible_blades(); - spin_lock_init(&disable_lock); congested_cycles = usec_2_cycles(congested_respns_us); uv_base_pnode = 0x7fffffff; @@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void) enable_timeouts(); if (init_per_cpu(nuvhubs, uv_base_pnode)) { - nobau = 1; + set_bau_off(); + nobau_perm = 1; return 0; } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index f25c2765a5c9..acf7752da952 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; int mmr_pnode, err; + unsigned int dest; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; + err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); + if (err != 0) + return err; + if (limit == UV_AFFINITY_CPU) irq_set_status_flags(irq, IRQ_NO_BALANCING); else @@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + entry->dest = dest; mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -222,7 +227,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, if (cfg->move_in_progress) send_cleanup_vector(cfg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } /* diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 5b84a2d30888..b2d534cab25f 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -22,7 +22,7 @@ wakeup-objs += video-bios.o realmode-y += header.o realmode-y += trampoline_$(BITS).o realmode-y += stack.o -realmode-$(CONFIG_X86_32) += reboot_32.o +realmode-y += reboot.o realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) targets += $(realmode-y) diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index fadf48378ada..a28221d94e69 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -6,6 +6,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/segment.h> #include "realmode.h" @@ -28,8 +29,9 @@ GLOBAL(real_mode_header) .long pa_wakeup_header #endif /* APM/BIOS reboot */ -#ifdef CONFIG_X86_32 .long pa_machine_real_restart_asm +#ifdef CONFIG_X86_64 + .long __KERNEL32_CS #endif END(real_mode_header) diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot.S index 114044876b3d..f932ea61d1c8 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot.S @@ -2,6 +2,8 @@ #include <linux/init.h> #include <asm/segment.h> #include <asm/page_types.h> +#include <asm/processor-flags.h> +#include <asm/msr-index.h> #include "realmode.h" /* @@ -12,13 +14,35 @@ * doesn't work with at least one type of 486 motherboard. It is easy * to stop this code working; hence the copious comments. * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. + * This code is called with the restart type (0 = BIOS, 1 = APM) in + * the primary argument register (%eax for 32 bit, %edi for 64 bit). */ .section ".text32", "ax" .code32 - - .balign 16 ENTRY(machine_real_restart_asm) + +#ifdef CONFIG_X86_64 + /* Switch to trampoline GDT as it is guaranteed < 4 GiB */ + movl $__KERNEL_DS, %eax + movl %eax, %ds + lgdtl pa_tr_gdt + + /* Disable paging to drop us out of long mode */ + movl %cr0, %eax + andl $~X86_CR0_PG, %eax + movl %eax, %cr0 + ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off + +GLOBAL(machine_real_restart_paging_off) + xorl %eax, %eax + xorl %edx, %edx + movl $MSR_EFER, %ecx + wrmsr + + movl %edi, %eax + +#endif /* CONFIG_X86_64 */ + /* Set up the IDT for real mode. */ lidtl pa_machine_real_restart_idt diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index afb250d22a6b..f58dca7a6e52 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); this_cpu_write(cpu_state, CPU_ONLINE); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index dcb8a6e48692..4b01ab3d2c24 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -24,6 +24,7 @@ #include <linux/wait.h> #include <linux/async.h> #include <linux/pm_runtime.h> +#include <scsi/scsi_scan.h> #include "base.h" #include "power/power.h" @@ -332,6 +333,7 @@ void wait_for_device_probe(void) /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); + scsi_complete_async_scans(); } EXPORT_SYMBOL_GPL(wait_for_device_probe); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 65665c9c42c6..8f428a8ab003 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -499,7 +499,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header, / sizeof (*ondisk)) return -EINVAL; header->snapc = kmalloc(sizeof(struct ceph_snap_context) + - snap_count * sizeof (*ondisk), + snap_count * sizeof(u64), gfp_flags); if (!header->snapc) return -ENOMEM; @@ -977,7 +977,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) op = (void *)(replyhead + 1); rc = le32_to_cpu(replyhead->result); bytes = le64_to_cpu(op->extent.length); - read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); + read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c index 8f05652d53e6..0fcec2aae19c 100644 --- a/drivers/clk/spear/spear1310_clock.c +++ b/drivers/clk/spear/spear1310_clock.c @@ -345,31 +345,30 @@ static struct frac_rate_tbl gen_rtbl[] = { /* clock parents */ static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; -static const char *uart0_parents[] = { "pll5_clk", "uart_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", +static const char *uart0_parents[] = { "pll5_clk", "uart_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk", "osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", - "gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", }; static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", }; static const char *i2s_src_parents[] = { "vco1div2_clk", "none", "pll3_clk", "i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", }; static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll3_clk", }; static const char *gen_synth2_3_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll2_clk", }; static const char *rmii_phy_parents[] = { "ras_tx50_clk", "none", - "ras_pll2_clk", "ras_synth0_clk", }; + "ras_pll2_clk", "ras_syn0_clk", }; static const char *smii_rgmii_phy_parents[] = { "none", "ras_tx125_clk", - "ras_pll2_clk", "ras_synth0_clk", }; -static const char *uart_parents[] = { "ras_apb_clk", "gen_synth3_clk", }; -static const char *i2c_parents[] = { "ras_apb_clk", "gen_synth1_clk", }; -static const char *ssp1_parents[] = { "ras_apb_clk", "gen_synth1_clk", + "ras_pll2_clk", "ras_syn0_clk", }; +static const char *uart_parents[] = { "ras_apb_clk", "gen_syn3_clk", }; +static const char *i2c_parents[] = { "ras_apb_clk", "gen_syn1_clk", }; +static const char *ssp1_parents[] = { "ras_apb_clk", "gen_syn1_clk", "ras_plclk0_clk", }; -static const char *pci_parents[] = { "ras_pll3_clk", "gen_synth2_clk", }; -static const char *tdm_parents[] = { "ras_pll3_clk", "gen_synth1_clk", }; +static const char *pci_parents[] = { "ras_pll3_clk", "gen_syn2_clk", }; +static const char *tdm_parents[] = { "ras_pll3_clk", "gen_syn1_clk", }; void __init spear1310_clk_init(void) { @@ -390,9 +389,9 @@ void __init spear1310_clk_init(void) 25000000); clk_register_clkdev(clk, "osc_25m_clk", NULL); - clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, - CLK_IS_ROOT, 125000000); - clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); + clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, + 125000000); + clk_register_clkdev(clk, "gmii_pad_clk", NULL); clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, CLK_IS_ROOT, 12288000); @@ -406,34 +405,34 @@ void __init spear1310_clk_init(void) /* clock derived from 24 or 25 MHz osc clk */ /* vco-pll */ - clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco1_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL1_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco1_mux_clk", NULL); - clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", + clk_register_clkdev(clk, "vco1_mclk", NULL); + clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0, SPEAR1310_PLL1_CTR, SPEAR1310_PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco2_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL2_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco2_mux_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", + clk_register_clkdev(clk, "vco2_mclk", NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0, SPEAR1310_PLL2_CTR, SPEAR1310_PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); - clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco3_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL3_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco3_mux_clk", NULL); - clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", + clk_register_clkdev(clk, "vco3_mclk", NULL); + clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0, SPEAR1310_PLL3_CTR, SPEAR1310_PLL3_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco3_clk", NULL); @@ -473,7 +472,7 @@ void __init spear1310_clk_init(void) /* peripherals */ clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1, 128); - clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, + clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_THSENS_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -500,177 +499,176 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, "apb_clk", NULL); /* gpt clocks */ - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT0_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt0_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, + clk_register_clkdev(clk, "gpt0_mclk", NULL); + clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT1_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT2_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT3_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk_register_clkdev(clk, "gpt3_mclk", NULL); + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* others */ - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_UART_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_UART_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_UART_CLK_SHIFT, SPEAR1310_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, + clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_UART_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "e0000000.serial"); - clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", + clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk", "vco1div2_clk", 0, SPEAR1310_SDHCI_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "sdhci_synth_clk", NULL); - clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL); + clk_register_clkdev(clk, "sdhci_syn_clk", NULL); + clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); - clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_SDHCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b3000000.sdhci"); - clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_CFXD_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "cfxd_synth_clk", NULL); - clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); + clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_CFXD_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "cfxd_syn_clk", NULL); + clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); - clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CFXD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b2800000.cf"); clk_register_clkdev(clk, NULL, "arasan_xd"); - clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_C3_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "c3_synth_clk", NULL); - clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); + clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_C3_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "c3_syn_clk", NULL); + clk_register_clkdev(clk1, "c3_syn_gclk", NULL); - clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, + clk = clk_register_mux(NULL, "c3_mclk", c3_parents, ARRAY_SIZE(c3_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_C3_CLK_SHIFT, SPEAR1310_C3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "c3_mux_clk", NULL); + clk_register_clkdev(clk, "c3_mclk", NULL); - clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, + clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "c3"); /* gmac */ - clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", - gmac_phy_input_parents, + clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents, ARRAY_SIZE(gmac_phy_input_parents), 0, SPEAR1310_GMAC_CLK_CFG, SPEAR1310_GMAC_PHY_INPUT_CLK_SHIFT, SPEAR1310_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); + clk_register_clkdev(clk, "phy_input_mclk", NULL); - clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", - "gmac_phy_input_mux_clk", 0, SPEAR1310_GMAC_CLK_SYNT, - NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); - clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); + clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", + 0, SPEAR1310_GMAC_CLK_SYNT, NULL, gmac_rtbl, + ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "phy_syn_clk", NULL); + clk_register_clkdev(clk1, "phy_syn_gclk", NULL); - clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, + clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents, ARRAY_SIZE(gmac_phy_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GMAC_PHY_CLK_SHIFT, SPEAR1310_GMAC_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.0"); /* clcd */ - clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, + clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents, ARRAY_SIZE(clcd_synth_parents), 0, SPEAR1310_CLCD_CLK_SYNT, SPEAR1310_CLCD_SYNT_CLK_SHIFT, SPEAR1310_CLCD_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_mclk", NULL); - clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, + clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0, SPEAR1310_CLCD_CLK_SYNT, clcd_rtbl, ARRAY_SIZE(clcd_rtbl), &_lock); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); - clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, + clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents, ARRAY_SIZE(clcd_pixel_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_CLCD_CLK_SHIFT, SPEAR1310_CLCD_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "clcd_pixel_clk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "clcd_clk", NULL); /* i2s */ - clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, + clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents, ARRAY_SIZE(i2s_src_parents), 0, SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_SRC_CLK_SHIFT, SPEAR1310_I2S_SRC_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_src_clk", NULL); - clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, + clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0, SPEAR1310_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl, ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL); clk_register_clkdev(clk, "i2s_prs1_clk", NULL); - clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, + clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents, ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_REF_SHIFT, SPEAR1310_I2S_REF_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_clk", NULL); - clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, + clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_I2S_REF_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); - clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", + clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_pad_clk", 0, SPEAR1310_I2S_CLK_CFG, &i2s_sclk_masks, i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1); clk_register_clkdev(clk, "i2s_sclk_clk", NULL); - clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); + clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL); /* clock derived from ahb clk */ clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -747,13 +745,13 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, "sysram1_clk", NULL); - clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", + clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk", 0, SPEAR1310_ADC_CLK_SYNT, NULL, adc_rtbl, ARRAY_SIZE(adc_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "adc_synth_clk", NULL); - clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); + clk_register_clkdev(clk, "adc_syn_clk", NULL); + clk_register_clkdev(clk1, "adc_syn_gclk", NULL); - clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_ADC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "adc_clk"); @@ -790,37 +788,37 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, NULL, "e0300000.kbd"); /* RAS clks */ - clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", - gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents), - 0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT0_1_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, + ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1310_PLL_CFG, + SPEAR1310_RAS_SYNT0_1_CLK_SHIFT, SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); - clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", - gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), - 0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT2_3_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, + ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1310_PLL_CFG, + SPEAR1310_RAS_SYNT2_3_CLK_SHIFT, SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); - clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0, SPEAR1310_RAS_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth0_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_clk", NULL); - clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0, SPEAR1310_RAS_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth1_clk", NULL); + clk_register_clkdev(clk, "gen_syn1_clk", NULL); - clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0, SPEAR1310_RAS_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth2_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_clk", NULL); - clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0, SPEAR1310_RAS_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth3_clk", NULL); + clk_register_clkdev(clk, "gen_syn3_clk", NULL); clk = clk_register_gate(NULL, "ras_osc_24m_clk", "osc_24m_clk", 0, SPEAR1310_RAS_CLK_ENB, SPEAR1310_OSC_24M_CLK_ENB, 0, @@ -847,7 +845,7 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, "ras_pll3_clk", NULL); - clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_125m_pad_clk", 0, + clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_pad_clk", 0, SPEAR1310_RAS_CLK_ENB, SPEAR1310_C125M_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "ras_tx125_clk", NULL); @@ -912,7 +910,7 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, NULL, "5c700000.eth"); - clk = clk_register_mux(NULL, "smii_rgmii_phy_mux_clk", + clk = clk_register_mux(NULL, "smii_rgmii_phy_mclk", smii_rgmii_phy_parents, ARRAY_SIZE(smii_rgmii_phy_parents), 0, SPEAR1310_RAS_CTRL_REG1, @@ -922,184 +920,184 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, NULL, "stmmacphy.2"); clk_register_clkdev(clk, NULL, "stmmacphy.4"); - clk = clk_register_mux(NULL, "rmii_phy_mux_clk", rmii_phy_parents, + clk = clk_register_mux(NULL, "rmii_phy_mclk", rmii_phy_parents, ARRAY_SIZE(rmii_phy_parents), 0, SPEAR1310_RAS_CTRL_REG1, SPEAR1310_RMII_PHY_CLK_SHIFT, SPEAR1310_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.3"); - clk = clk_register_mux(NULL, "uart1_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart1_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART1_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart1_mux_clk", NULL); + clk_register_clkdev(clk, "uart1_mclk", NULL); - clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, + clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5c800000.serial"); - clk = clk_register_mux(NULL, "uart2_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart2_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART2_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart2_mux_clk", NULL); + clk_register_clkdev(clk, "uart2_mclk", NULL); - clk = clk_register_gate(NULL, "uart2_clk", "uart2_mux_clk", 0, + clk = clk_register_gate(NULL, "uart2_clk", "uart2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5c900000.serial"); - clk = clk_register_mux(NULL, "uart3_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart3_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART3_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart3_mux_clk", NULL); + clk_register_clkdev(clk, "uart3_mclk", NULL); - clk = clk_register_gate(NULL, "uart3_clk", "uart3_mux_clk", 0, + clk = clk_register_gate(NULL, "uart3_clk", "uart3_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5ca00000.serial"); - clk = clk_register_mux(NULL, "uart4_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart4_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART4_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart4_mux_clk", NULL); + clk_register_clkdev(clk, "uart4_mclk", NULL); - clk = clk_register_gate(NULL, "uart4_clk", "uart4_mux_clk", 0, + clk = clk_register_gate(NULL, "uart4_clk", "uart4_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART4_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cb00000.serial"); - clk = clk_register_mux(NULL, "uart5_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart5_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART5_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart5_mux_clk", NULL); + clk_register_clkdev(clk, "uart5_mclk", NULL); - clk = clk_register_gate(NULL, "uart5_clk", "uart5_mux_clk", 0, + clk = clk_register_gate(NULL, "uart5_clk", "uart5_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART5_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cc00000.serial"); - clk = clk_register_mux(NULL, "i2c1_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c1_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C1_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c1_mux_clk", NULL); + clk_register_clkdev(clk, "i2c1_mclk", NULL); - clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cd00000.i2c"); - clk = clk_register_mux(NULL, "i2c2_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c2_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C2_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c2_mux_clk", NULL); + clk_register_clkdev(clk, "i2c2_mclk", NULL); - clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5ce00000.i2c"); - clk = clk_register_mux(NULL, "i2c3_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c3_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C3_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c3_mux_clk", NULL); + clk_register_clkdev(clk, "i2c3_mclk", NULL); - clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cf00000.i2c"); - clk = clk_register_mux(NULL, "i2c4_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c4_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C4_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c4_mux_clk", NULL); + clk_register_clkdev(clk, "i2c4_mclk", NULL); - clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C4_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d000000.i2c"); - clk = clk_register_mux(NULL, "i2c5_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c5_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C5_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c5_mux_clk", NULL); + clk_register_clkdev(clk, "i2c5_mclk", NULL); - clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C5_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d100000.i2c"); - clk = clk_register_mux(NULL, "i2c6_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c6_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C6_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c6_mux_clk", NULL); + clk_register_clkdev(clk, "i2c6_mclk", NULL); - clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C6_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d200000.i2c"); - clk = clk_register_mux(NULL, "i2c7_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c7_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C7_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c7_mux_clk", NULL); + clk_register_clkdev(clk, "i2c7_mclk", NULL); - clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C7_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d300000.i2c"); - clk = clk_register_mux(NULL, "ssp1_mux_clk", ssp1_parents, + clk = clk_register_mux(NULL, "ssp1_mclk", ssp1_parents, ARRAY_SIZE(ssp1_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_SSP1_CLK_SHIFT, SPEAR1310_SSP1_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "ssp1_mux_clk", NULL); + clk_register_clkdev(clk, "ssp1_mclk", NULL); - clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mux_clk", 0, + clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_SSP1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d400000.spi"); - clk = clk_register_mux(NULL, "pci_mux_clk", pci_parents, + clk = clk_register_mux(NULL, "pci_mclk", pci_parents, ARRAY_SIZE(pci_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_PCI_CLK_SHIFT, SPEAR1310_PCI_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "pci_mux_clk", NULL); + clk_register_clkdev(clk, "pci_mclk", NULL); - clk = clk_register_gate(NULL, "pci_clk", "pci_mux_clk", 0, + clk = clk_register_gate(NULL, "pci_clk", "pci_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_PCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "pci"); - clk = clk_register_mux(NULL, "tdm1_mux_clk", tdm_parents, + clk = clk_register_mux(NULL, "tdm1_mclk", tdm_parents, ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM1_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "tdm1_mux_clk", NULL); + clk_register_clkdev(clk, "tdm1_mclk", NULL); - clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mux_clk", 0, + clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "tdm_hdlc.0"); - clk = clk_register_mux(NULL, "tdm2_mux_clk", tdm_parents, + clk = clk_register_mux(NULL, "tdm2_mclk", tdm_parents, ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM2_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "tdm2_mux_clk", NULL); + clk_register_clkdev(clk, "tdm2_mclk", NULL); - clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mux_clk", 0, + clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "tdm_hdlc.1"); diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index e3ea72162236..2352cee7f645 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -369,27 +369,25 @@ static struct frac_rate_tbl gen_rtbl[] = { /* clock parents */ static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; -static const char *sys_parents[] = { "none", "pll1_clk", "none", "none", - "sys_synth_clk", "none", "pll2_clk", "pll3_clk", }; -static const char *ahb_parents[] = { "cpu_div3_clk", "amba_synth_clk", }; +static const char *sys_parents[] = { "pll1_clk", "pll1_clk", "pll1_clk", + "pll1_clk", "sys_synth_clk", "sys_synth_clk", "pll2_clk", "pll3_clk", }; +static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", }; static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk", - "uart0_synth_gate_clk", }; + "uart0_syn_gclk", }; static const char *uart1_parents[] = { "pll5_clk", "osc_24m_clk", - "uart1_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", + "uart1_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk", "osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", - "gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", }; static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", }; static const char *i2s_src_parents[] = { "vco1div2_clk", "pll2_clk", "pll3_clk", "i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; -static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_synth2_clk", -}; -static const char *spdif_in_parents[] = { "pll2_clk", "gen_synth3_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", }; +static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_syn2_clk", }; +static const char *spdif_in_parents[] = { "pll2_clk", "gen_syn3_clk", }; static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll3_clk", }; @@ -415,9 +413,9 @@ void __init spear1340_clk_init(void) 25000000); clk_register_clkdev(clk, "osc_25m_clk", NULL); - clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, - CLK_IS_ROOT, 125000000); - clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); + clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, + 125000000); + clk_register_clkdev(clk, "gmii_pad_clk", NULL); clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, CLK_IS_ROOT, 12288000); @@ -431,35 +429,35 @@ void __init spear1340_clk_init(void) /* clock derived from 24 or 25 MHz osc clk */ /* vco-pll */ - clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco1_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL1_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco1_mux_clk", NULL); - clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", - 0, SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco1_mclk", NULL); + clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0, + SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco2_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL2_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco2_mux_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", - 0, SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco2_mclk", NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0, + SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); - clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco3_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL3_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco3_mux_clk", NULL); - clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", - 0, SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco3_mclk", NULL); + clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0, + SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco3_clk", NULL); clk_register_clkdev(clk1, "pll3_clk", NULL); @@ -498,7 +496,7 @@ void __init spear1340_clk_init(void) /* peripherals */ clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1, 128); - clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, + clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_THSENS_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -509,23 +507,23 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, "ddr_clk", NULL); /* clock derived from pll1 clk */ - clk = clk_register_frac("sys_synth_clk", "vco1div2_clk", 0, + clk = clk_register_frac("sys_syn_clk", "vco1div2_clk", 0, SPEAR1340_SYS_CLK_SYNT, sys_synth_rtbl, ARRAY_SIZE(sys_synth_rtbl), &_lock); - clk_register_clkdev(clk, "sys_synth_clk", NULL); + clk_register_clkdev(clk, "sys_syn_clk", NULL); - clk = clk_register_frac("amba_synth_clk", "vco1div2_clk", 0, + clk = clk_register_frac("amba_syn_clk", "vco1div2_clk", 0, SPEAR1340_AMBA_CLK_SYNT, amba_synth_rtbl, ARRAY_SIZE(amba_synth_rtbl), &_lock); - clk_register_clkdev(clk, "amba_synth_clk", NULL); + clk_register_clkdev(clk, "amba_syn_clk", NULL); - clk = clk_register_mux(NULL, "sys_mux_clk", sys_parents, + clk = clk_register_mux(NULL, "sys_mclk", sys_parents, ARRAY_SIZE(sys_parents), 0, SPEAR1340_SYS_CLK_CTRL, SPEAR1340_SCLK_SRC_SEL_SHIFT, SPEAR1340_SCLK_SRC_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "sys_clk", NULL); - clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mux_clk", 0, 1, + clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mclk", 0, 1, 2); clk_register_clkdev(clk, "cpu_clk", NULL); @@ -548,194 +546,193 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, "apb_clk", NULL); /* gpt clocks */ - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT0_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt0_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, + clk_register_clkdev(clk, "gpt0_mclk", NULL); + clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT1_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT2_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT3_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk_register_clkdev(clk, "gpt3_mclk", NULL); + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* others */ - clk = clk_register_aux("uart0_synth_clk", "uart0_synth_gate_clk", + clk = clk_register_aux("uart0_syn_clk", "uart0_syn_gclk", "vco1div2_clk", 0, SPEAR1340_UART0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart0_synth_clk", NULL); - clk_register_clkdev(clk1, "uart0_synth_gate_clk", NULL); + clk_register_clkdev(clk, "uart0_syn_clk", NULL); + clk_register_clkdev(clk1, "uart0_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART0_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, + clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "e0000000.serial"); - clk = clk_register_aux("uart1_synth_clk", "uart1_synth_gate_clk", + clk = clk_register_aux("uart1_syn_clk", "uart1_syn_gclk", "vco1div2_clk", 0, SPEAR1340_UART1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart1_synth_clk", NULL); - clk_register_clkdev(clk1, "uart1_synth_gate_clk", NULL); + clk_register_clkdev(clk, "uart1_syn_clk", NULL); + clk_register_clkdev(clk1, "uart1_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart1_mux_clk", uart1_parents, + clk = clk_register_mux(NULL, "uart1_mclk", uart1_parents, ARRAY_SIZE(uart1_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART1_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart1_mux_clk", NULL); + clk_register_clkdev(clk, "uart1_mclk", NULL); - clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, - SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, + clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, + SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b4100000.serial"); - clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", + clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk", "vco1div2_clk", 0, SPEAR1340_SDHCI_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "sdhci_synth_clk", NULL); - clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL); + clk_register_clkdev(clk, "sdhci_syn_clk", NULL); + clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); - clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_SDHCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b3000000.sdhci"); - clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1340_CFXD_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "cfxd_synth_clk", NULL); - clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); + clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", + 0, SPEAR1340_CFXD_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "cfxd_syn_clk", NULL); + clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); - clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CFXD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b2800000.cf"); clk_register_clkdev(clk, NULL, "arasan_xd"); - clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1340_C3_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "c3_synth_clk", NULL); - clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); + clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", 0, + SPEAR1340_C3_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "c3_syn_clk", NULL); + clk_register_clkdev(clk1, "c3_syn_gclk", NULL); - clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, + clk = clk_register_mux(NULL, "c3_mclk", c3_parents, ARRAY_SIZE(c3_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_C3_CLK_SHIFT, SPEAR1340_C3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "c3_mux_clk", NULL); + clk_register_clkdev(clk, "c3_mclk", NULL); - clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, + clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "c3"); /* gmac */ - clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", - gmac_phy_input_parents, + clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents, ARRAY_SIZE(gmac_phy_input_parents), 0, SPEAR1340_GMAC_CLK_CFG, SPEAR1340_GMAC_PHY_INPUT_CLK_SHIFT, SPEAR1340_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); + clk_register_clkdev(clk, "phy_input_mclk", NULL); - clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", - "gmac_phy_input_mux_clk", 0, SPEAR1340_GMAC_CLK_SYNT, - NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); - clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); + clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", + 0, SPEAR1340_GMAC_CLK_SYNT, NULL, gmac_rtbl, + ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "phy_syn_clk", NULL); + clk_register_clkdev(clk1, "phy_syn_gclk", NULL); - clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, + clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents, ARRAY_SIZE(gmac_phy_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GMAC_PHY_CLK_SHIFT, SPEAR1340_GMAC_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.0"); /* clcd */ - clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, + clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents, ARRAY_SIZE(clcd_synth_parents), 0, SPEAR1340_CLCD_CLK_SYNT, SPEAR1340_CLCD_SYNT_CLK_SHIFT, SPEAR1340_CLCD_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_mclk", NULL); - clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, + clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0, SPEAR1340_CLCD_CLK_SYNT, clcd_rtbl, ARRAY_SIZE(clcd_rtbl), &_lock); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); - clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, + clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents, ARRAY_SIZE(clcd_pixel_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_CLCD_CLK_SHIFT, SPEAR1340_CLCD_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "clcd_pixel_clk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "clcd_clk", NULL); /* i2s */ - clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, + clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents, ARRAY_SIZE(i2s_src_parents), 0, SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_SRC_CLK_SHIFT, SPEAR1340_I2S_SRC_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_src_clk", NULL); - clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, + clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0, SPEAR1340_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl, ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL); clk_register_clkdev(clk, "i2s_prs1_clk", NULL); - clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, + clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents, ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_REF_SHIFT, SPEAR1340_I2S_REF_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_clk", NULL); - clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, + clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_I2S_REF_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); - clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", - "i2s_ref_mux_clk", 0, SPEAR1340_I2S_CLK_CFG, - &i2s_sclk_masks, i2s_sclk_rtbl, - ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1); + clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_mclk", + 0, SPEAR1340_I2S_CLK_CFG, &i2s_sclk_masks, + i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock, + &clk1); clk_register_clkdev(clk, "i2s_sclk_clk", NULL); - clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); + clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL); /* clock derived from ahb clk */ clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -744,7 +741,7 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, NULL, "e0280000.i2c"); clk = clk_register_gate(NULL, "i2c1_clk", "ahb_clk", 0, - SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0, + SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b4000000.i2c"); @@ -800,13 +797,13 @@ void __init spear1340_clk_init(void) &_lock); clk_register_clkdev(clk, "sysram1_clk", NULL); - clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", + clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk", 0, SPEAR1340_ADC_CLK_SYNT, NULL, adc_rtbl, ARRAY_SIZE(adc_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "adc_synth_clk", NULL); - clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); + clk_register_clkdev(clk, "adc_syn_clk", NULL); + clk_register_clkdev(clk1, "adc_syn_gclk", NULL); - clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_ADC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "adc_clk"); @@ -843,39 +840,39 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, NULL, "e0300000.kbd"); /* RAS clks */ - clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", - gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents), - 0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT0_1_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, + ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1340_PLL_CFG, + SPEAR1340_GEN_SYNT0_1_CLK_SHIFT, SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); - clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", - gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), - 0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT2_3_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, + ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1340_PLL_CFG, + SPEAR1340_GEN_SYNT2_3_CLK_SHIFT, SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); - clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0, SPEAR1340_GEN_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth0_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_clk", NULL); - clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0, SPEAR1340_GEN_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth1_clk", NULL); + clk_register_clkdev(clk, "gen_syn1_clk", NULL); - clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0, SPEAR1340_GEN_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth2_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_clk", NULL); - clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0, SPEAR1340_GEN_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth3_clk", NULL); + clk_register_clkdev(clk, "gen_syn3_clk", NULL); - clk = clk_register_gate(NULL, "mali_clk", "gen_synth3_clk", 0, + clk = clk_register_gate(NULL, "mali_clk", "gen_syn3_clk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_MALI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "mali"); @@ -890,74 +887,74 @@ void __init spear1340_clk_init(void) &_lock); clk_register_clkdev(clk, NULL, "spear_cec.1"); - clk = clk_register_mux(NULL, "spdif_out_mux_clk", spdif_out_parents, + clk = clk_register_mux(NULL, "spdif_out_mclk", spdif_out_parents, ARRAY_SIZE(spdif_out_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_OUT_CLK_SHIFT, SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "spdif_out_mux_clk", NULL); + clk_register_clkdev(clk, "spdif_out_mclk", NULL); - clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mux_clk", 0, + clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_OUT_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spdif-out"); - clk = clk_register_mux(NULL, "spdif_in_mux_clk", spdif_in_parents, + clk = clk_register_mux(NULL, "spdif_in_mclk", spdif_in_parents, ARRAY_SIZE(spdif_in_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_IN_CLK_SHIFT, SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "spdif_in_mux_clk", NULL); + clk_register_clkdev(clk, "spdif_in_mclk", NULL); - clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mux_clk", 0, + clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_IN_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spdif-in"); - clk = clk_register_gate(NULL, "acp_clk", "acp_mux_clk", 0, + clk = clk_register_gate(NULL, "acp_clk", "acp_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_ACP_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "acp_clk"); - clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mux_clk", 0, + clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PLGPIO_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "plgpio"); - clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mux_clk", 0, + clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_DEC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "video_dec"); - clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mux_clk", 0, + clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_ENC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "video_enc"); - clk = clk_register_gate(NULL, "video_in_clk", "video_in_mux_clk", 0, + clk = clk_register_gate(NULL, "video_in_clk", "video_in_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_IN_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_vip"); - clk = clk_register_gate(NULL, "cam0_clk", "cam0_mux_clk", 0, + clk = clk_register_gate(NULL, "cam0_clk", "cam0_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.0"); - clk = clk_register_gate(NULL, "cam1_clk", "cam1_mux_clk", 0, + clk = clk_register_gate(NULL, "cam1_clk", "cam1_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.1"); - clk = clk_register_gate(NULL, "cam2_clk", "cam2_mux_clk", 0, + clk = clk_register_gate(NULL, "cam2_clk", "cam2_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.2"); - clk = clk_register_gate(NULL, "cam3_clk", "cam3_mux_clk", 0, + clk = clk_register_gate(NULL, "cam3_clk", "cam3_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.3"); - clk = clk_register_gate(NULL, "pwm_clk", "pwm_mux_clk", 0, + clk = clk_register_gate(NULL, "pwm_clk", "pwm_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PWM_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "pwm"); diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index 01dd6daff2a1..c3157454bb3f 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -122,12 +122,12 @@ static struct gpt_rate_tbl gpt_rtbl[] = { }; /* clock parents */ -static const char *uart0_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", +static const char *uart0_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", }; -static const char *gpt0_parents[] = { "pll3_48m_clk", "gpt0_synth_clk", }; -static const char *gpt1_parents[] = { "pll3_48m_clk", "gpt1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; +static const char *gpt0_parents[] = { "pll3_clk", "gpt0_syn_clk", }; +static const char *gpt1_parents[] = { "pll3_clk", "gpt1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", }; static const char *gen2_3_parents[] = { "pll1_clk", "pll2_clk", }; static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none", "pll2_clk", }; @@ -137,7 +137,7 @@ static void __init spear300_clk_init(void) { struct clk *clk; - clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, + clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0, 1, 1); clk_register_clkdev(clk, NULL, "60000000.clcd"); @@ -219,15 +219,11 @@ static void __init spear310_clk_init(void) #define SPEAR320_UARTX_PCLK_VAL_SYNTH1 0x0 #define SPEAR320_UARTX_PCLK_VAL_APB 0x1 -static const char *i2s_ref_parents[] = { "ras_pll2_clk", - "ras_gen2_synth_gate_clk", }; -static const char *sdhci_parents[] = { "ras_pll3_48m_clk", - "ras_gen3_synth_gate_clk", -}; +static const char *i2s_ref_parents[] = { "ras_pll2_clk", "ras_syn2_gclk", }; +static const char *sdhci_parents[] = { "ras_pll3_clk", "ras_syn3_gclk", }; static const char *smii0_parents[] = { "smii_125m_pad", "ras_pll2_clk", - "ras_gen0_synth_gate_clk", }; -static const char *uartx_parents[] = { "ras_gen1_synth_gate_clk", "ras_apb_clk", -}; + "ras_syn0_gclk", }; +static const char *uartx_parents[] = { "ras_syn1_gclk", "ras_apb_clk", }; static void __init spear320_clk_init(void) { @@ -237,7 +233,7 @@ static void __init spear320_clk_init(void) CLK_IS_ROOT, 125000000); clk_register_clkdev(clk, "smii_125m_pad", NULL); - clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, + clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0, 1, 1); clk_register_clkdev(clk, NULL, "90000000.clcd"); @@ -363,9 +359,9 @@ void __init spear3xx_clk_init(void) clk_register_clkdev(clk, NULL, "fc900000.rtc"); /* clock derived from 24 MHz osc clk */ - clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, + clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0, 48000000); - clk_register_clkdev(clk, "pll3_48m_clk", NULL); + clk_register_clkdev(clk, "pll3_clk", NULL); clk = clk_register_fixed_factor(NULL, "wdt_clk", "osc_24m_clk", 0, 1, 1); @@ -392,98 +388,98 @@ void __init spear3xx_clk_init(void) HCLK_RATIO_MASK, 0, &_lock); clk_register_clkdev(clk, "ahb_clk", NULL); - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, + UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0", "uart0_mux_clk", 0, - PERIP1_CLK_ENB, UART_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart0", "uart0_mclk", 0, PERIP1_CLK_ENB, + UART_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0000000.serial"); - clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", - "pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "firda_synth_clk", NULL); - clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); + clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", 0, + FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "firda_syn_clk", NULL); + clk_register_clkdev(clk1, "firda_syn_gclk", NULL); - clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, + clk = clk_register_mux(NULL, "firda_mclk", firda_parents, ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "firda_mux_clk", NULL); + clk_register_clkdev(clk, "firda_mclk", NULL); - clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, + clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0, PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "firda"); /* gpt clocks */ - clk_register_gpt("gpt0_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); + clk_register_gpt("gpt0_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); clk = clk_register_mux(NULL, "gpt0_clk", gpt0_parents, ARRAY_SIZE(gpt0_parents), 0, PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk_register_gpt("gpt1_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt1_parents, + clk_register_gpt("gpt1_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); + clk = clk_register_mux(NULL, "gpt1_mclk", gpt1_parents, ARRAY_SIZE(gpt1_parents), 0, PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, + clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); + clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents, ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); /* general synths clocks */ - clk = clk_register_aux("gen0_synth_clk", "gen0_synth_gate_clk", - "pll1_clk", 0, GEN0_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen0_synth_clk", NULL); - clk_register_clkdev(clk1, "gen0_synth_gate_clk", NULL); - - clk = clk_register_aux("gen1_synth_clk", "gen1_synth_gate_clk", - "pll1_clk", 0, GEN1_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen1_synth_clk", NULL); - clk_register_clkdev(clk1, "gen1_synth_gate_clk", NULL); - - clk = clk_register_mux(NULL, "gen2_3_parent_clk", gen2_3_parents, + clk = clk_register_aux("gen0_syn_clk", "gen0_syn_gclk", "pll1_clk", + 0, GEN0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "gen0_syn_clk", NULL); + clk_register_clkdev(clk1, "gen0_syn_gclk", NULL); + + clk = clk_register_aux("gen1_syn_clk", "gen1_syn_gclk", "pll1_clk", + 0, GEN1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "gen1_syn_clk", NULL); + clk_register_clkdev(clk1, "gen1_syn_gclk", NULL); + + clk = clk_register_mux(NULL, "gen2_3_par_clk", gen2_3_parents, ARRAY_SIZE(gen2_3_parents), 0, CORE_CLK_CFG, GEN_SYNTH2_3_CLK_SHIFT, GEN_SYNTH2_3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen2_3_parent_clk", NULL); + clk_register_clkdev(clk, "gen2_3_par_clk", NULL); - clk = clk_register_aux("gen2_synth_clk", "gen2_synth_gate_clk", - "gen2_3_parent_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl, + clk = clk_register_aux("gen2_syn_clk", "gen2_syn_gclk", + "gen2_3_par_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen2_synth_clk", NULL); - clk_register_clkdev(clk1, "gen2_synth_gate_clk", NULL); + clk_register_clkdev(clk, "gen2_syn_clk", NULL); + clk_register_clkdev(clk1, "gen2_syn_gclk", NULL); - clk = clk_register_aux("gen3_synth_clk", "gen3_synth_gate_clk", - "gen2_3_parent_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl, + clk = clk_register_aux("gen3_syn_clk", "gen3_syn_gclk", + "gen2_3_par_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen3_synth_clk", NULL); - clk_register_clkdev(clk1, "gen3_synth_gate_clk", NULL); + clk_register_clkdev(clk, "gen3_syn_clk", NULL); + clk_register_clkdev(clk1, "gen3_syn_gclk", NULL); /* clock derived from pll3 clk */ - clk = clk_register_gate(NULL, "usbh_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBH_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbh_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBH_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "usbh_clk", NULL); clk = clk_register_fixed_factor(NULL, "usbh.0_clk", "usbh_clk", 0, 1, @@ -494,8 +490,8 @@ void __init spear3xx_clk_init(void) 1); clk_register_clkdev(clk, "usbh.1_clk", NULL); - clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "designware_udc"); /* clock derived from ahb clk */ @@ -579,29 +575,25 @@ void __init spear3xx_clk_init(void) RAS_CLK_ENB, RAS_PLL2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "ras_pll2_clk", NULL); - clk = clk_register_gate(NULL, "ras_pll3_48m_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "ras_pll3_clk", "pll3_clk", 0, RAS_CLK_ENB, RAS_48M_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_pll3_48m_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen0_synth_gate_clk", - "gen0_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT0_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen0_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen1_synth_gate_clk", - "gen1_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT1_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen1_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen2_synth_gate_clk", - "gen2_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT2_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen2_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen3_synth_gate_clk", - "gen3_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT3_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen3_synth_gate_clk", NULL); + clk_register_clkdev(clk, "ras_pll3_clk", NULL); + + clk = clk_register_gate(NULL, "ras_syn0_gclk", "gen0_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT0_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn0_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn1_gclk", "gen1_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT1_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn1_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn2_gclk", "gen2_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT2_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn2_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn3_gclk", "gen3_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT3_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn3_gclk", NULL); if (of_machine_is_compatible("st,spear300")) spear300_clk_init(); diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c index 61026ae564ab..a98d0866f541 100644 --- a/drivers/clk/spear/spear6xx_clock.c +++ b/drivers/clk/spear/spear6xx_clock.c @@ -97,13 +97,12 @@ static struct aux_rate_tbl aux_rtbl[] = { {.xscale = 1, .yscale = 2, .eq = 1}, /* 166 MHz */ }; -static const char *clcd_parents[] = { "pll3_48m_clk", "clcd_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", -}; -static const char *uart_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *gpt0_1_parents[] = { "pll3_48m_clk", "gpt0_1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; -static const char *gpt3_parents[] = { "pll3_48m_clk", "gpt3_synth_clk", }; +static const char *clcd_parents[] = { "pll3_clk", "clcd_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", }; +static const char *uart_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *gpt0_1_parents[] = { "pll3_clk", "gpt0_1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", }; +static const char *gpt3_parents[] = { "pll3_clk", "gpt3_syn_clk", }; static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none", "pll2_clk", }; @@ -136,9 +135,9 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, NULL, "rtc-spear"); /* clock derived from 30 MHz osc clk */ - clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, + clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0, 48000000); - clk_register_clkdev(clk, "pll3_48m_clk", NULL); + clk_register_clkdev(clk, "pll3_clk", NULL); clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "osc_30m_clk", 0, PLL1_CTR, PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), @@ -146,9 +145,9 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, - "osc_30m_clk", 0, PLL2_CTR, PLL2_FRQ, pll_rtbl, - ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "osc_30m_clk", + 0, PLL2_CTR, PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), + &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); @@ -165,111 +164,111 @@ void __init spear6xx_clk_init(void) HCLK_RATIO_MASK, 0, &_lock); clk_register_clkdev(clk, "ahb_clk", NULL); - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, + UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart_mux_clk", NULL); + clk_register_clkdev(clk, "uart_mclk", NULL); - clk = clk_register_gate(NULL, "uart0", "uart_mux_clk", 0, - PERIP1_CLK_ENB, UART0_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart0", "uart_mclk", 0, PERIP1_CLK_ENB, + UART0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0000000.serial"); - clk = clk_register_gate(NULL, "uart1", "uart_mux_clk", 0, - PERIP1_CLK_ENB, UART1_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart1", "uart_mclk", 0, PERIP1_CLK_ENB, + UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0080000.serial"); - clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", - "pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "firda_synth_clk", NULL); - clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); + clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", + 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "firda_syn_clk", NULL); + clk_register_clkdev(clk1, "firda_syn_gclk", NULL); - clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, + clk = clk_register_mux(NULL, "firda_mclk", firda_parents, ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "firda_mux_clk", NULL); + clk_register_clkdev(clk, "firda_mclk", NULL); - clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, + clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0, PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "firda"); - clk = clk_register_aux("clcd_synth_clk", "clcd_synth_gate_clk", - "pll1_clk", 0, CLCD_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); - clk_register_clkdev(clk1, "clcd_synth_gate_clk", NULL); + clk = clk_register_aux("clcd_syn_clk", "clcd_syn_gclk", "pll1_clk", + 0, CLCD_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); + clk_register_clkdev(clk1, "clcd_syn_gclk", NULL); - clk = clk_register_mux(NULL, "clcd_mux_clk", clcd_parents, + clk = clk_register_mux(NULL, "clcd_mclk", clcd_parents, ARRAY_SIZE(clcd_parents), 0, PERIP_CLK_CFG, CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_mclk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_mclk", 0, PERIP1_CLK_ENB, CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "clcd"); /* gpt clocks */ - clk = clk_register_gpt("gpt0_1_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, + clk = clk_register_gpt("gpt0_1_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt0_1_synth_clk", NULL); + clk_register_clkdev(clk, "gpt0_1_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt0_1_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt0_1_parents, ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt0_1_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt0_1_parents, ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); + clk_register_clkdev(clk, "gpt1_mclk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, + clk = clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt2_synth_clk", NULL); + clk_register_clkdev(clk, "gpt2_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents, ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); + clk_register_clkdev(clk, "gpt2_mclk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_gpt("gpt3_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, + clk = clk_register_gpt("gpt3_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt3_synth_clk", NULL); + clk_register_clkdev(clk, "gpt3_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt3_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt3_parents, ARRAY_SIZE(gpt3_parents), 0, PERIP_CLK_CFG, GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); + clk_register_clkdev(clk, "gpt3_mclk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, PERIP1_CLK_ENB, GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* clock derived from pll3 clk */ - clk = clk_register_gate(NULL, "usbh0_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "usbh0_clk", "pll3_clk", 0, PERIP1_CLK_ENB, USBH0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "usbh.0_clk"); - clk = clk_register_gate(NULL, "usbh1_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "usbh1_clk", "pll3_clk", 0, PERIP1_CLK_ENB, USBH1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "usbh.1_clk"); - clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "designware_udc"); /* clock derived from ahb clk */ @@ -278,9 +277,8 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, "ahbmult2_clk", NULL); clk = clk_register_mux(NULL, "ddr_clk", ddr_parents, - ARRAY_SIZE(ddr_parents), - 0, PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0, - &_lock); + ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT, + MCTR_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "ddr_clk", NULL); clk = clk_register_divider(NULL, "apb_clk", "ahb_clk", diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index bef04c192768..3fda8c87f02c 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -386,6 +386,7 @@ config HID_MULTITOUCH - Unitec Panels - XAT optical touch panels - Xiroku optical touch panels + - Zytronic touch panels If unsure, say N. diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 875ff451842b..32039235cfee 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -659,6 +659,9 @@ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE 0x0600 +#define USB_VENDOR_ID_SENNHEISER 0x1395 +#define USB_DEVICE_ID_SENNHEISER_BTD500USB 0x002c + #define USB_VENDOR_ID_SIGMA_MICRO 0x1c4f #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD 0x0002 @@ -808,6 +811,9 @@ #define USB_VENDOR_ID_ZYDACRON 0x13EC #define USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL 0x0006 +#define USB_VENDOR_ID_ZYTRONIC 0x14c8 +#define USB_DEVICE_ID_ZYTRONIC_ZXY100 0x0005 + #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 132b0019365e..5301006f6c15 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -301,6 +301,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), + HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, {} }; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 6e3332a99976..76479246d4ee 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1048,6 +1048,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, + /* Zytronic panels */ + { .driver_data = MT_CLS_SERIAL, + MT_USB_DEVICE(USB_VENDOR_ID_ZYTRONIC, + USB_DEVICE_ID_ZYTRONIC_ZXY100) }, + /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, { } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0597ee604f6e..903eef3d3e10 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -76,6 +76,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET }, diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3a74e4410fc0..86e2f4a62b9a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -26,6 +26,8 @@ * These routines are used by both DMA-remapping and Interrupt-remapping */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ + #include <linux/pci.h> #include <linux/dmar.h> #include <linux/iova.h> @@ -39,8 +41,6 @@ #include <asm/irq_remapping.h> #include <asm/iommu_table.h> -#define PREFIX "DMAR: " - /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of * these units are not supported by the architecture. @@ -83,16 +83,12 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, * ignore it */ if (!bus) { - printk(KERN_WARNING - PREFIX "Device scope bus [%d] not found\n", - scope->bus); + pr_warn("Device scope bus [%d] not found\n", scope->bus); break; } pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, bus->number, path->dev, path->fn); + /* warning will be printed below */ break; } path ++; @@ -100,9 +96,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, bus = pdev->subordinate; } if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, scope->bus, path->dev, path->fn); + pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", + segment, scope->bus, path->dev, path->fn); *dev = NULL; return 0; } @@ -110,9 +105,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, pdev->subordinate) || (scope->entry_type == \ ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { pci_dev_put(pdev); - printk(KERN_WARNING PREFIX - "Device scope type does not match for %s\n", - pci_name(pdev)); + pr_warn("Device scope type does not match for %s\n", + pci_name(pdev)); return -EINVAL; } *dev = pdev; @@ -134,8 +128,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) (*cnt)++; else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) { - printk(KERN_WARNING PREFIX - "Unsupported device scope\n"); + pr_warn("Unsupported device scope\n"); } start += scope->length; } @@ -261,25 +254,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) case ACPI_DMAR_TYPE_HARDWARE_UNIT: drhd = container_of(header, struct acpi_dmar_hardware_unit, header); - printk (KERN_INFO PREFIX - "DRHD base: %#016Lx flags: %#x\n", + pr_info("DRHD base: %#016Lx flags: %#x\n", (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = container_of(header, struct acpi_dmar_reserved_memory, header); - printk (KERN_INFO PREFIX - "RMRR base: %#016Lx end: %#016Lx\n", + pr_info("RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; case ACPI_DMAR_TYPE_ATSR: atsr = container_of(header, struct acpi_dmar_atsr, header); - printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + pr_info("ATSR flags: %#x\n", atsr->flags); break; case ACPI_DMAR_HARDWARE_AFFINITY: rhsa = container_of(header, struct acpi_dmar_rhsa, header); - printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", + pr_info("RHSA base: %#016Lx proximity domain: %#x\n", (unsigned long long)rhsa->base_address, rhsa->proximity_domain); break; @@ -299,7 +290,7 @@ static int __init dmar_table_detect(void) &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { - printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); + pr_warn("Unable to map DMAR\n"); status = AE_NOT_FOUND; } @@ -333,20 +324,18 @@ parse_dmar_table(void) return -ENODEV; if (dmar->width < PAGE_SHIFT - 1) { - printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); + pr_warn("Invalid DMAR haw\n"); return -EINVAL; } - printk (KERN_INFO PREFIX "Host address width %d\n", - dmar->width + 1); + pr_info("Host address width %d\n", dmar->width + 1); entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); ret = -EINVAL; break; } @@ -369,8 +358,7 @@ parse_dmar_table(void) #endif break; default: - printk(KERN_WARNING PREFIX - "Unknown DMAR structure type %d\n", + pr_warn("Unknown DMAR structure type %d\n", entry_header->type); ret = 0; /* for forward compatibility */ break; @@ -469,12 +457,12 @@ int __init dmar_table_init(void) ret = parse_dmar_table(); if (ret) { if (ret != -ENODEV) - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + pr_info("parse DMAR table failure.\n"); return ret; } if (list_empty(&dmar_drhd_units)) { - printk(KERN_INFO PREFIX "No DMAR devices found\n"); + pr_info("No DMAR devices found\n"); return -ENODEV; } @@ -506,8 +494,7 @@ int __init check_zero_address(void) (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); return 0; } @@ -558,8 +545,7 @@ int __init detect_intel_iommu(void) if (ret && irq_remapping_enabled && cpu_has_x2apic && dmar->flags & 0x1) - printk(KERN_INFO - "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; @@ -579,14 +565,89 @@ int __init detect_intel_iommu(void) } +static void unmap_iommu(struct intel_iommu *iommu) +{ + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); +} + +/** + * map_iommu: map the iommu's registers + * @iommu: the iommu to map + * @phys_addr: the physical address of the base resgister + * + * Memory map the iommu's registers. Start w/ a single page, and + * possibly expand if that turns out to be insufficent. + */ +static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +{ + int map_size, err=0; + + iommu->reg_phys = phys_addr; + iommu->reg_size = VTD_PAGE_SIZE; + + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + err = -EINVAL; + warn_invalid_dmar(phys_addr, " returns all ones"); + goto unmap; + } + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > iommu->reg_size) { + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); + iommu->reg_size = map_size; + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, + iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + } + err = 0; + goto out; + +unmap: + iounmap(iommu->reg); +release: + release_mem_region(iommu->reg_phys, iommu->reg_size); +out: + return err; +} + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; - int map_size; u32 ver; static int iommu_allocated = 0; int agaw = 0; int msagaw = 0; + int err; if (!drhd->reg_base_addr) { warn_invalid_dmar(0, ""); @@ -600,30 +661,22 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id = iommu_allocated++; sprintf (iommu->name, "dmar%d", iommu->seq_id); - iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); + err = map_iommu(iommu, drhd->reg_base_addr); + if (err) { + pr_err("IOMMU: failed to map %s\n", iommu->name); goto error; } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { - warn_invalid_dmar(drhd->reg_base_addr, " returns all ones"); - goto err_unmap; - } + err = -EINVAL; agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { - printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", - iommu->seq_id); + pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); goto err_unmap; } msagaw = iommu_calculate_max_sagaw(iommu); if (msagaw < 0) { - printk(KERN_ERR - "Cannot get a valid max agaw for iommu (seq_id = %d)\n", + pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto err_unmap; } @@ -632,19 +685,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->node = -1; - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = VTD_PAGE_ALIGN(map_size); - if (map_size > VTD_PAGE_SIZE) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - ver = readl(iommu->reg + DMAR_VER_REG); pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", iommu->seq_id, @@ -659,10 +699,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) return 0; err_unmap: - iounmap(iommu->reg); + unmap_iommu(iommu); error: kfree(iommu); - return -1; + return err; } void free_iommu(struct intel_iommu *iommu) @@ -673,7 +713,8 @@ void free_iommu(struct intel_iommu *iommu) free_dmar_iommu(iommu); if (iommu->reg) - iounmap(iommu->reg); + unmap_iommu(iommu); + kfree(iommu); } @@ -710,7 +751,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); if ((head >> DMAR_IQ_SHIFT) == index) { - printk(KERN_ERR "VT-d detected invalid descriptor: " + pr_err("VT-d detected invalid descriptor: " "low=%llx, high=%llx\n", (unsigned long long)qi->desc[index].low, (unsigned long long)qi->desc[index].high); @@ -1129,15 +1170,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] " "fault index %llx\n" "INTR-REMAP:[fault reason %02d] %s\n", (source_id >> 8), PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr >> 48, fault_reason, reason); else - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " + pr_err("DMAR:[%s] Request device [%02x:%02x.%d] " "fault addr %llx \n" "DMAR:[fault reason %02d] %s\n", (type ? "DMA Read" : "DMA Write"), @@ -1157,8 +1197,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); if (fault_status) - printk(KERN_ERR "DRHD: handling fault status reg %x\n", - fault_status); + pr_err("DRHD: handling fault status reg %x\n", fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) @@ -1224,7 +1263,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) irq = create_irq(); if (!irq) { - printk(KERN_ERR "IOMMU: no free vectors\n"); + pr_err("IOMMU: no free vectors\n"); return -EINVAL; } @@ -1241,7 +1280,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) - printk(KERN_ERR "IOMMU: can't request irq\n"); + pr_err("IOMMU: can't request irq\n"); return ret; } @@ -1258,8 +1297,7 @@ int __init enable_drhd_fault_handling(void) ret = dmar_set_interrupt(iommu); if (ret) { - printk(KERN_ERR "DRHD %Lx: failed to enable fault, " - " interrupt, ret %d\n", + pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", (unsigned long long)drhd->reg_base_addr, ret); return -1; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 6d347064b8b0..e0b18f3ae9a8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -902,7 +902,6 @@ static int intel_setup_ioapic_entry(int irq, return 0; } -#ifdef CONFIG_SMP /* * Migrate the IO-APIC irq in the presence of intr-remapping. * @@ -924,6 +923,10 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EINVAL; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -931,10 +934,16 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, if (get_irte(irq, &irte)) return -EBUSY; - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -956,7 +965,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, cpumask_copy(data->affinity, mask); return 0; } -#endif static void intel_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, @@ -1058,9 +1066,7 @@ struct irq_remap_ops intel_irq_remap_ops = { .reenable = reenable_irq_remapping, .enable_faulting = enable_drhd_fault_handling, .setup_ioapic_entry = intel_setup_ioapic_entry, -#ifdef CONFIG_SMP .set_affinity = intel_ioapic_set_affinity, -#endif .free_irq = free_irte, .compose_msi_msg = intel_compose_msi_msg, .msi_alloc_irq = intel_msi_alloc_irq, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 40cda8e98d87..1d29b1c66e72 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -111,16 +111,15 @@ int setup_ioapic_remapped_entry(int irq, vector, attr); } -#ifdef CONFIG_SMP int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - if (!remap_ops || !remap_ops->set_affinity) + if (!config_enabled(CONFIG_SMP) || !remap_ops || + !remap_ops->set_affinity) return 0; return remap_ops->set_affinity(data, mask, force); } -#endif void free_remapped_irq(int irq) { diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index be9d72950c51..b12974cc1dfe 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -59,11 +59,9 @@ struct irq_remap_ops { unsigned int, int, struct io_apic_irq_attr *); -#ifdef CONFIG_SMP /* Set the CPU affinity of a remapped interrupt */ int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); -#endif /* Free an IRQ */ int (*free_irq)(int); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d039de8322f0..b58b7a33914a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->split_io = dm_rh_get_region_size(ms->rh); ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->discard_zeroes_data_unsupported = 1; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1214,7 +1215,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (!(bio->bi_rw & REQ_FLUSH)) + if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) dm_rh_dec(ms->rh, map_context->ll); return error; } diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 7771ed212182..69732e03eb34 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) return; } + if (bio->bi_rw & REQ_DISCARD) + return; + /* We must inform the log that the sync count has changed. */ log->type->set_region_sync(log, region, 0); @@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & REQ_FLUSH) + if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ce59824fb414..68694da0d21d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); - remap_and_issue(tc, bio, lookup_result.block); + if ((!lookup_result.shared) && pool->pf.discard_passdown) + remap_and_issue(tc, bio, lookup_result.block); + else + bio_endio(bio, 0); } break; @@ -2628,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = 1; ti->num_discard_requests = 1; + ti->discard_zeroes_data_unsupported = 1; } dm_put(pool_md); diff --git a/drivers/md/md.c b/drivers/md/md.c index a4c219e3c859..d5ab4493c8be 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len) * can be sane */ return -EBUSY; rdev->data_offset = offset; + rdev->new_data_offset = offset; return len; } @@ -3926,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", array_states[st]); } -static int do_md_stop(struct mddev * mddev, int ro, int is_open); -static int md_set_readonly(struct mddev * mddev, int is_open); +static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev); +static int md_set_readonly(struct mddev * mddev, struct block_device *bdev); static int do_md_run(struct mddev * mddev); static int restart_array(struct mddev *mddev); @@ -3943,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) /* stopping an active array */ if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 0, 0); + err = do_md_stop(mddev, 0, NULL); break; case inactive: /* stopping an active array */ if (mddev->pers) { if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 2, 0); + err = do_md_stop(mddev, 2, NULL); } else err = 0; /* already inactive */ break; @@ -3958,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) break; /* not supported yet */ case readonly: if (mddev->pers) - err = md_set_readonly(mddev, 0); + err = md_set_readonly(mddev, NULL); else { mddev->ro = 1; set_disk_ro(mddev->gendisk, 1); @@ -3968,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) case read_auto: if (mddev->pers) { if (mddev->ro == 0) - err = md_set_readonly(mddev, 0); + err = md_set_readonly(mddev, NULL); else if (mddev->ro == 1) err = restart_array(mddev); if (err == 0) { @@ -5351,15 +5352,17 @@ void md_stop(struct mddev *mddev) } EXPORT_SYMBOL_GPL(md_stop); -static int md_set_readonly(struct mddev *mddev, int is_open) +static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) { int err = 0; mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open) { + if (atomic_read(&mddev->openers) > !!bdev) { printk("md: %s still in use.\n",mdname(mddev)); err = -EBUSY; goto out; } + if (bdev) + sync_blockdev(bdev); if (mddev->pers) { __md_stop_writes(mddev); @@ -5381,18 +5384,26 @@ out: * 0 - completely stop and dis-assemble array * 2 - stop but do not disassemble array */ -static int do_md_stop(struct mddev * mddev, int mode, int is_open) +static int do_md_stop(struct mddev * mddev, int mode, + struct block_device *bdev) { struct gendisk *disk = mddev->gendisk; struct md_rdev *rdev; mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open || + if (atomic_read(&mddev->openers) > !!bdev || mddev->sysfs_active) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); return -EBUSY; } + if (bdev) + /* It is possible IO was issued on some other + * open file which was closed before we took ->open_mutex. + * As that was not the last close __blkdev_put will not + * have called sync_blockdev, so we must. + */ + sync_blockdev(bdev); if (mddev->pers) { if (mddev->ro) @@ -5466,7 +5477,7 @@ static void autorun_array(struct mddev *mddev) err = do_md_run(mddev); if (err) { printk(KERN_WARNING "md: do_md_run() returned %d\n", err); - do_md_stop(mddev, 0, 0); + do_md_stop(mddev, 0, NULL); } } @@ -6481,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto done_unlock; case STOP_ARRAY: - err = do_md_stop(mddev, 0, 1); + err = do_md_stop(mddev, 0, bdev); goto done_unlock; case STOP_ARRAY_RO: - err = md_set_readonly(mddev, 1); + err = md_set_readonly(mddev, bdev); goto done_unlock; case BLKROSET: diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 240ff3125040..cacd008d6864 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1818,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) if (atomic_dec_and_test(&r1_bio->remaining)) { /* if we're here, all write(s) have completed, so clean up */ - md_done_sync(mddev, r1_bio->sectors, 1); - put_buf(r1_bio); + int s = r1_bio->sectors; + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + put_buf(r1_bio); + md_done_sync(mddev, s, 1); + } } } diff --git a/drivers/media/video/cx25821/cx25821-core.c b/drivers/media/video/cx25821/cx25821-core.c index 83c1aa6b2e6c..f11f6f07e915 100644 --- a/drivers/media/video/cx25821/cx25821-core.c +++ b/drivers/media/video/cx25821/cx25821-core.c @@ -904,9 +904,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev) list_add_tail(&dev->devlist, &cx25821_devlist); mutex_unlock(&cx25821_devlist_mutex); - strcpy(cx25821_boards[UNKNOWN_BOARD].name, "unknown"); - strcpy(cx25821_boards[CX25821_BOARD].name, "cx25821"); - if (dev->pci->device != 0x8210) { pr_info("%s(): Exiting. Incorrect Hardware device = 0x%02x\n", __func__, dev->pci->device); diff --git a/drivers/media/video/cx25821/cx25821.h b/drivers/media/video/cx25821/cx25821.h index b9aa801b00a7..029f2934a6d8 100644 --- a/drivers/media/video/cx25821/cx25821.h +++ b/drivers/media/video/cx25821/cx25821.h @@ -187,7 +187,7 @@ enum port { }; struct cx25821_board { - char *name; + const char *name; enum port porta; enum port portb; enum port portc; diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 83dbb2ddff10..0cbada18f6f5 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -681,6 +681,7 @@ static void determine_valid_ioctls(struct video_device *vdev) SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings); + SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap); /* yes, really vidioc_subscribe_event */ SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index ae7814874618..072734538876 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -22,11 +22,6 @@ static int __init wait_scan_init(void) * and might not yet have reached the scsi async scanning */ wait_for_device_probe(); - /* - * and then we wait for the actual asynchronous scsi scan - * to finish. - */ - scsi_complete_async_scans(); return 0; } diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 9888693a18fe..664f6e775d0e 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1095,7 +1095,7 @@ int target_emulate_write_same(struct se_cmd *cmd) if (num_blocks != 0) range = num_blocks; else - range = (dev->transport->get_blocks(dev) - lba); + range = (dev->transport->get_blocks(dev) - lba) + 1; pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n", (unsigned long long)lba, (unsigned long long)range); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 85564998500a..a1bcd927a9e6 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2031,7 +2031,7 @@ static int __core_scsi3_write_aptpl_to_file( if (IS_ERR(file) || !file || !file->f_dentry) { pr_err("filp_open(%s) for APTPL metadata" " failed\n", path); - return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT); + return IS_ERR(file) ? PTR_ERR(file) : -ENOENT; } iov[0].iov_base = &buf[0]; @@ -3818,7 +3818,7 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd) " SPC-2 reservation is held, returning" " RESERVATION_CONFLICT\n"); cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT; - ret = EINVAL; + ret = -EINVAL; goto out; } @@ -3828,7 +3828,8 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd) */ if (!cmd->se_sess) { cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return -EINVAL; + ret = -EINVAL; + goto out; } if (cmd->data_length < 24) { diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index f03fb9730f5b..5b65f33939a8 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -230,6 +230,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd) { struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); + if (cmd->aborted) + return ~0; return fc_seq_exch(cmd->seq)->rxid; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5b400730c213..4ee522b3f66f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,7 +86,31 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -/* Forward declarations */ +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ +static DEFINE_MUTEX(cifs_kmap_mutex); + +static inline void +cifs_kmap_lock(void) +{ + mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ + mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while(0) +#define cifs_kmap_unlock() do { ; } while(0) +#endif /* CONFIG_HIGHMEM */ /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ @@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* marshal up the page array */ + cifs_kmap_lock(); len = rdata->marshal_iov(rdata, data_len); + cifs_kmap_unlock(); data_len -= len; /* issue the read if we have any iovecs left to fill */ @@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata) * and set the iov_len properly for each one. It may also set * wdata->bytes too. */ + cifs_kmap_lock(); wdata->marshal_iov(iov, wdata); + cifs_kmap_unlock(); cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0ae86ddf2213..94b7788c3189 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT (1<<24) +#endif /* CONFIG_HIGHMEM */ + static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { @@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) if (!(server->capabilities & CAP_LARGE_READ_X)) rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_RSIZE */ rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0a8224d1c4c5..a4217f02fab2 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3097ee58fd7d..f25d4ea14be4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - /* put it on the pending_mid_q */ - spin_lock(&GlobalMid_Lock); - list_add_tail(&mid->qhead, &server->pending_mid_q); - spin_unlock(&GlobalMid_Lock); - rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); - if (rc) - delete_mid(mid); + if (rc) { + DeleteMidQEntry(mid); + return rc; + } + *ret_mid = mid; - return rc; + return 0; } /* @@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid->callback_data = cbdata; mid->mid_state = MID_REQUEST_SUBMITTED; + /* put it on the pending_mid_q */ + spin_lock(&GlobalMid_Lock); + list_add_tail(&mid->qhead, &server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + + cifs_in_send_inc(server); rc = smb_sendv(server, iov, nvec); cifs_in_send_dec(server); cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); - if (rc) - goto out_err; + if (rc == 0) + return 0; - return rc; -out_err: delete_mid(mid); add_credits(server, 1); wake_up(&server->request_q); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 74598f67efeb..1c8b55670804 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epds.events &= ~EPOLLWAKEUP; /* diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 49cf230554a2..24a49d47e935 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) out: ios->numdevs = devs_in_group; ios->pages_consumed = cur_pg; - if (unlikely(ret)) { - if (length == ios->length) - return ret; - else - ios->length -= length; - } - return 0; + return ret; } int ore_create(struct ore_io_state *ios) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index d222c77cfa1b..5f376d14fdcc 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d, { unsigned data_devs = sp2d->data_devs; unsigned group_width = data_devs + sp2d->parity; - unsigned p; + int p, c; if (!sp2d->needed) return; - for (p = 0; p < sp2d->pages_in_unit; p++) { - struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - - if (_1ps->write_count < group_width) { - unsigned c; + for (c = data_devs - 1; c >= 0; --c) + for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - for (c = 0; c < data_devs; c++) - if (_1ps->page_is_read[c]) { - struct page *page = _1ps->pages[c]; + if (_1ps->page_is_read[c]) { + struct page *page = _1ps->pages[c]; - r4w->put_page(priv, page); - _1ps->page_is_read[c] = false; - } + r4w->put_page(priv, page); + _1ps->page_is_read[c] = false; + } } + for (p = 0; p < sp2d->pages_in_unit; p++) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; + memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); _1ps->write_count = 0; _1ps->tx = NULL; @@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) * ios->sp2d[p][*], xor is calculated the same way. These pages are * allocated/freed and don't go through cache */ -static int _read_4_write(struct ore_io_state *ios) +static int _read_4_write_first_stripe(struct ore_io_state *ios) { - struct ore_io_state *ios_read; struct ore_striping_info read_si; struct __stripe_pages_2d *sp2d = ios->sp2d; u64 offset = ios->si.first_stripe_start; - u64 last_stripe_end; - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; - int ret; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; if (offset == ios->offset) /* Go to start collect $200 */ goto read_last_stripe; @@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios) min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", + offset, ios->offset, min_p, max_p); + for (c = 0; ; c++) { ore_calc_stripe_info(ios->layout, offset, 0, &read_si); read_si.obj_offset += min_p * PAGE_SIZE; @@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios) } read_last_stripe: + return 0; +} + +static int _read_4_write_last_stripe(struct ore_io_state *ios) +{ + struct ore_striping_info read_si; + struct __stripe_pages_2d *sp2d = ios->sp2d; + u64 offset; + u64 last_stripe_end; + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; + offset = ios->offset + ios->length; if (offset % PAGE_SIZE) _add_to_r4w_last_page(ios, &offset); @@ -527,15 +538,15 @@ read_last_stripe: c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); - /* unaligned IO must be within a single stripe */ - if (min_p == sp2d->pages_in_unit) { /* Didn't do it yet */ min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); } + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", + offset, last_stripe_end, min_p, max_p); + while (offset < last_stripe_end) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; @@ -568,6 +579,15 @@ read_last_stripe: } read_it: + return 0; +} + +static int _read_4_write_execute(struct ore_io_state *ios) +{ + struct ore_io_state *ios_read; + unsigned i; + int ret; + ios_read = ios->ios_read_4_write; if (!ios_read) return 0; @@ -591,6 +611,8 @@ read_it: } _mark_read4write_pages_uptodate(ios_read, ret); + ore_put_io_state(ios_read); + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ return 0; } @@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, /* If first stripe, Read in all read4write pages * (if needed) before we calculate the first parity. */ - _read_4_write(ios); + _read_4_write_first_stripe(ios); } + if (!cur_len) /* If last stripe r4w pages of last stripe */ + _read_4_write_last_stripe(ios); + _read_4_write_execute(ios); for (i = 0; i < num_pages; i++) { pages[i] = _raid_page_alloc(); @@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios, int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) { - struct ore_layout *layout = ios->layout; - if (ios->parity_pages) { + struct ore_layout *layout = ios->layout; unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; - unsigned stripe_size = ios->si.bytes_in_stripe; - u64 last_stripe, first_stripe; if (_sp2d_alloc(pages_in_unit, layout->group_width, layout->parity, &ios->sp2d)) { return -ENOMEM; } - - /* Round io down to last full strip */ - first_stripe = div_u64(ios->offset, stripe_size); - last_stripe = div_u64(ios->offset + ios->length, stripe_size); - - /* If an IO spans more then a single stripe it must end at - * a stripe boundary. The reminder at the end is pushed into the - * next IO. - */ - if (last_stripe != first_stripe) { - ios->length = last_stripe * stripe_size - ios->offset; - - BUG_ON(!ios->length); - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE; - ios->si.length = ios->length; /*make it consistent */ - } } return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac3f366..6ec6f9ee2fec 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -268,7 +268,6 @@ group_extend_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); - mnt_drop_write(filp->f_path.mnt); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b47277baebab..f50d3e8d6f22 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata) objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, rdata->args.offset, rdata->args.count); - return ore_read(objios->ios); + ret = ore_read(objios->ios); + if (unlikely(ret)) + objio_free_result(&objios->oir); + return ret; } /* @@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) struct nfs_write_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(mapping, index); + struct page *page; + loff_t i_size = i_size_read(wdata->header->inode); + + if (offset >= i_size) { + *uptodate = true; + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); + return ZERO_PAGE(0); + } + page = find_get_page(mapping, index); if (!page) { page = find_or_create_page(mapping, index, GFP_NOFS); if (unlikely(!page)) { @@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) static void __r4w_put_page(void *priv, struct page *page) { - dprintk("%s: index=0x%lx\n", __func__, page->index); - page_cache_release(page); + dprintk("%s: index=0x%lx\n", __func__, + (page == ZERO_PAGE(0)) ? -1UL : page->index); + if (ZERO_PAGE(0) != page) + page_cache_release(page); return; } @@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) dprintk("%s: offset=0x%llx length=0x%x\n", __func__, wdata->args.offset, wdata->args.count); ret = ore_write(objios->ios); - if (unlikely(ret)) + if (unlikely(ret)) { + objio_free_result(&objios->oir); return ret; + } if (objios->sync) _write_done(objios->ios, objios); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index ef3d1ba6d992..15e2fc5aa60b 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c) lnum = ubifs_next_log_lnum(c, lnum); } - /* Fixup the current log head */ - err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); if (err) goto out; diff --git a/include/linux/capability.h b/include/linux/capability.h index 68d56effc328..d10b7ed595b1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,11 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 -/* Allow preventing system suspends while epoll events are pending */ +/* Allow preventing system suspends */ -#define CAP_EPOLLWAKEUP 36 +#define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_EPOLLWAKEUP +#define CAP_LAST_CAP CAP_BLOCK_SUSPEND #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d9..44c87e731e9d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection { /* connection negotiation temps */ char in_banner[CEPH_BANNER_MAX_LEN]; - union { - struct { /* outgoing connection */ - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - }; - struct { /* incoming */ - struct ceph_msg_connect in_connect; - struct ceph_msg_connect_reply out_reply; - }; - }; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; struct ceph_entity_addr actual_peer_addr; /* message out temps */ diff --git a/include/linux/device.h b/include/linux/device.h index 161d96241b1b..6de94151ff6f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); -extern void wait_for_device_probe(void); - #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6f8be328770a..f4bb378ccf6a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -34,7 +34,7 @@ * re-allowed until epoll_wait is called again after consuming the wakeup * event(s). * - * Requires CAP_EPOLLWAKEUP + * Requires CAP_BLOCK_SUSPEND */ #define EPOLLWAKEUP (1 << 29) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e65eff6af3b..8a7476186990 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -168,8 +168,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - RCU_INIT_POINTER(.real_cred, &init_cred), \ - RCU_INIT_POINTER(.cred, &init_cred), \ + RCU_POINTER_INITIALIZER(real_cred, &init_cred), \ + RCU_POINTER_INITIALIZER(cred, &init_cred), \ .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e6ca56de9936..78e2ada50cd5 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -308,6 +308,8 @@ enum { struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 reg_phys; /* physical address of hw register set */ + u64 reg_size; /* size of hw register set */ u64 cap; u64 ecap; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ diff --git a/include/linux/irq.h b/include/linux/irq.h index a5261e3d2e3c..553fb66da130 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -150,9 +150,7 @@ struct irq_data { void *handler_data; void *chip_data; struct msi_desc *msi_desc; -#ifdef CONFIG_SMP cpumask_var_t affinity; -#endif }; /* diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e07f5e0c5df4..604382143bcf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,7 +377,6 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, - SYSTEM_SUSPEND_DISK, } system_state; #define TAINT_PROPRIETARY_MODULE 0 diff --git a/include/linux/key.h b/include/linux/key.h index 4cd22ed627ef..cef3b315ba7c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key) rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ - (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) +do { \ + rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \ +} while (0) #ifdef CONFIG_SYSCTL extern ctl_table key_sysctls[]; diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index d6bd50110ec2..2e7a1e032c71 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -55,12 +55,17 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len); +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); + void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, + bool syslog, const char *line, + size_t size, size_t *len) +{ + return false; +} + static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, const char *line, size_t size, size_t *len) { @@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, return false; } +static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +} + static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9cac722b169c..115ead2b5155 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -147,6 +147,7 @@ extern void synchronize_sched(void); extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* @@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) +extern int rcu_is_cpu_idle(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ + #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#ifdef CONFIG_PROVE_RCU -extern int rcu_is_cpu_idle(void); -#else /* !CONFIG_PROVE_RCU */ -static inline int rcu_is_cpu_idle(void) -{ - return 0; -} -#endif /* else !CONFIG_PROVE_RCU */ - static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); @@ -431,8 +427,7 @@ extern int rcu_my_thread_group_empty(void); static inline void rcu_preempt_sleep_check(void) { rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal context switch in RCU read-side " - "critical section"); + "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) @@ -513,10 +508,10 @@ static inline void rcu_preempt_sleep_check(void) (_________p1); \ }) #define __rcu_assign_pointer(p, v, space) \ - ({ \ + do { \ smp_wmb(); \ (p) = (typeof(*v) __force space *)(v); \ - }) + } while (0) /** @@ -851,7 +846,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see - * any prior initialization. Returns the value assigned. + * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from @@ -903,25 +898,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * the reader-accessible portions of the linked structure. */ #define RCU_INIT_POINTER(p, v) \ - p = (typeof(*v) __force __rcu *)(v) - -static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) -{ - return offset < 4096; -} - -static __always_inline -void __kfree_rcu(struct rcu_head *head, unsigned long offset) -{ - typedef void (*rcu_callback)(struct rcu_head *); - - BUILD_BUG_ON(!__builtin_constant_p(offset)); - - /* See the kfree_rcu() header comment. */ - BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + do { \ + p = (typeof(*v) __force __rcu *)(v); \ + } while (0) - kfree_call_rcu(head, (rcu_callback)offset); -} +/** + * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * + * GCC-style initialization for an RCU-protected pointer in a structure field. + */ +#define RCU_POINTER_INITIALIZER(p, v) \ + .p = (typeof(*v) __force __rcu *)(v) /* * Does the specified offset indicate that the corresponding rcu_head @@ -935,7 +922,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/smp.h b/include/linux/smp.h index 717fb746c9a8..dd6f06be3c9f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -90,10 +90,6 @@ void kick_all_cpus_sync(void); void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); -void ipi_call_lock(void); -void ipi_call_unlock(void); -void ipi_call_lock_irq(void); -void ipi_call_unlock_irq(void); #else static inline void call_function_init(void) { } #endif @@ -181,7 +177,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } while (0) static inline void smp_send_reschedule(int cpu) { } -#define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) diff --git a/include/linux/tick.h b/include/linux/tick.h index ab8be90b5cc9..f37fceb69b73 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -31,10 +31,10 @@ enum tick_nohz_mode { * struct tick_sched - sched tick emulation and no idle tick control/stats * @sched_timer: hrtimer to schedule the periodic tick in high * resolution mode - * @idle_tick: Store the last idle tick expiry time when the tick - * timer is modified for idle sleeps. This is necessary + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline - * when the CPU returns from idle + * when the CPU returns from nohz sleep. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -51,7 +51,7 @@ struct tick_sched { struct hrtimer sched_timer; unsigned long check_clocks; enum tick_nohz_mode nohz_mode; - ktime_t idle_tick; + ktime_t last_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d274734b2aa4..5bde94d8585b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read, __entry->rcutorturename, __entry->rhp) ); +/* + * Tracepoint for _rcu_barrier() execution. The string "s" describes + * the _rcu_barrier phase: + * "Begin": rcu_barrier_callback() started. + * "Check": rcu_barrier_callback() checking for piggybacking. + * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. + * "Inc1": rcu_barrier_callback() piggyback check counter incremented. + * "Offline": rcu_barrier_callback() found offline CPU + * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. + * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. + * "LastCB": An rcu_barrier_callback() invoked the last callback. + * "Inc2": rcu_barrier_callback() piggyback check counter incremented. + * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument + * is the count of remaining callbacks, and "done" is the piggybacking count. + */ +TRACE_EVENT(rcu_barrier, + + TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done), + + TP_ARGS(rcuname, s, cpu, cnt, done), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(char *, s) + __field(int, cpu) + __field(int, cnt) + __field(unsigned long, done) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->s = s; + __entry->cpu = cpu; + __entry->cnt = cnt; + __entry->done = done; + ), + + TP_printk("%s %s cpu %d remaining %d # %lu", + __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt, + __entry->done) +); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 67b847dfa2bb..1f91413edb87 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -14,6 +14,7 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/kmsg_dump.h> #include <linux/reboot.h> #include <linux/sched.h> #include <linux/sysrq.h> @@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv) */ static int kdb_dmesg(int argc, const char **argv) { - char *syslog_data[4], *start, *end, c = '\0', *p; - int diag, logging, logsize, lines = 0, adjust = 0, n; + int diag; + int logging; + int lines = 0; + int adjust = 0; + int n = 0; + int skip = 0; + struct kmsg_dumper dumper = { .active = 1 }; + size_t len; + char buf[201]; if (argc > 2) return KDB_ARGCOUNT; @@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv) kdb_set(2, setargs); } - /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] - * logical start, end+1. */ - kdb_syslog_data(syslog_data); - if (syslog_data[2] == syslog_data[3]) - return 0; - logsize = syslog_data[1] - syslog_data[0]; - start = syslog_data[2]; - end = syslog_data[3]; -#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0]) - for (n = 0, p = start; p < end; ++p) { - c = *KDB_WRAP(p); - if (c == '\n') - ++n; - } - if (c != '\n') - ++n; + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) + n++; + if (lines < 0) { if (adjust >= n) kdb_printf("buffer only contains %d lines, nothing " @@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv) else if (adjust - lines >= n) kdb_printf("buffer only contains %d lines, last %d " "lines printed\n", n, n - adjust); - if (adjust) { - for (; start < end && adjust; ++start) { - if (*KDB_WRAP(start) == '\n') - --adjust; - } - if (start < end) - ++start; - } - for (p = start; p < end && lines; ++p) { - if (*KDB_WRAP(p) == '\n') - ++lines; - } - end = p; + skip = adjust; + lines = abs(lines); } else if (lines > 0) { - int skip = n - (adjust + lines); + skip = n - lines - adjust; + lines = abs(lines); if (adjust >= n) { kdb_printf("buffer only contains %d lines, " "nothing printed\n", n); @@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv) kdb_printf("buffer only contains %d lines, first " "%d lines printed\n", n, lines); } - for (; start < end && skip; ++start) { - if (*KDB_WRAP(start) == '\n') - --skip; - } - for (p = start; p < end && lines; ++p) { - if (*KDB_WRAP(p) == '\n') - --lines; - } - end = p; + } else { + lines = n; } - /* Do a line at a time (max 200 chars) to reduce protocol overhead */ - c = '\n'; - while (start != end) { - char buf[201]; - p = buf; - if (KDB_FLAG(CMD_INTERRUPT)) - return 0; - while (start < end && (c = *KDB_WRAP(start)) && - (p - buf) < sizeof(buf)-1) { - ++start; - *p++ = c; - if (c == '\n') - break; + + if (skip >= n || skip < 0) + return 0; + + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { + if (skip) { + skip--; + continue; } - *p = '\0'; - kdb_printf("%s", buf); + if (!lines--) + break; + + kdb_printf("%.*s\n", (int)len - 1, buf); } - if (c != '\n') - kdb_printf("\n"); return 0; } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 47c4e56e513b..392ec6a25844 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -205,7 +205,6 @@ extern char kdb_grep_string[]; extern int kdb_grep_leading; extern int kdb_grep_trailing; extern char *kdb_cmds[]; -extern void kdb_syslog_data(char *syslog_data[]); extern unsigned long kdb_task_state_string(const char *); extern char kdb_task_state_char (const struct task_struct *); extern unsigned long kdb_task_state(const struct task_struct *p, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f93532748bca..c08a22d02f72 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -32,6 +32,7 @@ #include <linux/swap.h> /* try_to_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ +#include "../../mm/internal.h" /* munlock_vma_page */ #include <linux/uprobes.h> @@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) return false; } -static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) +static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { - loff_t vaddr; - - vaddr = vma->vm_start + offset; - vaddr -= vma->vm_pgoff << PAGE_SHIFT; + return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); +} - return vaddr; +static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) +{ + return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** @@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page + * @addr: address the old @page is mapped at * @page: the cowed page we are replacing by kpage * @kpage: the modified page we replace page by * * Returns 0 on success, -EFAULT on failure. */ -static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) +static int __replace_page(struct vm_area_struct *vma, unsigned long addr, + struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr; spinlock_t *ptl; pte_t *ptep; + int err; - addr = page_address_in_vma(page, vma); - if (addr == -EFAULT) - return -EFAULT; + /* For try_to_free_swap() and munlock_vma_page() below */ + lock_page(page); + err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - return -EAGAIN; + goto unlock; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page_remove_rmap(page); if (!page_mapped(page)) try_to_free_swap(page); - put_page(page); pte_unmap_unlock(ptep, ptl); - return 0; + if (vma->vm_flags & VM_LOCKED) + munlock_vma_page(page); + put_page(page); + + err = 0; + unlock: + unlock_page(page); + return err; } /** @@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; - struct address_space *mapping; void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; - struct uprobe *uprobe; int ret; + retry: /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) return ret; - ret = -EINVAL; - - /* - * We are interested in text pages only. Our pages of interest - * should be mapped for read and execute only. We desist from - * adding probes in write mapped pages since the breakpoints - * might end up in the file copy. - */ - if (!valid_vma(vma, is_swbp_insn(&opcode))) - goto put_out; - - uprobe = container_of(auprobe, struct uprobe, arch); - mapping = uprobe->inode->i_mapping; - if (mapping != vma->vm_file->f_mapping) - goto put_out; - ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) - goto put_out; + goto put_old; __SetPageUptodate(new_page); - /* - * lock page will serialize against do_wp_page()'s - * PageAnon() handling - */ - lock_page(old_page); /* copy the page now that we've got it stable */ vaddr_old = kmap_atomic(old_page); vaddr_new = kmap_atomic(new_page); @@ -257,17 +244,13 @@ retry: ret = anon_vma_prepare(vma); if (ret) - goto unlock_out; + goto put_new; - lock_page(new_page); - ret = __replace_page(vma, old_page, new_page); - unlock_page(new_page); + ret = __replace_page(vma, vaddr, old_page, new_page); -unlock_out: - unlock_page(old_page); +put_new: page_cache_release(new_page); - -put_out: +put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) @@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) curr = info; info->mm = vma->vm_mm; - info->vaddr = vma_address(vma, offset); + info->vaddr = offset_to_vaddr(vma, offset); } mutex_unlock(&mapping->i_mmap_mutex); @@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) goto free; down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)info->vaddr); - if (!vma || !valid_vma(vma, is_register)) + vma = find_vma(mm, info->vaddr); + if (!vma || !valid_vma(vma, is_register) || + vma->vm_file->f_mapping->host != uprobe->inode) goto unlock; - if (vma->vm_file->f_mapping->host != uprobe->inode || - vma_address(vma, uprobe->offset) != info->vaddr) + if (vma->vm_start > info->vaddr || + vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { @@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume put_uprobe(uprobe); } -/* - * Of all the nodes that correspond to the given inode, return the node - * with the least offset. - */ -static struct rb_node *find_least_offset_node(struct inode *inode) +static struct rb_node * +find_node_in_range(struct inode *inode, loff_t min, loff_t max) { - struct uprobe u = { .inode = inode, .offset = 0}; struct rb_node *n = uprobes_tree.rb_node; - struct rb_node *close_node = NULL; - struct uprobe *uprobe; - int match; while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(&u, uprobe); - - if (uprobe->inode == inode) - close_node = n; - - if (!match) - return close_node; + struct uprobe *u = rb_entry(n, struct uprobe, rb_node); - if (match < 0) + if (inode < u->inode) { n = n->rb_left; - else + } else if (inode > u->inode) { n = n->rb_right; + } else { + if (max < u->offset) + n = n->rb_left; + else if (min > u->offset) + n = n->rb_right; + else + break; + } } - return close_node; + return n; } /* - * For a given inode, build a list of probes that need to be inserted. + * For a given range in vma, build a list of probes that need to be inserted. */ -static void build_probe_list(struct inode *inode, struct list_head *head) +static void build_probe_list(struct inode *inode, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *head) { - struct uprobe *uprobe; + loff_t min, max; unsigned long flags; - struct rb_node *n; - - spin_lock_irqsave(&uprobes_treelock, flags); - - n = find_least_offset_node(inode); + struct rb_node *n, *t; + struct uprobe *u; - for (; n; n = rb_next(n)) { - uprobe = rb_entry(n, struct uprobe, rb_node); - if (uprobe->inode != inode) - break; + INIT_LIST_HEAD(head); + min = vaddr_to_offset(vma, start); + max = min + (end - start) - 1; - list_add(&uprobe->pending_list, head); - atomic_inc(&uprobe->ref); + spin_lock_irqsave(&uprobes_treelock, flags); + n = find_node_in_range(inode, min, max); + if (n) { + for (t = n; t; t = rb_prev(t)) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset < min) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } + for (t = n; (t = rb_next(t)); ) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset > max) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } } - spin_unlock_irqrestore(&uprobes_treelock, flags); } @@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; int ret, count; @@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!inode) return 0; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); + build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); ret = 0; count = 0; - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { - put_uprobe(uprobe); - continue; - } + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); /* @@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; + if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ + return; + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) return; @@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!inode) return; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); - - list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr >= start && vaddr < end) { - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - } + build_probe_list(inode, vma, start, end, &tmp_list); + + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); @@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { - struct inode *inode; - loff_t offset; + struct inode *inode = vma->vm_file->f_mapping->host; + loff_t offset = vaddr_to_offset(vma, bp_vaddr); - inode = vma->vm_file->f_mapping->host; - offset = bp_vaddr - vma->vm_start; - offset += (vma->vm_pgoff << PAGE_SHIFT); uprobe = find_uprobe(inode, offset); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8b53db38a279..238025f5472e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -27,7 +27,6 @@ #include <linux/syscore_ops.h> #include <linux/ctype.h> #include <linux/genhd.h> -#include <scsi/scsi_scan.h> #include "power.h" @@ -748,13 +747,6 @@ static int software_resume(void) async_synchronize_full(); } - /* - * We can't depend on SCSI devices being available after loading - * one of their modules until scsi_complete_async_scans() is - * called and the resume device usually is a SCSI one. - */ - scsi_complete_async_scans(); - swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { error = -ENODEV; diff --git a/kernel/power/user.c b/kernel/power/user.c index 91b0fd021a95..4ed81e74f86f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -24,7 +24,6 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> -#include <scsi/scsi_scan.h> #include <asm/uaccess.h> @@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) * appear. */ wait_for_device_probe(); - scsi_complete_async_scans(); data->swap = -1; data->mode = O_WRONLY; diff --git a/kernel/printk.c b/kernel/printk.c index 177fa49357a5..ac4bc9e79465 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1192,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } -#ifdef CONFIG_KGDB_KDB -/* kdb dmesg command needs access to the syslog buffer. do_syslog() - * uses locks so it cannot be used during debugging. Just tell kdb - * where the start and end of the physical and logical logs are. This - * is equivalent to do_syslog(3). - */ -void kdb_syslog_data(char *syslog_data[4]) -{ - syslog_data[0] = log_buf; - syslog_data[1] = log_buf + log_buf_len; - syslog_data[2] = log_buf + log_first_idx; - syslog_data[3] = log_buf + log_next_idx; -} -#endif /* CONFIG_KGDB_KDB */ - static bool __read_mostly ignore_loglevel; static int __init ignore_loglevel_setup(char *str) @@ -2525,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) } /** - * kmsg_dump_get_line - retrieve one kmsg log line + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to @@ -2540,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * A return value of FALSE indicates that there are no more records to * read. + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) { - unsigned long flags; struct log *msg; size_t l = 0; bool ret = false; @@ -2552,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, if (!dumper->active) goto out; - raw_spin_lock_irqsave(&logbuf_lock, flags); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; @@ -2560,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, } /* last entry */ - if (dumper->cur_seq >= log_next_seq) { - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (dumper->cur_seq >= log_next_seq) goto out; - } msg = log_from_idx(dumper->cur_idx); l = msg_print_text(msg, 0, syslog, line, size); @@ -2571,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, dumper->cur_idx = log_next(dumper->cur_idx); dumper->cur_seq++; ret = true; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); out: if (len) *len = l; return ret; } + +/** + * kmsg_dump_get_line - retrieve one kmsg log line + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. + */ +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) +{ + unsigned long flags; + bool ret; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + return ret; +} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** @@ -2679,6 +2691,24 @@ out: EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** + * kmsg_dump_rewind_nolock - reset the interator (unlocked version) + * @dumper: registered kmsg dumper + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. + * + * The function is similar to kmsg_dump_rewind(), but grabs no locks. + */ +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ + dumper->cur_seq = clear_seq; + dumper->cur_idx = clear_idx; + dumper->next_seq = log_next_seq; + dumper->next_idx = log_next_idx; +} + +/** * kmsg_dump_rewind - reset the interator * @dumper: registered kmsg dumper * @@ -2691,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) unsigned long flags; raw_spin_lock_irqsave(&logbuf_lock, flags); - dumper->cur_seq = clear_seq; - dumper->cur_idx = clear_idx; - dumper->next_seq = log_next_seq; - dumper->next_idx = log_next_idx; + kmsg_dump_rewind_nolock(dumper); raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 95cba41ce1e9..4e6a61b15e86 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -54,6 +54,50 @@ #ifdef CONFIG_PREEMPT_RCU /* + * Preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* critical section after entry code. */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting != 1) { + --t->rcu_read_lock_nesting; + } else { + barrier(); /* critical section before exit code. */ + t->rcu_read_lock_nesting = INT_MIN; + barrier(); /* assign before ->rcu_read_unlock_special load */ + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); + barrier(); /* ->rcu_read_unlock_special load before assign */ + t->rcu_read_lock_nesting = 0; + } +#ifdef CONFIG_PROVE_LOCKING + { + int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); + + WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + } +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* * Check for a task exiting while in a preemptible-RCU read-side * critical section, clean up if so. No need to issue warnings, * as debug_check_no_locks_held() already does this if lockdep diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 37a5444204d2..547b1fe5b052 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -172,7 +172,7 @@ void rcu_irq_enter(void) local_irq_restore(flags); } -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Test whether RCU thinks that the current CPU is idle. @@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* * Test whether the current CPU was interrupted from idle. Nested diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d65100..918fd1e8509c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { RCU_TRACE(.rcb.name = "rcu_preempt") }; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(void); static void rcu_report_exp_done(void); @@ -351,8 +350,9 @@ static int rcu_initiate_boost(void) rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_callbacks(); - } else + } else { RCU_TRACE(rcu_initiate_boost_trace()); + } return 1; } @@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void) } /* - * Tiny-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ - current->rcu_read_lock_nesting++; - barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/* * Handle special cases during rcu_read_unlock(), such as needing to * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) } /* - * Tiny-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ - struct task_struct *t = current; - - barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -/* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the rcu_preempt_ctrlblk structure, which is * checked elsewhere. This is called from the scheduling-clock interrupt. @@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void) rpcp->exp_tasks = NULL; /* Wait for tail of ->blkd_tasks list to drain. */ - if (!rcu_preempted_readers_exp()) + if (!rcu_preempted_readers_exp()) { local_irq_restore(flags); - else { + } else { rcu_initiate_boost(); local_irq_restore(flags); wait_event(sync_rcu_preempt_exp_wq, @@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ int rcu_preempt_needs_cpu(void) { - if (!rcu_preempt_running_reader()) - rcu_preempt_cpu_qs(); return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; } diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e66b34ab7555..25b15033c61f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -49,8 +49,7 @@ #include <asm/byteorder.h> MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " - "Josh Triplett <josh@freedesktop.org>"); +MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ @@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ +static bool barrier_phase; /* Test phase. */ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); @@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p) if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { rp->rtort_mbtest = 0; rcu_torture_free(rp); - } else + } else { cur_ops->deferred_free(rp); + } } static int rcu_no_completed(void) @@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void) synchronize_srcu(&srcu_ctl); } +static void srcu_torture_call(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + call_srcu(&srcu_ctl, head, func); +} + +static void srcu_torture_barrier(void) +{ + srcu_barrier(&srcu_ctl); +} + static int srcu_torture_stats(char *page) { int cnt = 0; @@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = { .completed = srcu_torture_completed, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, - .call = NULL, - .cb_barrier = NULL, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .name = "srcu" }; @@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg) do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); udelay(rcu_random(&rand) & 0x3ff); - cur_ops->sync(); + if (cur_ops->cb_barrier != NULL && + rcu_random(&rand) % (nfakewriters * 8) == 0) + cur_ops->cb_barrier(); + else + cur_ops->sync(); rcu_stutter_wait("rcu_torture_fakewriter"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d rtbke: %ld rtbre: %ld " - "rtbf: %ld rtb: %ld nt: %ld " - "onoff: %ld/%ld:%ld/%ld " - "barrier: %ld/%ld:%ld", + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), atomic_read(&n_rcu_torture_alloc), atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free), + atomic_read(&n_rcu_torture_free)); + cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ", atomic_read(&n_rcu_torture_mberror), n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror, + n_rcu_torture_boost_rterror); + cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ", n_rcu_torture_boost_failure, n_rcu_torture_boosts, - n_rcu_torture_timers, + n_rcu_torture_timers); + cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", n_online_successes, n_online_attempts, n_offline_successes, - n_offline_attempts, + n_offline_attempts); + cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, n_rcu_torture_barrier_error); @@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg) delta = shutdown_time - jiffies_snap; if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu " - "jiffies remaining\n", + "rcu_torture_shutdown task: %lu jiffies remaining\n", torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); @@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg) if (cpu_down(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "offlined %d\n", + "rcu_torture_onoff task: offlined %d\n", torture_type, cpu); n_offline_successes++; } @@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg) if (cpu_up(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "onlined %d\n", + "rcu_torture_onoff task: onlined %d\n", torture_type, cpu); n_online_successes++; } @@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu) static int rcu_torture_barrier_cbs(void *arg) { long myid = (long)arg; + bool lastphase = 0; struct rcu_head rcu; init_rcu_head_on_stack(&rcu); @@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg) set_user_nice(current, 19); do { wait_event(barrier_cbs_wq[myid], - atomic_read(&barrier_cbs_count) == n_barrier_cbs || + barrier_phase != lastphase || kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP); + lastphase = barrier_phase; + smp_mb(); /* ensure barrier_phase load before ->call(). */ if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) break; cur_ops->call(&rcu, rcu_torture_barrier_cbf); @@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - /* wake_up() path contains the required barriers. */ + smp_mb(); /* Ensure barrier_phase after prior assignments. */ + barrier_phase = !barrier_phase; for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, @@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg) schedule_timeout_interruptible(HZ / 10); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + rcutorture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); return 0; @@ -1908,8 +1925,8 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, - &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, - &srcu_raw_sync_ops, &srcu_expedited_ops, + &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, + &srcu_raw_ops, &srcu_raw_sync_ops, &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); @@ -1931,8 +1948,7 @@ rcu_torture_init(void) return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " - "fqs_duration, fqs disabled.\n"); + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396e..f280e542e3e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,36 +60,44 @@ /* Data structures. */ -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; - -#define RCU_STATE_INITIALIZER(structname) { \ - .level = { &structname##_state.node[0] }, \ - .levelcnt = { \ - NUM_RCU_LVL_0, /* root of hierarchy. */ \ - NUM_RCU_LVL_1, \ - NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, \ - NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ - }, \ +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; + +#define RCU_STATE_INITIALIZER(sname, cr) { \ + .level = { &sname##_state.node[0] }, \ + .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ - .orphan_nxttail = &structname##_state.orphan_nxtlist, \ - .orphan_donetail = &structname##_state.orphan_donelist, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ - .n_force_qs = 0, \ - .n_force_qs_ngp = 0, \ - .name = #structname, \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ + .orphan_nxttail = &sname##_state.orphan_nxtlist, \ + .orphan_donetail = &sname##_state.orphan_donelist, \ + .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ + .name = #sname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = + RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); + +/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ + NUM_RCU_LVL_0, + NUM_RCU_LVL_1, + NUM_RCU_LVL_2, + NUM_RCU_LVL_3, + NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ /* * The rcu_scheduler_active variable transitions from zero to one just @@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; -static atomic_t rcu_barrier_cpu_count; -static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; - /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ @@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) trace_rcu_dyntick("Error on exit: not idle task", oldval, rdtp->dynticks_nesting); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ @@ -585,8 +586,6 @@ void rcu_nmi_exit(void) WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } -#ifdef CONFIG_PROVE_RCU - /** * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle * @@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* * Is the current CPU online? Disable preemption to avoid false positives @@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void) } EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ /** * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle @@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) int cpu; long delta; unsigned long flags; - int ndetected; + int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); /* Only let one CPU complain about others per time interval. */ @@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) */ rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - ndetected = rcu_print_task_stall(rnp); + ndetected += rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); print_cpu_stall_info_end(); @@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) */ void rcu_cpu_stall_reset(void) { - rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_preempt_stall_reset(); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rsp->jiffies_stall = jiffies + ULONG_MAX / 2; } static struct notifier_block rcu_panic_block = { @@ -894,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; rdp->passed_quiesce = 0; - } else + } else { rdp->qs_pending = 0; + } zero_cpu_stall_ticks(rdp); } } @@ -937,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) } /* + * Initialize the specified rcu_data structure's callback list to empty. + */ +static void init_callback_list(struct rcu_data *rdp) +{ + int i; + + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; +} + +/* * Advance this CPU's callbacks, but only if the current grace period * has ended. This may be called only from the CPU to whom the rdp * belongs. In addition, the corresponding leaf rcu_node structure's @@ -1328,8 +1339,6 @@ static void rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { - int i; - /* * Orphan the callbacks. First adjust the counts. This is safe * because ->onofflock excludes _rcu_barrier()'s adoption of @@ -1340,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, rsp->qlen += rdp->qlen; rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; } /* @@ -1369,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, } /* Finally, initialize the rcu_data structure's list to empty. */ - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); } /* @@ -1505,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp, true); + WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, + "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", + cpu, rdp->qlen, rdp->nxtlist); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -1592,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } smp_mb(); /* List handling before counting for rcu_barrier(). */ rdp->qlen_lazy -= count_lazy; - rdp->qlen -= count; + ACCESS_ONCE(rdp->qlen) -= count; rdp->n_cbs_invoked += count; /* Reinstate batch limit if we have worked down the excess. */ @@ -1605,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->n_force_qs_snap = rsp->n_force_qs; } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = rdp->qlen; + WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); local_irq_restore(flags); @@ -1745,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) - break; /* So gcc recognizes the dead code. */ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ @@ -1788,9 +1797,10 @@ unlock_fqs_ret: * whom the rdp belongs. */ static void -__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +__rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -1826,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + struct rcu_state *rsp; + trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state, - &__get_cpu_var(rcu_sched_data)); - __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); - rcu_preempt_process_callbacks(); + for_each_rcu_flavor(rsp) + __rcu_process_callbacks(rsp); trace_rcu_utilization("End RCU core"); } @@ -1857,6 +1867,56 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } +/* + * Handle any core-RCU processing required by a call_rcu() invocation. + */ +static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, + struct rcu_head *head, unsigned long flags) +{ + /* + * If called from an extended quiescent state, invoke the RCU + * core in order to force a re-evaluation of RCU's idleness. + */ + if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) + invoke_rcu_core(); + + /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ + if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) + return; + + /* + * Force the grace period if too many callbacks or too long waiting. + * Enforce hysteresis, and don't invoke force_quiescent_state() + * if some other CPU has recently done so. Also, don't bother + * invoking force_quiescent_state() if the newly enqueued callback + * is the only one waiting for a grace period to complete. + */ + if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { + + /* Are we ignoring a completed grace period? */ + rcu_process_gp_end(rsp, rdp); + check_for_new_grace_period(rsp, rdp); + + /* Start a new grace period if one not already started. */ + if (!rcu_gp_in_progress(rsp)) { + unsigned long nestflag; + struct rcu_node *rnp_root = rcu_get_root(rsp); + + raw_spin_lock_irqsave(&rnp_root->lock, nestflag); + rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ + } else { + /* Give the grace period a kick. */ + rdp->blimit = LONG_MAX; + if (rsp->n_force_qs == rdp->n_force_qs_snap && + *rdp->nxttail[RCU_DONE_TAIL] != head) + force_quiescent_state(rsp, 0); + rdp->n_force_qs_snap = rsp->n_force_qs; + rdp->qlen_last_fqs_check = rdp->qlen; + } + } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) + force_quiescent_state(rsp, 1); +} + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp, bool lazy) @@ -1881,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ - rdp->qlen++; + ACCESS_ONCE(rdp->qlen)++; if (lazy) rdp->qlen_lazy++; else @@ -1896,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), else trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); - /* If interrupts were disabled, don't dive into RCU core. */ - if (irqs_disabled_flags(flags)) { - local_irq_restore(flags); - return; - } - - /* - * Force the grace period if too many callbacks or too long waiting. - * Enforce hysteresis, and don't invoke force_quiescent_state() - * if some other CPU has recently done so. Also, don't bother - * invoking force_quiescent_state() if the newly enqueued callback - * is the only one waiting for a grace period to complete. - */ - if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { - - /* Are we ignoring a completed grace period? */ - rcu_process_gp_end(rsp, rdp); - check_for_new_grace_period(rsp, rdp); - - /* Start a new grace period if one not already started. */ - if (!rcu_gp_in_progress(rsp)) { - unsigned long nestflag; - struct rcu_node *rnp_root = rcu_get_root(rsp); - - raw_spin_lock_irqsave(&rnp_root->lock, nestflag); - rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ - } else { - /* Give the grace period a kick. */ - rdp->blimit = LONG_MAX; - if (rsp->n_force_qs == rdp->n_force_qs_snap && - *rdp->nxttail[RCU_DONE_TAIL] != head) - force_quiescent_state(rsp, 0); - rdp->n_force_qs_snap = rsp->n_force_qs; - rdp->qlen_last_fqs_check = rdp->qlen; - } - } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) - force_quiescent_state(rsp, 1); + /* Go handle any RCU core processing required. */ + __call_rcu_core(rsp, rdp, head, flags); local_irq_restore(flags); } @@ -1962,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * occasionally incorrectly indicate that there are multiple CPUs online * when there was in fact only one the whole time, as this just adds * some overhead: RCU still operates correctly. - * - * Of course, sampling num_online_cpus() with preemption enabled can - * give erroneous results if there are concurrent CPU-hotplug operations. - * For example, given a demonic sequence of preemptions in num_online_cpus() - * and CPU-hotplug operations, there could be two or more CPUs online at - * all times, but num_online_cpus() might well return one (or even zero). - * - * However, all such demonic sequences require at least one CPU-offline - * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer - * is only a problem if there is an RCU read-side critical section executing - * throughout. But RCU-sched and RCU-bh read-side critical sections - * disable either preemption or bh, which prevents a CPU from going offline. - * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return - * that there is only one CPU when in fact there was more than one throughout - * is when there were no RCU readers in the system. If there are no - * RCU readers, the grace period by definition can be of zero length, - * regardless of the number of online CPUs. */ static inline int rcu_blocking_is_gp(void) { + int ret; + might_sleep(); /* Check for RCU read-side critical section. */ - return num_online_cpus() <= 1; + preempt_disable(); + ret = num_online_cpus() <= 1; + preempt_enable(); + return ret; } /** @@ -2118,9 +2131,9 @@ void synchronize_sched_expedited(void) put_online_cpus(); /* No joy, try again later. Or just synchronize_sched(). */ - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_sched(); return; } @@ -2241,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ static int rcu_pending(int cpu) { - return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_pending(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) + return 1; + return 0; } /* @@ -2253,20 +2269,41 @@ static int rcu_pending(int cpu) */ static int rcu_cpu_has_callbacks(int cpu) { + struct rcu_state *rsp; + /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist || - rcu_preempt_cpu_has_callbacks(cpu); + for_each_rcu_flavor(rsp) + if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) + return 1; + return 0; +} + +/* + * Helper function for _rcu_barrier() tracing. If tracing is disabled, + * the compiler is expected to optimize this away. + */ +static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, + int cpu, unsigned long done) +{ + trace_rcu_barrier(rsp->name, s, cpu, + atomic_read(&rsp->barrier_cpu_count), done); } /* * RCU callback function for _rcu_barrier(). If we are last, wake * up the task executing _rcu_barrier(). */ -static void rcu_barrier_callback(struct rcu_head *notused) +static void rcu_barrier_callback(struct rcu_head *rhp) { - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) - complete(&rcu_barrier_completion); + struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); + struct rcu_state *rsp = rdp->rsp; + + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { + _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); + complete(&rsp->barrier_completion); + } else { + _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); + } } /* @@ -2274,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused) */ static void rcu_barrier_func(void *type) { - int cpu = smp_processor_id(); - struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + struct rcu_state *rsp = type; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - atomic_inc(&rcu_barrier_cpu_count); - call_rcu_func = type; - call_rcu_func(head, rcu_barrier_callback); + _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); + atomic_inc(&rsp->barrier_cpu_count); + rsp->call(&rdp->barrier_head, rcu_barrier_callback); } /* * Orchestrate the specified type of RCU barrier, waiting for all * RCU callbacks of the specified type to complete. */ -static void _rcu_barrier(struct rcu_state *rsp, - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head))) +static void _rcu_barrier(struct rcu_state *rsp) { int cpu; unsigned long flags; struct rcu_data *rdp; - struct rcu_head rh; + struct rcu_data rd; + unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); + unsigned long snap_done; - init_rcu_head_on_stack(&rh); + init_rcu_head_on_stack(&rd.barrier_head); + _rcu_barrier_trace(rsp, "Begin", -1, snap); /* Take mutex to serialize concurrent rcu_barrier() requests. */ - mutex_lock(&rcu_barrier_mutex); + mutex_lock(&rsp->barrier_mutex); + + /* + * Ensure that all prior references, including to ->n_barrier_done, + * are ordered before the _rcu_barrier() machinery. + */ + smp_mb(); /* See above block comment. */ + + /* + * Recheck ->n_barrier_done to see if others did our work for us. + * This means checking ->n_barrier_done for an even-to-odd-to-even + * transition. The "if" expression below therefore rounds the old + * value up to the next even number and adds two before comparing. + */ + snap_done = ACCESS_ONCE(rsp->n_barrier_done); + _rcu_barrier_trace(rsp, "Check", -1, snap_done); + if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); + smp_mb(); /* caller's subsequent code after above check. */ + mutex_unlock(&rsp->barrier_mutex); + return; + } - smp_mb(); /* Prevent any prior operations from leaking in. */ + /* + * Increment ->n_barrier_done to avoid duplicate work. Use + * ACCESS_ONCE() to prevent the compiler from speculating + * the increment to precede the early-exit check. + */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); + smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* * Initialize the count to one rather than to zero in order to @@ -2321,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp, * 6. Both rcu_barrier_callback() callbacks are invoked, awakening * us -- but before CPU 1's orphaned callbacks are invoked!!! */ - init_completion(&rcu_barrier_completion); - atomic_set(&rcu_barrier_cpu_count, 1); + init_completion(&rsp->barrier_completion); + atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -2338,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp, preempt_disable(); rdp = per_cpu_ptr(rsp->rda, cpu); if (cpu_is_offline(cpu)) { + _rcu_barrier_trace(rsp, "Offline", cpu, + rsp->n_barrier_done); preempt_enable(); while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { - smp_call_function_single(cpu, rcu_barrier_func, - (void *)call_rcu_func, 1); + _rcu_barrier_trace(rsp, "OnlineQ", cpu, + rsp->n_barrier_done); + smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { + _rcu_barrier_trace(rsp, "OnlineNQ", cpu, + rsp->n_barrier_done); preempt_enable(); } } @@ -2362,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp, rcu_adopt_orphan_cbs(rsp); rsp->rcu_barrier_in_progress = NULL; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - call_rcu_func(&rh, rcu_barrier_callback); + rd.rsp = rsp; + rsp->call(&rd.barrier_head, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each * CPU, and thus each counted, remove the initial count. */ - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) - complete(&rcu_barrier_completion); + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) + complete(&rsp->barrier_completion); + + /* Increment ->n_barrier_done to prevent duplicate work. */ + smp_mb(); /* Keep increment after above mechanism. */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); + smp_mb(); /* Keep increment before caller's subsequent code. */ /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ - wait_for_completion(&rcu_barrier_completion); + wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ - mutex_unlock(&rcu_barrier_mutex); + mutex_unlock(&rsp->barrier_mutex); - destroy_rcu_head_on_stack(&rh); + destroy_rcu_head_on_stack(&rd.barrier_head); } /** @@ -2387,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp, */ void rcu_barrier_bh(void) { - _rcu_barrier(&rcu_bh_state, call_rcu_bh); + _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -2396,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); */ void rcu_barrier_sched(void) { - _rcu_barrier(&rcu_sched_state, call_rcu_sched); + _rcu_barrier(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); @@ -2407,18 +2485,15 @@ static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; - int i; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); @@ -2492,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) static void __cpuinit rcu_prepare_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state, 0); - rcu_init_percpu_data(cpu, &rcu_bh_state, 0); - rcu_preempt_init_percpu_data(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_init_percpu_data(cpu, rsp, + strcmp(rsp->name, "rcu_preempt") == 0); } /* @@ -2506,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + struct rcu_state *rsp; trace_rcu_utilization("Start CPU hotplug"); switch (action) { @@ -2530,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, * touch any data without introducing corruption. We send the * dying CPU's callbacks to an arbitrarily chosen online CPU. */ - rcu_cleanup_dying_cpu(&rcu_bh_state); - rcu_cleanup_dying_cpu(&rcu_sched_state); - rcu_preempt_cleanup_dying_cpu(); + for_each_rcu_flavor(rsp) + rcu_cleanup_dying_cpu(rsp); rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); - rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); - rcu_preempt_cleanup_dead_cpu(cpu); + for_each_rcu_flavor(rsp) + rcu_cleanup_dead_cpu(cpu, rsp); break; default: break; @@ -2574,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - for (i = NUM_RCU_LVLS - 1; i > 0; i--) + for (i = rcu_num_lvls - 1; i > 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; - rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; + rsp->levelspread[0] = rcu_fanout_leaf; } #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) @@ -2586,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int i; cprv = NR_CPUS; - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; cprv = ccur; @@ -2613,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* Initialize the level-tracking arrays. */ - for (i = 1; i < NUM_RCU_LVLS; i++) + for (i = 0; i < rcu_num_lvls; i++) + rsp->levelcnt[i] = num_rcu_lvl[i]; + for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); /* Initialize the elements themselves, starting from the leaves. */ - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { @@ -2649,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; - rnp = rsp->level[NUM_RCU_LVLS - 1]; + rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) rnp++; per_cpu_ptr(rsp->rda, i)->mynode = rnp; rcu_boot_init_percpu_data(i, rsp); } + list_add(&rsp->flavors, &rcu_struct_flavors); +} + +/* + * Compute the rcu_node tree geometry from kernel parameters. This cannot + * replace the definitions in rcutree.h because those are needed to size + * the ->node array in the rcu_state structure. + */ +static void __init rcu_init_geometry(void) +{ + int i; + int j; + int n = nr_cpu_ids; + int rcu_capacity[MAX_RCU_LVLS + 1]; + + /* If the compile-time values are accurate, just leave. */ + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + return; + + /* + * Compute number of nodes that can be handled an rcu_node tree + * with the given number of levels. Setting rcu_capacity[0] makes + * some of the arithmetic easier. + */ + rcu_capacity[0] = 1; + rcu_capacity[1] = rcu_fanout_leaf; + for (i = 2; i <= MAX_RCU_LVLS; i++) + rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + + /* + * The boot-time rcu_fanout_leaf parameter is only permitted + * to increase the leaf-level fanout, not decrease it. Of course, + * the leaf-level fanout cannot exceed the number of bits in + * the rcu_node masks. Finally, the tree must be able to accommodate + * the configured number of CPUs. Complain and fall back to the + * compile-time values if these limits are exceeded. + */ + if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + rcu_fanout_leaf > sizeof(unsigned long) * 8 || + n > rcu_capacity[MAX_RCU_LVLS]) { + WARN_ON(1); + return; + } + + /* Calculate the number of rcu_nodes at each level of the tree. */ + for (i = 1; i <= MAX_RCU_LVLS; i++) + if (n <= rcu_capacity[i]) { + for (j = 0; j <= i; j++) + num_rcu_lvl[j] = + DIV_ROUND_UP(n, rcu_capacity[i - j]); + rcu_num_lvls = i; + for (j = i + 1; j <= MAX_RCU_LVLS; j++) + num_rcu_lvl[j] = 0; + break; + } + + /* Calculate the total number of rcu_node structures. */ + rcu_num_nodes = 0; + for (i = 0; i <= MAX_RCU_LVLS; i++) + rcu_num_nodes += num_rcu_lvl[i]; + rcu_num_nodes -= n; } void __init rcu_init(void) @@ -2663,6 +2802,7 @@ void __init rcu_init(void) int cpu; rcu_bootup_announce(); + rcu_init_geometry(); rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..4d29169f2124 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -42,28 +42,28 @@ #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 -# define NUM_RCU_LVLS 1 +# define RCU_NUM_LVLS 1 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_2 -# define NUM_RCU_LVLS 2 +# define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_3 -# define NUM_RCU_LVLS 3 +# define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_3 (NR_CPUS) # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_4 -# define NUM_RCU_LVLS 4 +# define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) @@ -76,6 +76,9 @@ #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) +extern int rcu_num_lvls; +extern int rcu_num_nodes; + /* * Dynticks per-CPU state. */ @@ -97,6 +100,7 @@ struct rcu_dynticks { /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; @@ -206,7 +210,7 @@ struct rcu_node { */ #define rcu_for_each_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* * Do a breadth-first scan of the non-leaf rcu_node structures for the @@ -215,7 +219,7 @@ struct rcu_node { */ #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) /* * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -224,8 +228,8 @@ struct rcu_node { * It is still a leaf node, even if it is also the root node. */ #define rcu_for_each_leaf_node(rsp, rnp) \ - for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ @@ -311,6 +315,9 @@ struct rcu_data { unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; + /* 6) _rcu_barrier() callback. */ + struct rcu_head barrier_head; + int cpu; struct rcu_state *rsp; }; @@ -357,10 +364,12 @@ do { \ */ struct rcu_state { struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ + void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ + void (*func)(struct rcu_head *head)); /* The following fields are guarded by the root rcu_node's lock. */ @@ -392,6 +401,11 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + struct mutex barrier_mutex; /* Guards barrier fields. */ + atomic_t barrier_cpu_count; /* # CPUs waiting on. */ + struct completion barrier_completion; /* Wake at barrier end. */ + unsigned long n_barrier_done; /* ++ at start and end of */ + /* _rcu_barrier(). */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ @@ -409,8 +423,13 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + struct list_head flavors; /* List of RCU flavors. */ }; +extern struct list_head rcu_struct_flavors; +#define for_each_rcu_flavor(rsp) \ + list_for_each_entry((rsp), &rcu_struct_flavors, flavors) + /* Return values for rcu_preempt_offline_tasks(). */ #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ @@ -453,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); -static void rcu_preempt_stall_reset(void); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static int rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_preempt_cleanup_dead_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); -static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ -static int rcu_preempt_pending(int cpu); -static int rcu_preempt_cpu_has_callbacks(int cpu); -static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_cleanup_dying_cpu(void); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3d..7f3244c0df01 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void) printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); #endif #if NUM_RCU_LVL_4 != 0 - printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); + printk(KERN_INFO "\tFour-level hierarchy is enabled.\n"); #endif + if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); + if (nr_cpu_ids != NR_CPUS) + printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); } #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); +struct rcu_state rcu_preempt_state = + RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(struct rcu_node *rnp); /* @@ -233,18 +237,6 @@ static void rcu_preempt_note_context_switch(int cpu) } /* - * Tree-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ - current->rcu_read_lock_nesting++; - barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/* * Check for preempted RCU readers blocking the current grace period * for the specified rcu_node structure. If the caller needs a reliable * answer, it must hold the rcu_node's ->lock. @@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -398,8 +390,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) rnp->grphi, !!rnp->gp_tasks); rcu_report_unblock_qs_rnp(rnp, flags); - } else + } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); + } #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ @@ -418,38 +411,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) } } -/* - * Tree-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ - struct task_struct *t = current; - - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - barrier(); /* critical section before exit code. */ - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - #ifdef CONFIG_RCU_CPU_STALL_VERBOSE /* @@ -540,16 +501,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) } /* - * Suppress preemptible RCU's CPU stall warnings by pushing the - * time of the next stall-warning message comfortably far into the - * future. - */ -static void rcu_preempt_stall_reset(void) -{ - rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; -} - -/* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace * period that still has RCU readers blocked! This function must be @@ -650,14 +601,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ /* - * Do CPU-offline processing for preemptible RCU. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ - rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); -} - -/* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the corresponding CPU's rcu_node structure, * which is checked elsewhere. @@ -677,15 +620,6 @@ static void rcu_preempt_check_callbacks(int cpu) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } -/* - * Process callbacks for preemptible RCU. - */ -static void rcu_preempt_process_callbacks(void) -{ - __rcu_process_callbacks(&rcu_preempt_state, - &__get_cpu_var(rcu_preempt_data)); -} - #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void) @@ -824,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) int must_wait = 0; raw_spin_lock_irqsave(&rnp->lock, flags); - if (list_empty(&rnp->blkd_tasks)) + if (list_empty(&rnp->blkd_tasks)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); - else { + } else { rnp->exp_tasks = rnp->blkd_tasks.next; rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ must_wait = 1; @@ -870,9 +804,9 @@ void synchronize_rcu_expedited(void) * expedited grace period for us, just leave. */ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_rcu(); return; } @@ -917,51 +851,16 @@ mb_ret: } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -/* - * Check to see if there is any immediate preemptible-RCU-related work - * to be done. - */ -static int rcu_preempt_pending(int cpu) -{ - return __rcu_pending(&rcu_preempt_state, - &per_cpu(rcu_preempt_data, cpu)); -} - -/* - * Does preemptible RCU have callbacks on this CPU? - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return !!per_cpu(rcu_preempt_data, cpu).nxtlist; -} - /** * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state, call_rcu); + _rcu_barrier(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_barrier); /* - * Initialize preemptible RCU's per-CPU data. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ - rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); -} - -/* - * Move preemptible RCU's callbacks from dying CPU to other online CPU - * and record a quiescent state. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ - rcu_cleanup_dying_cpu(&rcu_preempt_state); -} - -/* * Initialize preemptible RCU's state structures. */ static void __init __rcu_init_preempt(void) @@ -1046,14 +945,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) } /* - * Because preemptible RCU does not exist, there is no need to suppress - * its CPU stall warnings. - */ -static void rcu_preempt_stall_reset(void) -{ -} - -/* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for * bogus qsmask values. @@ -1081,14 +972,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ /* - * Because preemptible RCU does not exist, it never needs CPU-offline - * processing. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -} - -/* * Because preemptible RCU does not exist, it never has any callbacks * to check. */ @@ -1097,14 +980,6 @@ static void rcu_preempt_check_callbacks(int cpu) } /* - * Because preemptible RCU does not exist, it never has any callbacks - * to process. - */ -static void rcu_preempt_process_callbacks(void) -{ -} - -/* * Queue an RCU callback for lazy invocation after a grace period. * This will likely be later named something like "call_rcu_lazy()", * but this change will require some way of tagging the lazy RCU @@ -1145,22 +1020,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ /* - * Because preemptible RCU does not exist, it never has any work to do. - */ -static int rcu_preempt_pending(int cpu) -{ - return 0; -} - -/* - * Because preemptible RCU does not exist, it never has callbacks - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return 0; -} - -/* * Because preemptible RCU does not exist, rcu_barrier() is just * another name for rcu_barrier_sched(). */ @@ -1171,21 +1030,6 @@ void rcu_barrier(void) EXPORT_SYMBOL_GPL(rcu_barrier); /* - * Because preemptible RCU does not exist, there is no per-CPU - * data to initialize. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -} - -/* - * Because there is no preemptible RCU, there is no cleanup to do. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -} - -/* * Because preemptible RCU does not exist, it need not be initialized. */ static void __init __rcu_init_preempt(void) @@ -1968,9 +1812,11 @@ static void rcu_idle_count_callbacks_posted(void) */ #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ -#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ +#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ +extern int tick_nohz_enabled; + /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2047,10 +1893,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) return 1; } /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) - *delta_jiffies = RCU_IDLE_GP_DELAY; - else - *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; + if (rcu_cpu_has_nonlazy_callbacks(cpu)) { + *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, + RCU_IDLE_GP_DELAY) - jiffies; + } else { + *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; + *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + } return 0; } @@ -2109,6 +1958,7 @@ static void rcu_cleanup_after_idle(int cpu) del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); + rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); } /* @@ -2134,6 +1984,18 @@ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + int tne; + + /* Handle nohz enablement switches conservatively. */ + tne = ACCESS_ONCE(tick_nohz_enabled); + if (tne != rdtp->tick_nohz_enabled_snap) { + if (rcu_cpu_has_callbacks(cpu)) + invoke_rcu_core(); /* force nohz to see update. */ + rdtp->tick_nohz_enabled_snap = tne; + return; + } + if (!tne) + return; /* * If this is an idle re-entry, for example, due to use of @@ -2187,10 +2049,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("Dyntick with callbacks"); rdtp->idle_gp_timer_expires = - jiffies + RCU_IDLE_GP_DELAY; + round_up(jiffies + RCU_IDLE_GP_DELAY, + RCU_IDLE_GP_DELAY); } else { rdtp->idle_gp_timer_expires = - jiffies + RCU_IDLE_LAZY_GP_DELAY; + round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); trace_rcu_prep_idle("Dyntick with lazy callbacks"); } tp = &rdtp->idle_gp_timer; @@ -2231,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_callbacks(cpu)) { trace_rcu_prep_idle("More callbacks"); invoke_rcu_core(); - } else + } else { trace_rcu_prep_idle("Callbacks drained"); + } } /* @@ -2269,6 +2133,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { + *cp = '\0'; } #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d4bc16ddd1d4..abffb486e94e 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,6 +46,31 @@ #define RCU_TREE_NONCORE #include "rcutree.h" +static int show_rcubarrier(struct seq_file *m, void *unused) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", + rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', + atomic_read(&rsp->barrier_cpu_count), + rsp->n_barrier_done); + return 0; +} + +static int rcubarrier_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcubarrier, NULL); +} + +static const struct file_operations rcubarrier_fops = { + .owner = THIS_MODULE, + .open = rcubarrier_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + #ifdef CONFIG_RCU_BOOST static char convert_kthread_status(unsigned int kthread_status) @@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } -#define PRINT_RCU_DATA(name, func, m) \ - do { \ - int _p_r_d_i; \ - \ - for_each_possible_cpu(_p_r_d_i) \ - func(m, &per_cpu(name, _p_r_d_i)); \ - } while (0) - static int show_rcudata(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); - seq_puts(m, "rcu_bh:\n"); - PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); + int cpu; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + seq_printf(m, "%s:\n", rsp->name); + for_each_possible_cpu(cpu) + print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu)); + } return 0; } @@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata_csv(struct seq_file *m, void *unused) { + int cpu; + struct rcu_state *rsp; + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); @@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"kt\",\"ktl\""); #endif /* #ifdef CONFIG_RCU_BOOST */ seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "\"rcu_preempt:\"\n"); - PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "\"rcu_sched:\"\n"); - PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); - seq_puts(m, "\"rcu_bh:\"\n"); - PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); + for_each_rcu_flavor(rsp) { + seq_printf(m, "\"%s:\"\n", rsp->name); + for_each_possible_cpu(cpu) + print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu)); + } return 0; } @@ -201,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = { static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) { - seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu " - "j=%04x bt=%04x\n", + seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ", rnp->grplo, rnp->grphi, "T."[list_empty(&rnp->blkd_tasks)], "N."[!rnp->gp_tasks], @@ -210,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) "B."[!rnp->boost_tasks], convert_kthread_status(rnp->boost_kthread_status), rnp->n_tasks_boosted, rnp->n_exp_boosts, - rnp->n_normal_boosts, + rnp->n_normal_boosts); + seq_printf(m, "j=%04x bt=%04x\n", (int)(jiffies & 0xffff), (int)(rnp->boost_time & 0xffff)); - seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", - " balk", + seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", rnp->n_balk_blkd_tasks, rnp->n_balk_exp_gp_tasks, rnp->n_balk_boost_tasks, @@ -270,15 +286,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) struct rcu_node *rnp; gpnum = rsp->gpnum; - seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " - "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", - rsp->completed, gpnum, rsp->fqs_state, + seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ", + rsp->name, rsp->completed, gpnum, rsp->fqs_state, (long)(rsp->jiffies_force_qs - jiffies), - (int)(jiffies & 0xffff), + (int)(jiffies & 0xffff)); + seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); - for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { + for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); level = rnp->level; @@ -295,14 +311,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - print_one_rcu_state(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - print_one_rcu_state(m, &rcu_sched_state); - seq_puts(m, "rcu_bh:\n"); - print_one_rcu_state(m, &rcu_bh_state); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + print_one_rcu_state(m, rsp); return 0; } @@ -343,11 +355,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) static int show_rcugp(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - show_one_rcugp(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - show_one_rcugp(m, &rcu_sched_state); - show_one_rcugp(m, &rcu_bh_state); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + show_one_rcugp(m, rsp); return 0; } @@ -366,44 +377,36 @@ static const struct file_operations rcugp_fops = { static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) { - seq_printf(m, "%3d%cnp=%ld " - "qsp=%ld rpq=%ld cbr=%ld cng=%ld " - "gpc=%ld gps=%ld nf=%ld nn=%ld\n", + seq_printf(m, "%3d%cnp=%ld ", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', - rdp->n_rcu_pending, + rdp->n_rcu_pending); + seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ", rdp->n_rp_qs_pending, rdp->n_rp_report_qs, rdp->n_rp_cb_ready, - rdp->n_rp_cpu_needs_gp, + rdp->n_rp_cpu_needs_gp); + seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n", rdp->n_rp_gp_completed, rdp->n_rp_gp_started, rdp->n_rp_need_fqs, rdp->n_rp_need_nothing); } -static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +static int show_rcu_pending(struct seq_file *m, void *unused) { int cpu; struct rcu_data *rdp; - - for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rsp->rda, cpu); - if (rdp->beenonline) - print_one_rcu_pending(m, rdp); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + seq_printf(m, "%s:\n", rsp->name); + for_each_possible_cpu(cpu) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } } -} - -static int show_rcu_pending(struct seq_file *m, void *unused) -{ -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - print_rcu_pendings(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - print_rcu_pendings(m, &rcu_sched_state); - seq_puts(m, "rcu_bh:\n"); - print_rcu_pendings(m, &rcu_bh_state); return 0; } @@ -453,6 +456,11 @@ static int __init rcutree_trace_init(void) if (!rcudir) goto free_out; + retval = debugfs_create_file("rcubarrier", 0444, rcudir, + NULL, &rcubarrier_fops); + if (!retval) + goto free_out; + retval = debugfs_create_file("rcudata", 0444, rcudir, NULL, &rcudata_fops); if (!retval) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 468bdd44c1ba..ad732b56ba70 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1910,12 +1910,12 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + trace_sched_switch(prev, next); sched_info_switch(prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); - trace_sched_switch(prev, next); } /** diff --git a/kernel/smp.c b/kernel/smp.c index d0ae5b24875e..29dd40a9f2f4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait) return 0; } EXPORT_SYMBOL(smp_call_function); - -void ipi_call_lock(void) -{ - raw_spin_lock(&call_function.lock); -} - -void ipi_call_unlock(void) -{ - raw_spin_unlock(&call_function.lock); -} - -void ipi_call_lock_irq(void) -{ - raw_spin_lock_irq(&call_function.lock); -} - -void ipi_call_unlock_irq(void) -{ - raw_spin_unlock_irq(&call_function.lock); -} #endif /* USE_GENERIC_SMP_HELPERS */ /* Setup configured maximum number of CPUs to activate */ diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 80c0acfb8472..6ef9433e1c70 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h @@ -3,8 +3,6 @@ struct task_struct; -int smpboot_prepare(unsigned int cpu); - #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD struct task_struct *idle_thread_get(unsigned int cpu); void idle_thread_set_boot_cpu(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 70b33abcc7bb..b7fbadc5c973 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -409,7 +409,9 @@ int second_overflow(unsigned long secs) time_state = TIME_DEL; break; case TIME_INS: - if (secs % 86400 == 0) { + if (!(time_status & STA_INS)) + time_state = TIME_OK; + else if (secs % 86400 == 0) { leap = -1; time_state = TIME_OOP; time_tai++; @@ -418,7 +420,9 @@ int second_overflow(unsigned long secs) } break; case TIME_DEL: - if ((secs + 1) % 86400 == 0) { + if (!(time_status & STA_DEL)) + time_state = TIME_OK; + else if ((secs + 1) % 86400 == 0) { leap = 1; time_tai--; time_state = TIME_WAIT; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4a08472c3ca7..024540f97f74 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void) /* * NO HZ enabled ? */ -static int tick_nohz_enabled __read_mostly = 1; +int tick_nohz_enabled __read_mostly = 1; /* * Enable / Disable tickless mode @@ -271,50 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts) +static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; + ktime_t last_update, expires, ret = { .tv64 = 0 }; unsigned long rcu_delta_jiffies; - ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; - int cpu; - - cpu = smp_processor_id(); - ts = &per_cpu(tick_cpu_sched, cpu); - - now = tick_nohz_start_idle(cpu, ts); - - /* - * If this cpu is offline and it is the one which updates - * jiffies, then give up the assignment and let it be taken by - * the cpu which runs the tick timer next. If we don't drop - * this here the jiffies might be stale and do_timer() never - * invoked. - */ - if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; - } - - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - return; - if (need_resched()) - return; - - if (unlikely(local_softirq_pending() && cpu_online(cpu))) { - static int ratelimit; - - if (ratelimit < 10) { - printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } - return; - } - - ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -397,6 +362,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; + ret = expires; + /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when @@ -408,16 +375,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) select_nohz_load_balancer(1); calc_load_enter_idle(); - ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); + ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; - ts->idle_jiffies = last_jiffies; } - ts->idle_sleeps++; - - /* Mark expires */ - ts->idle_expires = expires; - /* * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. @@ -448,6 +409,65 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); + + return ret; +} + +static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) +{ + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + } + + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) + return false; + + if (need_resched()) + return false; + + if (unlikely(local_softirq_pending() && cpu_online(cpu))) { + static int ratelimit; + + if (ratelimit < 10) { + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", + (unsigned int) local_softirq_pending()); + ratelimit++; + } + return false; + } + + return true; +} + +static void __tick_nohz_idle_enter(struct tick_sched *ts) +{ + ktime_t now, expires; + int cpu = smp_processor_id(); + + now = tick_nohz_start_idle(cpu, ts); + + if (can_stop_idle_tick(cpu, ts)) { + int was_stopped = ts->tick_stopped; + + ts->idle_calls++; + + expires = tick_nohz_stop_sched_tick(ts, now, cpu); + if (expires.tv64 > 0LL) { + ts->idle_sleeps++; + ts->idle_expires = expires; + } + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; + } } /** @@ -485,7 +505,7 @@ void tick_nohz_idle_enter(void) * update of the idle time accounting in tick_nohz_start_idle(). */ ts->inidle = 1; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); local_irq_enable(); } @@ -505,7 +525,7 @@ void tick_nohz_irq_exit(void) if (!ts->inidle) return; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); } /** @@ -523,7 +543,7 @@ ktime_t tick_nohz_get_sleep_length(void) static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); - hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); + hrtimer_set_expires(&ts->sched_timer, ts->last_tick); while (1) { /* Forward the time to expire in the future */ @@ -546,6 +566,41 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } } +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) +{ + /* Update jiffies first */ + select_nohz_load_balancer(0); + tick_do_update_jiffies64(now); + update_cpu_load_nohz(); + + touch_softlockup_watchdog(); + /* + * Cancel the scheduled timer and restore the tick + */ + ts->tick_stopped = 0; + ts->idle_exittime = now; + + tick_nohz_restart(ts, now); +} + +static void tick_nohz_account_idle_ticks(struct tick_sched *ts) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + unsigned long ticks; + /* + * We stopped the tick in idle. Update process times would miss the + * time we slept as update_process_times does only a 1 tick + * accounting. Enforce that this is accounted to idle ! + */ + ticks = jiffies - ts->idle_jiffies; + /* + * We might be one off. Do not randomly account a huge number of ticks! + */ + if (ticks && ticks < LONG_MAX) + account_idle_ticks(ticks); +#endif +} + /** * tick_nohz_idle_exit - restart the idle tick from the idle task * @@ -557,9 +612,6 @@ void tick_nohz_idle_exit(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - unsigned long ticks; -#endif ktime_t now; local_irq_disable(); @@ -574,40 +626,11 @@ void tick_nohz_idle_exit(void) if (ts->idle_active) tick_nohz_stop_idle(cpu, now); - if (!ts->tick_stopped) { - local_irq_enable(); - return; + if (ts->tick_stopped) { + tick_nohz_restart_sched_tick(ts, now); + tick_nohz_account_idle_ticks(ts); } - /* Update jiffies first */ - select_nohz_load_balancer(0); - tick_do_update_jiffies64(now); - update_cpu_load_nohz(); - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - /* - * We stopped the tick in idle. Update process times would miss the - * time we slept as update_process_times does only a 1 tick - * accounting. Enforce that this is accounted to idle ! - */ - ticks = jiffies - ts->idle_jiffies; - /* - * We might be one off. Do not randomly account a huge number of ticks! - */ - if (ticks && ticks < LONG_MAX) - account_idle_ticks(ticks); -#endif - - calc_load_exit_idle(); - touch_softlockup_watchdog(); - /* - * Cancel the scheduled timer and restore the tick - */ - ts->tick_stopped = 0; - ts->idle_exittime = now; - - tick_nohz_restart(ts, now); - local_irq_enable(); } @@ -811,7 +834,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) */ if (ts->tick_stopped) { touch_softlockup_watchdog(); - ts->idle_jiffies++; + if (idle_cpu(cpu)) + ts->idle_jiffies++; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3447cfaf11e7..f045cc50832d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -24,32 +24,32 @@ /* Structure holding internal timekeeping values. */ struct timekeeper { /* Current clocksource used for timekeeping. */ - struct clocksource *clock; + struct clocksource *clock; /* NTP adjusted clock multiplier */ - u32 mult; + u32 mult; /* The shift value of the current clocksource. */ - int shift; - + u32 shift; /* Number of clock cycles in one NTP interval. */ - cycle_t cycle_interval; + cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ - u64 xtime_interval; + u64 xtime_interval; /* shifted nano seconds left over when rounding cycle_interval */ - s64 xtime_remainder; + s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ - u32 raw_interval; + u32 raw_interval; + + /* Current CLOCK_REALTIME time in seconds */ + u64 xtime_sec; + /* Clock shifted nano seconds */ + u64 xtime_nsec; - /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ - u64 xtime_nsec; /* Difference between accumulated time and NTP time in ntp * shifted nano seconds. */ - s64 ntp_error; + s64 ntp_error; /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ - int ntp_error_shift; + u32 ntp_error_shift; - /* The current time */ - struct timespec xtime; /* * wall_to_monotonic is what we need to add to xtime (or xtime corrected * for sub jiffie times) to get to monotonic time. Monotonic is pegged @@ -64,20 +64,17 @@ struct timekeeper { * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ - struct timespec wall_to_monotonic; + struct timespec wall_to_monotonic; /* time spent in suspend */ - struct timespec total_sleep_time; + struct timespec total_sleep_time; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec raw_time; - + struct timespec raw_time; /* Offset clock monotonic -> clock realtime */ - ktime_t offs_real; - + ktime_t offs_real; /* Offset clock monotonic -> clock boottime */ - ktime_t offs_boot; - + ktime_t offs_boot; /* Seqlock for all timekeeper values */ - seqlock_t lock; + seqlock_t lock; }; static struct timekeeper timekeeper; @@ -88,11 +85,37 @@ static struct timekeeper timekeeper; */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); - /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +static inline void tk_normalize_xtime(struct timekeeper *tk) +{ + while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { + tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; + tk->xtime_sec++; + } +} +static struct timespec tk_xtime(struct timekeeper *tk) +{ + struct timespec ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + return ts; +} + +static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec = ts->tv_sec; + tk->xtime_nsec = ts->tv_nsec << tk->shift; +} + +static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec += ts->tv_sec; + tk->xtime_nsec += ts->tv_nsec << tk->shift; +} /** * timekeeper_setup_internals - Set up internals to use clocksource clock. @@ -104,12 +127,14 @@ int __read_mostly timekeeping_suspended; * * Unless you're the timekeeping code, you should not be using this! */ -static void timekeeper_setup_internals(struct clocksource *clock) +static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) { cycle_t interval; u64 tmp, ntpinterval; + struct clocksource *old_clock; - timekeeper.clock = clock; + old_clock = tk->clock; + tk->clock = clock; clock->cycle_last = clock->read(clock); /* Do the ns -> cycle conversion first, using original mult */ @@ -122,80 +147,96 @@ static void timekeeper_setup_internals(struct clocksource *clock) tmp = 1; interval = (cycle_t) tmp; - timekeeper.cycle_interval = interval; + tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ - timekeeper.xtime_interval = (u64) interval * clock->mult; - timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; - timekeeper.raw_interval = + tk->xtime_interval = (u64) interval * clock->mult; + tk->xtime_remainder = ntpinterval - tk->xtime_interval; + tk->raw_interval = ((u64) interval * clock->mult) >> clock->shift; - timekeeper.xtime_nsec = 0; - timekeeper.shift = clock->shift; + /* if changing clocks, convert xtime_nsec shift units */ + if (old_clock) { + int shift_change = clock->shift - old_clock->shift; + if (shift_change < 0) + tk->xtime_nsec >>= -shift_change; + else + tk->xtime_nsec <<= shift_change; + } + tk->shift = clock->shift; - timekeeper.ntp_error = 0; - timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + tk->ntp_error = 0; + tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; /* * The timekeeper keeps its own mult values for the currently * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - timekeeper.mult = clock->mult; + tk->mult = clock->mult; } /* Timekeeper helper functions. */ -static inline s64 timekeeping_get_ns(void) +static inline s64 timekeeping_get_ns(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds using ntp adjusted mult. */ - return clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + nsec = cycle_delta * tk->mult + tk->xtime_nsec; + nsec >>= tk->shift; + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(void) +static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds. */ - return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + /* convert delta to nanoseconds. */ + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } -static void update_rt_offset(void) +static void update_rt_offset(struct timekeeper *tk) { - struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; + struct timespec tmp, *wtm = &tk->wall_to_monotonic; set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); - timekeeper.offs_real = timespec_to_ktime(tmp); + tk->offs_real = timespec_to_ktime(tmp); } /* must hold write on timekeeper.lock */ -static void timekeeping_update(bool clearntp) +static void timekeeping_update(struct timekeeper *tk, bool clearntp) { + struct timespec xt; + if (clearntp) { - timekeeper.ntp_error = 0; + tk->ntp_error = 0; ntp_clear(); } - update_rt_offset(); - update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, - timekeeper.clock, timekeeper.mult); + update_rt_offset(tk); + xt = tk_xtime(tk); + update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); } @@ -206,27 +247,26 @@ static void timekeeping_update(bool clearntp) * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. */ -static void timekeeping_forward_now(void) +static void timekeeping_forward_now(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + tk->xtime_nsec += cycle_delta * tk->mult; /* If arch requires, add in gettimeoffset() */ - nsec += arch_gettimeoffset(); + tk->xtime_nsec += arch_gettimeoffset() << tk->shift; - timespec_add_ns(&timekeeper.xtime, nsec); + tk_normalize_xtime(tk); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); - timespec_add_ns(&timekeeper.raw_time, nsec); + timespec_add_ns(&tk->raw_time, nsec); } /** @@ -238,18 +278,15 @@ static void timekeeping_forward_now(void) void getnstimeofday(struct timespec *ts) { unsigned long seq; - s64 nsecs; + s64 nsecs = 0; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; - nsecs = timekeeping_get_ns(); - - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -266,13 +303,10 @@ ktime_t ktime_get(void) do { seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec + + secs = timekeeper.xtime_sec + timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeper.xtime.tv_nsec + + nsecs = timekeeping_get_ns(&timekeeper) + timekeeper.wall_to_monotonic.tv_nsec; - nsecs += timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); /* @@ -295,22 +329,19 @@ void ktime_get_ts(struct timespec *ts) { struct timespec tomono; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; - nsecs = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); @@ -333,20 +364,14 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) WARN_ON_ONCE(timekeeping_suspended); do { - u32 arch_offset; - seq = read_seqbegin(&timekeeper.lock); *ts_raw = timekeeper.raw_time; - *ts_real = timekeeper.xtime; - - nsecs_raw = timekeeping_get_ns_raw(); - nsecs_real = timekeeping_get_ns(); + ts_real->tv_sec = timekeeper.xtime_sec; + ts_real->tv_nsec = 0; - /* If arch requires, add in gettimeoffset() */ - arch_offset = arch_gettimeoffset(); - nsecs_raw += arch_offset; - nsecs_real += arch_offset; + nsecs_raw = timekeeping_get_ns_raw(&timekeeper); + nsecs_real = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -381,7 +406,7 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { - struct timespec ts_delta; + struct timespec ts_delta, xt; unsigned long flags; if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) @@ -389,15 +414,18 @@ int do_settimeofday(const struct timespec *tv) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); + + xt = tk_xtime(&timekeeper); + ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; + ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; - ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec; - ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec; timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, ts_delta); - timekeeper.xtime = *tv; - timekeeping_update(true); + tk_set_xtime(&timekeeper, tv); + + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -424,13 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); + - timekeeper.xtime = timespec_add(timekeeper.xtime, *ts); + tk_xtime_add(&timekeeper, ts); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *ts); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -455,14 +484,14 @@ static int change_clocksource(void *data) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); if (!new->enable || new->enable(new) == 0) { old = timekeeper.clock; - timekeeper_setup_internals(new); + tk_setup_internals(&timekeeper, new); if (old->disable) old->disable(old); } - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -512,7 +541,7 @@ void getrawmonotonic(struct timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); - nsecs = timekeeping_get_ns_raw(); + nsecs = timekeeping_get_ns_raw(&timekeeper); *ts = timekeeper.raw_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -547,6 +576,7 @@ u64 timekeeping_max_deferment(void) { unsigned long seq; u64 ret; + do { seq = read_seqbegin(&timekeeper.lock); @@ -607,19 +637,17 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - timekeeper_setup_internals(clock); + tk_setup_internals(&timekeeper, clock); - timekeeper.xtime.tv_sec = now.tv_sec; - timekeeper.xtime.tv_nsec = now.tv_nsec; + tk_set_xtime(&timekeeper, &now); timekeeper.raw_time.tv_sec = 0; timekeeper.raw_time.tv_nsec = 0; - if (boot.tv_sec == 0 && boot.tv_nsec == 0) { - boot.tv_sec = timekeeper.xtime.tv_sec; - boot.tv_nsec = timekeeper.xtime.tv_nsec; - } + if (boot.tv_sec == 0 && boot.tv_nsec == 0) + boot = tk_xtime(&timekeeper); + set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_rt_offset(); + update_rt_offset(&timekeeper); timekeeper.total_sleep_time.tv_sec = 0; timekeeper.total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -641,7 +669,8 @@ static void update_sleep_time(struct timespec t) * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables. */ -static void __timekeeping_inject_sleeptime(struct timespec *delta) +static void __timekeeping_inject_sleeptime(struct timekeeper *tk, + struct timespec *delta) { if (!timespec_valid(delta)) { printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " @@ -649,10 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) return; } - timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); - timekeeper.wall_to_monotonic = - timespec_sub(timekeeper.wall_to_monotonic, *delta); - update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); + tk_xtime_add(tk, delta); + tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); + update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); } @@ -678,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); - __timekeeping_inject_sleeptime(delta); + __timekeeping_inject_sleeptime(&timekeeper, delta); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -711,13 +739,13 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - __timekeeping_inject_sleeptime(&ts); + __timekeeping_inject_sleeptime(&timekeeper, &ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; - timekeeping_update(false); + timekeeping_update(&timekeeper, false); write_sequnlock_irqrestore(&timekeeper.lock, flags); touch_softlockup_watchdog(); @@ -737,7 +765,7 @@ static int timekeeping_suspend(void) read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); timekeeping_suspended = 1; /* @@ -746,7 +774,7 @@ static int timekeeping_suspend(void) * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ - delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time); + delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); delta_delta = timespec_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* @@ -785,7 +813,8 @@ device_initcall(timekeeping_init_ops); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. */ -static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, + s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -801,7 +830,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -810,8 +839,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1); - tick_error -= timekeeper.xtime_interval >> 1; + tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); + tick_error -= tk->xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -836,9 +865,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void timekeeping_adjust(s64 offset) +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - s64 error, interval = timekeeper.cycle_interval; + s64 error, interval = tk->cycle_interval; int adj; /* @@ -854,7 +883,7 @@ static void timekeeping_adjust(s64 offset) * * Note: It does not "save" on aggravation when reading the code. */ - error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); + error = tk->ntp_error >> (tk->ntp_error_shift - 1); if (error > interval) { /* * We now divide error by 4(via shift), which checks if @@ -876,7 +905,8 @@ static void timekeeping_adjust(s64 offset) if (likely(error <= interval)) adj = 1; else - adj = timekeeping_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); } else if (error < -interval) { /* See comment above, this is just switched for the negative */ error >>= 2; @@ -885,18 +915,17 @@ static void timekeeping_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = timekeeping_bigadjust(error, &interval, &offset); - } else /* No adjustment needed */ + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); + } else return; - if (unlikely(timekeeper.clock->maxadj && - (timekeeper.mult + adj > - timekeeper.clock->mult + timekeeper.clock->maxadj))) { + if (unlikely(tk->clock->maxadj && + (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - timekeeper.clock->name, (long)timekeeper.mult + adj, - (long)timekeeper.clock->mult + - timekeeper.clock->maxadj); + tk->clock->name, (long)tk->mult + adj, + (long)tk->clock->mult + tk->clock->maxadj); } /* * So the following can be confusing. @@ -947,11 +976,60 @@ static void timekeeping_adjust(s64 offset) * * XXX - TODO: Doc ntp_error calculation. */ - timekeeper.mult += adj; - timekeeper.xtime_interval += interval; - timekeeper.xtime_nsec -= offset; - timekeeper.ntp_error -= (interval - offset) << - timekeeper.ntp_error_shift; + tk->mult += adj; + tk->xtime_interval += interval; + tk->xtime_nsec -= offset; + tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; + + /* + * It may be possible that when we entered this function, xtime_nsec + * was very small. Further, if we're slightly speeding the clocksource + * in the code above, its possible the required corrective factor to + * xtime_nsec could cause it to underflow. + * + * Now, since we already accumulated the second, cannot simply roll + * the accumulated second back, since the NTP subsystem has been + * notified via second_overflow. So instead we push xtime_nsec forward + * by the amount we underflowed, and add that amount into the error. + * + * We'll correct this error next time through this function, when + * xtime_nsec is not as small. + */ + if (unlikely((s64)tk->xtime_nsec < 0)) { + s64 neg = -(s64)tk->xtime_nsec; + tk->xtime_nsec = 0; + tk->ntp_error += neg << tk->ntp_error_shift; + } + +} + + +/** + * accumulate_nsecs_to_secs - Accumulates nsecs into secs + * + * Helper function that accumulates a the nsecs greater then a second + * from the xtime_nsec field to the xtime_secs field. + * It also calls into the NTP code to handle leapsecond processing. + * + */ +static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) +{ + u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + + while (tk->xtime_nsec >= nsecps) { + int leap; + + tk->xtime_nsec -= nsecps; + tk->xtime_sec++; + + /* Figure out if its a leap sec and apply if needed */ + leap = second_overflow(tk->xtime_sec); + tk->xtime_sec += leap; + tk->wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); + + } } @@ -964,46 +1042,36 @@ static void timekeeping_adjust(s64 offset) * * Returns the unconsumed cycles. */ -static cycle_t logarithmic_accumulation(cycle_t offset, int shift) +static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, + u32 shift) { - u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; - /* If the offset is smaller than a shifted interval, do nothing */ - if (offset < timekeeper.cycle_interval<<shift) + /* If the offset is smaller then a shifted interval, do nothing */ + if (offset < tk->cycle_interval<<shift) return offset; /* Accumulate one shifted interval */ - offset -= timekeeper.cycle_interval << shift; - timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; + offset -= tk->cycle_interval << shift; + tk->clock->cycle_last += tk->cycle_interval << shift; - timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; - while (timekeeper.xtime_nsec >= nsecps) { - int leap; - timekeeper.xtime_nsec -= nsecps; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + tk->xtime_nsec += tk->xtime_interval << shift; + accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = timekeeper.raw_interval << shift; - raw_nsecs += timekeeper.raw_time.tv_nsec; + raw_nsecs = tk->raw_interval << shift; + raw_nsecs += tk->raw_time.tv_nsec; if (raw_nsecs >= NSEC_PER_SEC) { u64 raw_secs = raw_nsecs; raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - timekeeper.raw_time.tv_sec += raw_secs; + tk->raw_time.tv_sec += raw_secs; } - timekeeper.raw_time.tv_nsec = raw_nsecs; + tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ - timekeeper.ntp_error += ntp_tick_length() << shift; - timekeeper.ntp_error -= - (timekeeper.xtime_interval + timekeeper.xtime_remainder) << - (timekeeper.ntp_error_shift + shift); + tk->ntp_error += ntp_tick_length() << shift; + tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << + (tk->ntp_error_shift + shift); return offset; } @@ -1019,6 +1087,7 @@ static void update_wall_time(void) cycle_t offset; int shift = 0, maxshift; unsigned long flags; + s64 remainder; write_seqlock_irqsave(&timekeeper.lock, flags); @@ -1033,8 +1102,6 @@ static void update_wall_time(void) #else offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif - timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; /* * With NO_HZ we may have to accumulate many cycle_intervals @@ -1050,64 +1117,36 @@ static void update_wall_time(void) maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { - offset = logarithmic_accumulation(offset, shift); + offset = logarithmic_accumulation(&timekeeper, offset, shift); if(offset < timekeeper.cycle_interval<<shift) shift--; } /* correct the clock when NTP error is too big */ - timekeeping_adjust(offset); - - /* - * Since in the loop above, we accumulate any amount of time - * in xtime_nsec over a second into xtime.tv_sec, its possible for - * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in timekeeping_adjust(), - * its possible the required corrective factor to xtime_nsec could - * cause it to underflow. - * - * Now, we cannot simply roll the accumulated second back, since - * the NTP subsystem has been notified via second_overflow. So - * instead we push xtime_nsec forward by the amount we underflowed, - * and add that amount into the error. - * - * We'll correct this error next time through this function, when - * xtime_nsec is not as small. - */ - if (unlikely((s64)timekeeper.xtime_nsec < 0)) { - s64 neg = -(s64)timekeeper.xtime_nsec; - timekeeper.xtime_nsec = 0; - timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; - } + timekeeping_adjust(&timekeeper, offset); /* - * Store full nanoseconds into xtime after rounding it up and - * add the remainder to the error difference. - */ - timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> - timekeeper.shift) + 1; - timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; - timekeeper.ntp_error += timekeeper.xtime_nsec << - timekeeper.ntp_error_shift; + * Store only full nanoseconds into xtime_nsec after rounding + * it up and add the remainder to the error difference. + * XXX - This is necessary to avoid small 1ns inconsistnecies caused + * by truncating the remainder in vsyscalls. However, it causes + * additional work to be done in timekeeping_adjust(). Once + * the vsyscall implementations are converted to use xtime_nsec + * (shifted nanoseconds), this can be killed. + */ + remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); + timekeeper.xtime_nsec -= remainder; + timekeeper.xtime_nsec += 1 << timekeeper.shift; + timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; /* * Finally, make sure that after the rounding - * xtime.tv_nsec isn't larger than NSEC_PER_SEC + * xtime_nsec isn't larger than NSEC_PER_SEC */ - if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { - int leap; - timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + accumulate_nsecs_to_secs(&timekeeper); - timekeeping_update(false); + timekeeping_update(&timekeeper, false); out: write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -1152,21 +1191,20 @@ void get_monotonic_boottime(struct timespec *ts) { struct timespec tomono, sleep; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; sleep = timekeeper.total_sleep_time; - nsecs = timekeeping_get_ns(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, - ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); } EXPORT_SYMBOL_GPL(get_monotonic_boottime); @@ -1199,13 +1237,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return timekeeper.xtime.tv_sec; + return timekeeper.xtime_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return timekeeper.xtime; + return tk_xtime(&timekeeper); } struct timespec current_kernel_time(void) @@ -1216,7 +1254,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); return now; @@ -1231,7 +1269,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); mono = timekeeper.wall_to_monotonic; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1266,7 +1304,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, do { seq = read_seqbegin(&timekeeper.lock); - *xtim = timekeeper.xtime; + *xtim = tk_xtime(&timekeeper); *wtom = timekeeper.wall_to_monotonic; *sleep = timekeeper.total_sleep_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1290,11 +1328,8 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) do { seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec; - nsecs = timekeeper.xtime.tv_nsec; - nsecs += timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); + secs = timekeeper.xtime_sec; + nsecs = timekeeping_get_ns(&timekeeper); *offs_real = timekeeper.offs_real; *offs_boot = timekeeper.offs_boot; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3258455549f4..af5a7e9f164b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) { struct tick_sched *ts = tick_get_tick_sched(cpu); P(nohz_mode); - P_ns(idle_tick); + P_ns(last_tick); P(tick_stopped); P(idle_jiffies); P(idle_calls); @@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.6\n"); + SEQ_printf(m, "Timer List Version: v0.7\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); diff --git a/kernel/timer.c b/kernel/timer.c index 6ec7e7e0db43..a61c09374eba 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -77,6 +77,7 @@ struct tvec_base { struct timer_list *running_timer; unsigned long timer_jiffies; unsigned long next_timer; + unsigned long active_timers; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -330,7 +331,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +static void +__internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; unsigned long idx = expires - base->timer_jiffies; @@ -372,6 +374,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +{ + __internal_add_timer(base, timer); + /* + * Update base->active_timers and base->next_timer + */ + if (!tbase_get_deferrable(timer->base)) { + if (time_before(timer->expires, base->next_timer)) + base->next_timer = timer->expires; + base->active_timers++; + } +} + #ifdef CONFIG_TIMER_STATS void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) { @@ -654,8 +669,7 @@ void init_timer_deferrable_key(struct timer_list *timer, } EXPORT_SYMBOL(init_timer_deferrable_key); -static inline void detach_timer(struct timer_list *timer, - int clear_pending) +static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct list_head *entry = &timer->entry; @@ -667,6 +681,29 @@ static inline void detach_timer(struct timer_list *timer, entry->prev = LIST_POISON2; } +static inline void +detach_expired_timer(struct timer_list *timer, struct tvec_base *base) +{ + detach_timer(timer, true); + if (!tbase_get_deferrable(timer->base)) + timer->base->active_timers--; +} + +static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, + bool clear_pending) +{ + if (!timer_pending(timer)) + return 0; + + detach_timer(timer, clear_pending); + if (!tbase_get_deferrable(timer->base)) { + timer->base->active_timers--; + if (timer->expires == base->next_timer) + base->next_timer = base->timer_jiffies; + } + return 1; +} + /* * We are using hashed locking: holding per_cpu(tvec_bases).lock * means that all timers which are tied to this base via timer->base are @@ -712,16 +749,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 0); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } else { - if (pending_only) - goto out_unlock; - } + ret = detach_if_pending(timer, base, false); + if (!ret && pending_only) + goto out_unlock; debug_activate(timer, expires); @@ -752,9 +782,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } timer->expires = expires; - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); out_unlock: @@ -920,9 +947,6 @@ void add_timer_on(struct timer_list *timer, int cpu) spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); debug_activate(timer, timer->expires); - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); /* * Check whether the other CPU is idle and needs to be @@ -959,13 +983,7 @@ int del_timer(struct timer_list *timer) timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } + ret = detach_if_pending(timer, base, true); spin_unlock_irqrestore(&base->lock, flags); } @@ -990,19 +1008,10 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer == timer) - goto out; - - timer_stats_timer_clear_start_info(timer); - ret = 0; - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; + if (base->running_timer != timer) { + timer_stats_timer_clear_start_info(timer); + ret = detach_if_pending(timer, base, true); } -out: spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1089,7 +1098,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) */ list_for_each_entry_safe(timer, tmp, &tv_list, entry) { BUG_ON(tbase_get_base(timer->base) != base); - internal_add_timer(base, timer); + /* No accounting, while moving them */ + __internal_add_timer(base, timer); } return index; @@ -1178,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); base->running_timer = timer; - detach_timer(timer, 1); + detach_expired_timer(timer, base); spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); @@ -1316,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, unsigned long get_next_timer_interrupt(unsigned long now) { struct tvec_base *base = __this_cpu_read(tvec_bases); - unsigned long expires; + unsigned long expires = now + NEXT_TIMER_MAX_DELTA; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ if (cpu_is_offline(smp_processor_id())) - return now + NEXT_TIMER_MAX_DELTA; + return expires; + spin_lock(&base->lock); - if (time_before_eq(base->next_timer, base->timer_jiffies)) - base->next_timer = __next_timer_interrupt(base); - expires = base->next_timer; + if (base->active_timers) { + if (time_before_eq(base->next_timer, base->timer_jiffies)) + base->next_timer = __next_timer_interrupt(base); + expires = base->next_timer; + } spin_unlock(&base->lock); if (time_before_eq(expires, now)) @@ -1704,6 +1717,7 @@ static int __cpuinit init_timers_cpu(int cpu) base->timer_jiffies = jiffies; base->next_timer = base->timer_jiffies; + base->active_timers = 0; return 0; } @@ -1714,11 +1728,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea while (!list_empty(head)) { timer = list_first_entry(head, struct timer_list, entry); - detach_timer(timer, 0); + /* We ignore the accounting on the dying cpu */ + detach_timer(timer, false); timer_set_base(timer, new_base); - if (time_before(timer->expires, new_base->next_timer) && - !tbase_get_deferrable(timer->base)) - new_base->next_timer = timer->expires; internal_add_timer(new_base, timer); } } diff --git a/lib/list_debug.c b/lib/list_debug.c index 23a5e031cd8b..c24c2f7e296f 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -87,12 +87,10 @@ void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { WARN(next->prev != prev, - "list_add_rcu corruption. next->prev should be " - "prev (%p), but was %p. (next=%p).\n", + "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", prev, next->prev, next); WARN(prev->next != next, - "list_add_rcu corruption. prev->next should be " - "next (%p), but was %p. (prev=%p).\n", + "list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", next, prev->next, prev); new->next = next; new->prev = prev; diff --git a/mm/bootmem.c b/mm/bootmem.c index 73096630cb35..bcb63ac48cc5 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -710,6 +710,10 @@ again: if (ptr) return ptr; + /* do not panic in alloc_bootmem_bdata() */ + if (limit && goal + size > limit) + limit = 0; + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); if (ptr) return ptr; diff --git a/mm/mmap.c b/mm/mmap.c index 3edfcdfa42d9..4fe2697339ed 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2345,9 +2345,6 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; - if (vma->vm_file && uprobe_mmap(vma)) - return -EINVAL; - vma_link(mm, vma, prev, rb_link, rb_parent); return 0; } @@ -2418,9 +2415,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma->vm_file) { get_file(new_vma->vm_file); - if (uprobe_mmap(new_vma)) - goto out_free_mempol; - if (vma->vm_flags & VM_EXECUTABLE) added_exe_file_vma(mm); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 661576324c7f..66e431060c05 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) * them before going back to sleep. */ set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); - schedule(); + + if (!kthread_should_stop()) + schedule(); + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); } else { if (remaining) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b332c3d76059..10255e81be79 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con) * dropped messages. */ dout("process_connect got RESET peer seq %u\n", - le32_to_cpu(con->in_connect.connect_seq)); + le32_to_cpu(con->in_reply.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); @@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con) * If we sent a smaller connect_seq than the peer has, try * again with a larger value. */ - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n", + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->out_connect.connect_seq), - le32_to_cpu(con->in_connect.connect_seq)); - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq); + le32_to_cpu(con->in_reply.connect_seq)); + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) @@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con) */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->peer_global_seq, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) diff --git a/net/core/dev.c b/net/core/dev.c index 0f28a9e0b8ad..1cb0d8a6aa6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6283,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void) /* Initialize per network namespace state */ static int __net_init netdev_init(struct net *net) { - INIT_LIST_HEAD(&net->dev_base_head); + if (net != &init_net) + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dddbacb8f28c..42f1e1c7514f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); -struct net init_net; +struct net init_net = { + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +}; EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c48adc565e92..667c1d4ca984 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is - * not the loopback device drop the packet. */ - if (!(skb->dev->flags & IFF_LOOPBACK)) { + * not the loopback device drop the packet. Further, + * there is no legitimate reason for setting this from + * userspace so reject it if skb is NULL. */ + if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 372ec6502aa8..ffd8900a38e8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2717,7 +2717,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (ia_valid & ATTR_SIZE) + if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)) av |= FILE__OPEN; return dentry_has_perm(cred, dentry, av); diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b8c53723e09b..df2de54a958d 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -145,7 +145,9 @@ struct security_class_mapping secclass_map[] = { "node_bind", "name_connect", NULL } }, { "memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, - { "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, + { "capability2", + { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", + NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore new file mode 100644 index 000000000000..35f56be5a4cd --- /dev/null +++ b/tools/lib/traceevent/.gitignore @@ -0,0 +1 @@ +TRACEEVENT-CFLAGS diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 46c2f6b7b123..14131cb0522d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -207,7 +207,7 @@ libtraceevent.so: $(PEVENT_LIB_OBJS) libtraceevent.a: $(PEVENT_LIB_OBJS) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c +$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS $(Q)$(do_fpic_compile) define make_version.h @@ -272,6 +272,16 @@ ifneq ($(dep_includes),) include $(dep_includes) endif +### Detect environment changes +TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) + +TRACEEVENT-CFLAGS: force + @FLAGS='$(TRACK_CFLAGS)'; \ + if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ + echo 1>&2 " * new build flags or cross compiler"; \ + echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ + fi + tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -297,7 +307,7 @@ install: install_lib clean: $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d - $(RM) tags TAGS + $(RM) TRACEEVENT-CFLAGS tags TAGS endif # skip-makefile diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 75d74e5db8d5..77f124fe57ad 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -354,6 +354,7 @@ LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/dso-test-data.o LIB_OBJS += $(OUTPUT)util/color.o LIB_OBJS += $(OUTPUT)util/pager.o LIB_OBJS += $(OUTPUT)util/header.o @@ -803,6 +804,9 @@ $(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $< + $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 5ce30305462b..d909eb74a0eb 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1142,6 +1142,10 @@ static struct test { .func = test__perf_pmu, }, { + .desc = "Test dso data interface", + .func = dso__test_data, + }, + { .func = NULL, }, }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e3cab5f088f8..35e86c6df713 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -125,7 +125,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) /* * We can't annotate with just /proc/kallsyms */ - if (map->dso->symtab_type == SYMTAB__KALLSYMS) { + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { pr_err("Can't annotate %s: No vmlinux file was found in the " "path\n", sym->name); sleep(1); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 482f0517b61e..413bd62eedb1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -978,8 +978,8 @@ static int hist_browser__dump(struct hist_browser *browser) fp = fopen(filename, "w"); if (fp == NULL) { char bf[64]; - strerror_r(errno, bf, sizeof(bf)); - ui_helpline__fpush("Couldn't write to %s: %s", filename, bf); + const char *err = strerror_r(errno, bf, sizeof(bf)); + ui_helpline__fpush("Couldn't write to %s: %s", filename, err); return -1; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 8069dfb5ba77..3a282c0057d2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -426,7 +426,18 @@ int symbol__alloc_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); const size_t size = symbol__size(sym); - size_t sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64)); + size_t sizeof_sym_hist; + + /* Check for overflow when calculating sizeof_sym_hist */ + if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64)) + return -1; + + sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64)); + + /* Check for overflow in zalloc argument */ + if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) + / symbol_conf.nr_events) + return -1; notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); if (notes->src == NULL) @@ -777,7 +788,7 @@ fallback: free_filename = false; } - if (dso->symtab_type == SYMTAB__KALLSYMS) { + if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; char *build_id_msg = NULL; diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c new file mode 100644 index 000000000000..541cdc72c7df --- /dev/null +++ b/tools/perf/util/dso-test-data.c @@ -0,0 +1,153 @@ +#include "util.h" + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> + +#include "symbol.h" + +#define TEST_ASSERT_VAL(text, cond) \ +do { \ + if (!(cond)) { \ + pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ + return -1; \ + } \ +} while (0) + +static char *test_file(int size) +{ + static char buf_templ[] = "/tmp/test-XXXXXX"; + char *templ = buf_templ; + int fd, i; + unsigned char *buf; + + fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC); + + buf = malloc(size); + if (!buf) { + close(fd); + return NULL; + } + + for (i = 0; i < size; i++) + buf[i] = (unsigned char) ((int) i % 10); + + if (size != write(fd, buf, size)) + templ = NULL; + + close(fd); + return templ; +} + +#define TEST_FILE_SIZE (DSO__DATA_CACHE_SIZE * 20) + +struct test_data_offset { + off_t offset; + u8 data[10]; + int size; +}; + +struct test_data_offset offsets[] = { + /* Fill first cache page. */ + { + .offset = 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read first cache page. */ + { + .offset = 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Fill cache boundary pages. */ + { + .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read cache boundary pages. */ + { + .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Fill final cache page. */ + { + .offset = TEST_FILE_SIZE - 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read final cache page. */ + { + .offset = TEST_FILE_SIZE - 10, + .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + .size = 10, + }, + /* Read final cache page. */ + { + .offset = TEST_FILE_SIZE - 3, + .data = { 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 }, + .size = 3, + }, +}; + +int dso__test_data(void) +{ + struct machine machine; + struct dso *dso; + char *file = test_file(TEST_FILE_SIZE); + size_t i; + + TEST_ASSERT_VAL("No test file", file); + + memset(&machine, 0, sizeof(machine)); + + dso = dso__new((const char *)file); + + /* Basic 10 bytes tests. */ + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + struct test_data_offset *data = &offsets[i]; + ssize_t size; + u8 buf[10]; + + memset(buf, 0, 10); + size = dso__data_read_offset(dso, &machine, data->offset, + buf, 10); + + TEST_ASSERT_VAL("Wrong size", size == data->size); + TEST_ASSERT_VAL("Wrong data", !memcmp(buf, data->data, 10)); + } + + /* Read cross multiple cache pages. */ + { + ssize_t size; + int c; + u8 *buf; + + buf = malloc(TEST_FILE_SIZE); + TEST_ASSERT_VAL("ENOMEM\n", buf); + + /* First iteration to fill caches, second one to read them. */ + for (c = 0; c < 2; c++) { + memset(buf, 0, TEST_FILE_SIZE); + size = dso__data_read_offset(dso, &machine, 10, + buf, TEST_FILE_SIZE); + + TEST_ASSERT_VAL("Wrong size", + size == (TEST_FILE_SIZE - 10)); + + for (i = 0; i < (size_t)size; i++) + TEST_ASSERT_VAL("Wrong data", + buf[i] == (i % 10)); + } + + free(buf); + } + + dso__delete(dso); + unlink(file); + return 0; +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f74e9560350e..3edfd3483816 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -214,7 +214,7 @@ int perf_evlist__add_tracepoints(struct perf_evlist *evlist, attrs[i].type = PERF_TYPE_TRACEPOINT; attrs[i].config = err; attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | - PERF_SAMPLE_CPU); + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD); attrs[i].sample_period = 1; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5a47aba46759..3a6d20443330 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1212,6 +1212,12 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) attr.exclude_user, attr.exclude_kernel); + fprintf(fp, ", excl_host = %d, excl_guest = %d", + attr.exclude_host, + attr.exclude_guest); + + fprintf(fp, ", precise_ip = %d", attr.precise_ip); + if (nr) fprintf(fp, ", id = {"); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 514e2a4b367d..f247ef2789a4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -708,7 +708,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, bool printed = false; struct rb_node *node; int i = 0; - int ret; + int ret = 0; /* * If have one single callchain root, don't bother printing @@ -747,8 +747,11 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, root = &cnode->rb_root; } - return __callchain__fprintf_graph(fp, root, total_samples, + ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); + ret += fprintf(fp, "\n"); + + return ret; } static size_t __callchain__fprintf_flat(FILE *fp, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index a1f4e3669142..cc33486ad9e2 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -7,6 +7,8 @@ #include <stdio.h> #include <unistd.h> #include "map.h" +#include "thread.h" +#include "strlist.h" const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", @@ -585,7 +587,21 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid) self->kmaps.machine = self; self->pid = pid; self->root_dir = strdup(root_dir); - return self->root_dir == NULL ? -ENOMEM : 0; + if (self->root_dir == NULL) + return -ENOMEM; + + if (pid != HOST_KERNEL_ID) { + struct thread *thread = machine__findnew_thread(self, pid); + char comm[64]; + + if (thread == NULL) + return -ENOMEM; + + snprintf(comm, sizeof(comm), "[guest/%d]", pid); + thread__set_comm(thread, comm); + } + + return 0; } static void dsos__delete(struct list_head *self) @@ -680,7 +696,15 @@ struct machine *machines__findnew(struct rb_root *self, pid_t pid) (symbol_conf.guestmount)) { sprintf(path, "%s/%d", symbol_conf.guestmount, pid); if (access(path, R_OK)) { - pr_err("Can't access file %s\n", path); + static struct strlist *seen; + + if (!seen) + seen = strlist__new(true, NULL); + + if (!strlist__has_entry(seen, path)) { + pr_err("Can't access file %s\n", path); + strlist__add(seen, path); + } machine = NULL; goto out; } @@ -714,3 +738,16 @@ char *machine__mmap_name(struct machine *self, char *bf, size_t size) return bf; } + +void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size) +{ + struct rb_node *node; + struct machine *machine; + + for (node = rb_first(machines); node; node = rb_next(node)) { + machine = rb_entry(node, struct machine, rb_node); + machine->id_hdr_size = id_hdr_size; + } + + return; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index c14c665d9a25..03a1e9b08b21 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -151,6 +151,7 @@ struct machine *machines__add(struct rb_root *self, pid_t pid, struct machine *machines__find_host(struct rb_root *self); struct machine *machines__find(struct rb_root *self, pid_t pid); struct machine *machines__findnew(struct rb_root *self, pid_t pid); +void machines__set_id_hdr_size(struct rb_root *self, u16 id_hdr_size); char *machine__mmap_name(struct machine *self, char *bf, size_t size); int machine__init(struct machine *self, const char *root_dir, pid_t pid); void machine__exit(struct machine *self); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1aa721d7c10f..74a5af4d33ec 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -377,6 +377,7 @@ static int add_tracepoint(struct list_head **list, int *idx, attr.sample_type |= PERF_SAMPLE_RAW; attr.sample_type |= PERF_SAMPLE_TIME; attr.sample_type |= PERF_SAMPLE_CPU; + attr.sample_type |= PERF_SAMPLE_PERIOD; attr.sample_period = 1; snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name); @@ -489,6 +490,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, attr.bp_len = HW_BREAKPOINT_LEN_4; attr.type = PERF_TYPE_BREAKPOINT; + attr.sample_period = 1; return add_event(list, idx, &attr, NULL); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8e485592ca20..8e4f0755d2aa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -87,6 +87,7 @@ void perf_session__update_sample_type(struct perf_session *self) self->sample_id_all = perf_evlist__sample_id_all(self->evlist); self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist); self->host_machine.id_hdr_size = self->id_hdr_size; + machines__set_id_hdr_size(&self->machines, self->id_hdr_size); } int perf_session__create_kernel_maps(struct perf_session *self) @@ -918,7 +919,9 @@ static struct machine * { const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { + if (perf_guest && + ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || + (cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; if (event->header.type == PERF_RECORD_MMAP) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 50958bbeb26a..fdad4eeeb429 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,6 +29,7 @@ #define NT_GNU_BUILD_ID 3 #endif +static void dso_cache__free(struct rb_root *root); static bool dso__build_id_equal(const struct dso *dso, u8 *build_id); static int elf_read_build_id(Elf *elf, void *bf, size_t size); static void dsos__add(struct list_head *head, struct dso *dso); @@ -48,6 +49,31 @@ struct symbol_conf symbol_conf = { .symfs = "", }; +static enum dso_binary_type binary_type_symtab[] = { + DSO_BINARY_TYPE__KALLSYMS, + DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__JAVA_JIT, + DSO_BINARY_TYPE__DEBUGLINK, + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +#define DSO_BINARY_TYPE__SYMTAB_CNT sizeof(binary_type_symtab) + +static enum dso_binary_type binary_type_data[] = { + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +#define DSO_BINARY_TYPE__DATA_CNT sizeof(binary_type_data) + int dso__name_len(const struct dso *dso) { if (!dso) @@ -318,7 +344,9 @@ struct dso *dso__new(const char *name) dso__set_short_name(dso, dso->name); for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; - dso->symtab_type = SYMTAB__NOT_FOUND; + dso->cache = RB_ROOT; + dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; + dso->data_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; dso->sorted_by_name = 0; dso->has_build_id = 0; @@ -352,6 +380,7 @@ void dso__delete(struct dso *dso) free((char *)dso->short_name); if (dso->lname_alloc) free(dso->long_name); + dso_cache__free(&dso->cache); free(dso); } @@ -806,9 +835,9 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, symbols__fixup_end(&dso->symbols[map->type]); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) - dso->symtab_type = SYMTAB__GUEST_KALLSYMS; + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; else - dso->symtab_type = SYMTAB__KALLSYMS; + dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; return dso__split_kallsyms(dso, map, filter); } @@ -1660,32 +1689,110 @@ out: char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { - [SYMTAB__KALLSYMS] = 'k', - [SYMTAB__JAVA_JIT] = 'j', - [SYMTAB__DEBUGLINK] = 'l', - [SYMTAB__BUILD_ID_CACHE] = 'B', - [SYMTAB__FEDORA_DEBUGINFO] = 'f', - [SYMTAB__UBUNTU_DEBUGINFO] = 'u', - [SYMTAB__BUILDID_DEBUGINFO] = 'b', - [SYMTAB__SYSTEM_PATH_DSO] = 'd', - [SYMTAB__SYSTEM_PATH_KMODULE] = 'K', - [SYMTAB__GUEST_KALLSYMS] = 'g', - [SYMTAB__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__JAVA_JIT] = 'j', + [DSO_BINARY_TYPE__DEBUGLINK] = 'l', + [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', + [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', + [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', + [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', + [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', + [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', + [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', + [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', }; - if (dso == NULL || dso->symtab_type == SYMTAB__NOT_FOUND) + if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) return '!'; return origin[dso->symtab_type]; } +int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, + char *root_dir, char *file, size_t size) +{ + char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + int ret = 0; + + switch (type) { + case DSO_BINARY_TYPE__DEBUGLINK: { + char *debuglink; + + strncpy(file, dso->long_name, size); + debuglink = file + dso->long_name_len; + while (debuglink != file && *debuglink != '/') + debuglink--; + if (*debuglink == '/') + debuglink++; + filename__read_debuglink(dso->long_name, debuglink, + size - (debuglink - file)); + } + break; + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + /* skip the locally configured cache if a symfs is given */ + if (symbol_conf.symfs[0] || + (dso__build_id_filename(dso, file, size) == NULL)) + ret = -1; + break; + + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + snprintf(file, size, "%s/usr/lib/debug%s.debug", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + snprintf(file, size, "%s/usr/lib/debug%s", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + if (!dso->has_build_id) { + ret = -1; + break; + } + + build_id__sprintf(dso->build_id, + sizeof(dso->build_id), + build_id_hex); + snprintf(file, size, + "%s/usr/lib/debug/.build-id/%.2s/%s.debug", + symbol_conf.symfs, build_id_hex, build_id_hex + 2); + break; + + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + snprintf(file, size, "%s%s", + symbol_conf.symfs, dso->long_name); + break; + + case DSO_BINARY_TYPE__GUEST_KMODULE: + snprintf(file, size, "%s%s%s", symbol_conf.symfs, + root_dir, dso->long_name); + break; + + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + snprintf(file, size, "%s%s", symbol_conf.symfs, + dso->long_name); + break; + + default: + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__NOT_FOUND: + ret = -1; + break; + } + + return ret; +} + int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { - int size = PATH_MAX; char *name; int ret = -1; int fd; + u_int i; struct machine *machine; - const char *root_dir; + char *root_dir = (char *) ""; int want_symtab; dso__set_loaded(dso, map->type); @@ -1700,7 +1807,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) else machine = NULL; - name = malloc(size); + name = malloc(PATH_MAX); if (!name) return -1; @@ -1719,81 +1826,27 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) } ret = dso__load_perf_map(dso, map, filter); - dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT : - SYMTAB__NOT_FOUND; + dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : + DSO_BINARY_TYPE__NOT_FOUND; return ret; } + if (machine) + root_dir = machine->root_dir; + /* Iterate over candidate debug images. * On the first pass, only load images if they have a full symtab. * Failing that, do a second pass where we accept .dynsym also */ want_symtab = 1; restart: - for (dso->symtab_type = SYMTAB__DEBUGLINK; - dso->symtab_type != SYMTAB__NOT_FOUND; - dso->symtab_type++) { - switch (dso->symtab_type) { - case SYMTAB__DEBUGLINK: { - char *debuglink; - strncpy(name, dso->long_name, size); - debuglink = name + dso->long_name_len; - while (debuglink != name && *debuglink != '/') - debuglink--; - if (*debuglink == '/') - debuglink++; - filename__read_debuglink(dso->long_name, debuglink, - size - (debuglink - name)); - } - break; - case SYMTAB__BUILD_ID_CACHE: - /* skip the locally configured cache if a symfs is given */ - if (symbol_conf.symfs[0] || - (dso__build_id_filename(dso, name, size) == NULL)) { - continue; - } - break; - case SYMTAB__FEDORA_DEBUGINFO: - snprintf(name, size, "%s/usr/lib/debug%s.debug", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__UBUNTU_DEBUGINFO: - snprintf(name, size, "%s/usr/lib/debug%s", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__BUILDID_DEBUGINFO: { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { - if (!dso->has_build_id) - continue; + dso->symtab_type = binary_type_symtab[i]; - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), - build_id_hex); - snprintf(name, size, - "%s/usr/lib/debug/.build-id/%.2s/%s.debug", - symbol_conf.symfs, build_id_hex, build_id_hex + 2); - } - break; - case SYMTAB__SYSTEM_PATH_DSO: - snprintf(name, size, "%s%s", - symbol_conf.symfs, dso->long_name); - break; - case SYMTAB__GUEST_KMODULE: - if (map->groups && machine) - root_dir = machine->root_dir; - else - root_dir = ""; - snprintf(name, size, "%s%s%s", symbol_conf.symfs, - root_dir, dso->long_name); - break; - - case SYMTAB__SYSTEM_PATH_KMODULE: - snprintf(name, size, "%s%s", symbol_conf.symfs, - dso->long_name); - break; - default:; - } + if (dso__binary_type_file(dso, dso->symtab_type, + root_dir, name, PATH_MAX)) + continue; /* Name is now the name of the next image to try */ fd = open(name, O_RDONLY); @@ -2010,9 +2063,9 @@ struct map *machine__new_module(struct machine *machine, u64 start, return NULL; if (machine__is_host(machine)) - dso->symtab_type = SYMTAB__SYSTEM_PATH_KMODULE; + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; else - dso->symtab_type = SYMTAB__GUEST_KMODULE; + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; map_groups__insert(&machine->kmaps, map); return map; } @@ -2564,8 +2617,15 @@ int machine__create_kernel_maps(struct machine *machine) __machine__create_kernel_maps(machine, kernel) < 0) return -1; - if (symbol_conf.use_modules && machine__create_modules(machine) < 0) - pr_debug("Problems creating module maps, continuing anyway...\n"); + if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { + if (machine__is_host(machine)) + pr_debug("Problems creating module maps, " + "continuing anyway...\n"); + else + pr_debug("Problems creating module maps for guest %d, " + "continuing anyway...\n", machine->pid); + } + /* * Now that we have all the maps created, just set the ->end of them: */ @@ -2905,3 +2965,218 @@ struct map *dso__new_map(const char *name) return map; } + +static int open_dso(struct dso *dso, struct machine *machine) +{ + char *root_dir = (char *) ""; + char *name; + int fd; + + name = malloc(PATH_MAX); + if (!name) + return -ENOMEM; + + if (machine) + root_dir = machine->root_dir; + + if (dso__binary_type_file(dso, dso->data_type, + root_dir, name, PATH_MAX)) { + free(name); + return -EINVAL; + } + + fd = open(name, O_RDONLY); + free(name); + return fd; +} + +int dso__data_fd(struct dso *dso, struct machine *machine) +{ + int i = 0; + + if (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND) + return open_dso(dso, machine); + + do { + int fd; + + dso->data_type = binary_type_data[i++]; + + fd = open_dso(dso, machine); + if (fd >= 0) + return fd; + + } while (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND); + + return -EINVAL; +} + +static void +dso_cache__free(struct rb_root *root) +{ + struct rb_node *next = rb_first(root); + + while (next) { + struct dso_cache *cache; + + cache = rb_entry(next, struct dso_cache, rb_node); + next = rb_next(&cache->rb_node); + rb_erase(&cache->rb_node, root); + free(cache); + } +} + +static struct dso_cache* +dso_cache__find(struct rb_root *root, u64 offset) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct dso_cache *cache; + + while (*p != NULL) { + u64 end; + + parent = *p; + cache = rb_entry(parent, struct dso_cache, rb_node); + end = cache->offset + DSO__DATA_CACHE_SIZE; + + if (offset < cache->offset) + p = &(*p)->rb_left; + else if (offset >= end) + p = &(*p)->rb_right; + else + return cache; + } + return NULL; +} + +static void +dso_cache__insert(struct rb_root *root, struct dso_cache *new) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct dso_cache *cache; + u64 offset = new->offset; + + while (*p != NULL) { + u64 end; + + parent = *p; + cache = rb_entry(parent, struct dso_cache, rb_node); + end = cache->offset + DSO__DATA_CACHE_SIZE; + + if (offset < cache->offset) + p = &(*p)->rb_left; + else if (offset >= end) + p = &(*p)->rb_right; + } + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); +} + +static ssize_t +dso_cache__memcpy(struct dso_cache *cache, u64 offset, + u8 *data, u64 size) +{ + u64 cache_offset = offset - cache->offset; + u64 cache_size = min(cache->size - cache_offset, size); + + memcpy(data, cache->data + cache_offset, cache_size); + return cache_size; +} + +static ssize_t +dso_cache__read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + struct dso_cache *cache; + ssize_t ret; + int fd; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -1; + + do { + u64 cache_offset; + + ret = -ENOMEM; + + cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); + if (!cache) + break; + + cache_offset = offset & DSO__DATA_CACHE_MASK; + ret = -EINVAL; + + if (-1 == lseek(fd, cache_offset, SEEK_SET)) + break; + + ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE); + if (ret <= 0) + break; + + cache->offset = cache_offset; + cache->size = ret; + dso_cache__insert(&dso->cache, cache); + + ret = dso_cache__memcpy(cache, offset, data, size); + + } while (0); + + if (ret <= 0) + free(cache); + + close(fd); + return ret; +} + +static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + struct dso_cache *cache; + + cache = dso_cache__find(&dso->cache, offset); + if (cache) + return dso_cache__memcpy(cache, offset, data, size); + else + return dso_cache__read(dso, machine, offset, data, size); +} + +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + ssize_t r = 0; + u8 *p = data; + + do { + ssize_t ret; + + ret = dso_cache_read(dso, machine, offset, p, size); + if (ret < 0) + return ret; + + /* Reached EOF, return what we have. */ + if (!ret) + break; + + BUG_ON(ret > size); + + r += ret; + p += ret; + offset += ret; + size -= ret; + + } while (size); + + return r; +} + +ssize_t dso__data_read_addr(struct dso *dso, struct map *map, + struct machine *machine, u64 addr, + u8 *data, ssize_t size) +{ + u64 offset = map->map_ip(map, addr); + return dso__data_read_offset(dso, machine, offset, data, size); +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a884b99017f0..1fe733a1e21f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -155,6 +155,21 @@ struct addr_location { s32 cpu; }; +enum dso_binary_type { + DSO_BINARY_TYPE__KALLSYMS = 0, + DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__JAVA_JIT, + DSO_BINARY_TYPE__DEBUGLINK, + DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__NOT_FOUND, +}; + enum dso_kernel_type { DSO_TYPE_USER = 0, DSO_TYPE_KERNEL, @@ -167,19 +182,31 @@ enum dso_swap_type { DSO_SWAP__YES, }; +#define DSO__DATA_CACHE_SIZE 4096 +#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) + +struct dso_cache { + struct rb_node rb_node; + u64 offset; + u64 size; + char data[0]; +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root cache; enum dso_kernel_type kernel; enum dso_swap_type needs_swap; + enum dso_binary_type symtab_type; + enum dso_binary_type data_type; u8 adjust_symbols:1; u8 has_build_id:1; u8 hit:1; u8 annotate_warned:1; u8 sname_alloc:1; u8 lname_alloc:1; - unsigned char symtab_type; u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; @@ -253,21 +280,6 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); -enum symtab_type { - SYMTAB__KALLSYMS = 0, - SYMTAB__GUEST_KALLSYMS, - SYMTAB__JAVA_JIT, - SYMTAB__DEBUGLINK, - SYMTAB__BUILD_ID_CACHE, - SYMTAB__FEDORA_DEBUGINFO, - SYMTAB__UBUNTU_DEBUGINFO, - SYMTAB__BUILDID_DEBUGINFO, - SYMTAB__SYSTEM_PATH_DSO, - SYMTAB__GUEST_KMODULE, - SYMTAB__SYSTEM_PATH_KMODULE, - SYMTAB__NOT_FOUND, -}; - char dso__symtab_origin(const struct dso *dso); void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); @@ -304,4 +316,14 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); +int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, + char *root_dir, char *file, size_t size); + +int dso__data_fd(struct dso *dso, struct machine *machine); +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size); +ssize_t dso__data_read_addr(struct dso *dso, struct map *map, + struct machine *machine, u64 addr, + u8 *data, ssize_t size); +int dso__test_data(void); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 1064d5b148ad..3f59c496e64c 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -110,8 +110,17 @@ int perf_target__strerror(struct perf_target *target, int errnum, int idx; const char *msg; + BUG_ON(buflen > 0); + if (errnum >= 0) { - strerror_r(errnum, buf, buflen); + const char *err = strerror_r(errnum, buf, buflen); + + if (err != buf) { + size_t len = strlen(err); + char *c = mempcpy(buf, err, min(buflen - 1, len)); + *c = '\0'; + } + return 0; } |