diff options
240 files changed, 4115 insertions, 1785 deletions
@@ -69,6 +69,7 @@ James Bottomley <jejb@mulgrave.(none)> James Bottomley <jejb@titanic.il.steeleye.com> James E Wilson <wilson@specifix.com> James Ketrenos <jketreno@io.(none)> +Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com> <javier@osg.samsung.com> <javier.martinez@collabora.co.uk> Jean Tourrilhes <jt@hpl.hp.com> Jeff Garzik <jgarzik@pretzel.yyz.us> diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index ccc60324e738..405da11fc3e4 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -61,3 +61,5 @@ stable kernels. | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | +| | | | | +| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index e774128935d5..ef5fbe9a77c7 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs. - always-on : a boolean property. If present, the timer is powered through an always-on power domain, therefore it never loses context. +- fsl,erratum-a008585 : A boolean property. Indicates the presence of + QorIQ erratum A-008585, which says that reading the counter is + unreliable unless the same value is returned by back-to-back reads. + This also affects writes to the tval register, due to the implicit + counter read. + ** Optional properties: - arm,cpu-registers-not-fw-configured : Firmware does not initialize diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt new file mode 100644 index 000000000000..f2233138eba9 --- /dev/null +++ b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt @@ -0,0 +1,19 @@ + +* Rockchip rk3399 DFI device + +Required properties: +- compatible: Must be "rockchip,rk3399-dfi". +- reg: physical base address of each DFI and length of memory mapped region +- rockchip,pmu: phandle to the syscon managing the "pmu general register files" +- clocks: phandles for clock specified in "clock-names" property +- clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon"; + +Example: + dfi: dfi@0xff630000 { + compatible = "rockchip,rk3399-dfi"; + reg = <0x00 0xff630000 0x00 0x4000>; + rockchip,pmu = <&pmugrf>; + clocks = <&cru PCLK_DDR_MON>; + clock-names = "pclk_ddr_mon"; + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt new file mode 100644 index 000000000000..7a9e8603c150 --- /dev/null +++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt @@ -0,0 +1,209 @@ +* Rockchip rk3399 DMC(Dynamic Memory Controller) device + +Required properties: +- compatible: Must be "rockchip,rk3399-dmc". +- devfreq-events: Node to get DDR loading, Refer to + Documentation/devicetree/bindings/devfreq/ + rockchip-dfi.txt +- interrupts: The interrupt number to the CPU. The interrupt + specifier format depends on the interrupt controller. + It should be DCF interrupts, when DDR dvfs finish, + it will happen. +- clocks: Phandles for clock specified in "clock-names" property +- clock-names : The name of clock used by the DFI, must be + "pclk_ddr_mon"; +- operating-points-v2: Refer to Documentation/devicetree/bindings/power/opp.txt + for details. +- center-supply: DMC supply node. +- status: Marks the node enabled/disabled. + +Following properties are ddr timing: + +- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/ddr.h, + it select ddr3 cl-trp-trcd type, default value + "DDR3_DEFAULT".it must selected according to + "Speed Bin" in ddr3 datasheet, DO NOT use + smaller "Speed Bin" than ddr3 exactly is. + +- rockchip,pd_idle : Config the PD_IDLE value, defined the power-down + idle period, memories are places into power-down + mode if bus is idle for PD_IDLE DFI clocks. + +- rockchip,sr_idle : Configure the SR_IDLE value, defined the + selfrefresh idle period, memories are places + into self-refresh mode if bus is idle for + SR_IDLE*1024 DFI clocks (DFI clocks freq is + half of dram's clocks), defaule value is "0". + +- rockchip,sr_mc_gate_idle : Defined the self-refresh with memory and + controller clock gating idle period, memories + are places into self-refresh mode and memory + controller clock arg gating if bus is idle for + sr_mc_gate_idle*1024 DFI clocks. + +- rockchip,srpd_lite_idle : Defined the self-refresh power down idle + period, memories are places into self-refresh + power down mode if bus is idle for + srpd_lite_idle*1024 DFI clocks. This parameter + is for LPDDR4 only. + +- rockchip,standby_idle : Defined the standby idle period, memories are + places into self-refresh than controller, pi, + phy and dram clock will gating if bus is idle + for standby_idle * DFI clocks. + +- rockchip,dram_dll_disb_freq : It's defined the DDR3 dll bypass frequency in + MHz, when ddr freq less than DRAM_DLL_DISB_FREQ, + ddr3 dll will bypssed note: if dll was bypassed, + the odt also stop working. + +- rockchip,phy_dll_disb_freq : Defined the PHY dll bypass frequency in + MHz (Mega Hz), when ddr freq less than + DRAM_DLL_DISB_FREQ, phy dll will bypssed. + note: phy dll and phy odt are independent. + +- rockchip,ddr3_odt_disb_freq : When dram type is DDR3, this parameter defined + the odt disable frequency in MHz (Mega Hz), + when ddr frequency less then ddr3_odt_disb_freq, + the odt on dram side and controller side are + both disabled. + +- rockchip,ddr3_drv : When dram type is DDR3, this parameter define + the dram side driver stength in ohm, default + value is DDR3_DS_40ohm. + +- rockchip,ddr3_odt : When dram type is DDR3, this parameter define + the dram side ODT stength in ohm, default value + is DDR3_ODT_120ohm. + +- rockchip,phy_ddr3_ca_drv : When dram type is DDR3, this parameter define + the phy side CA line(incluing command line, + address line and clock line) driver strength. + Default value is PHY_DRV_ODT_40. + +- rockchip,phy_ddr3_dq_drv : When dram type is DDR3, this parameter define + the phy side DQ line(incluing DQS/DQ/DM line) + driver strength. default value is PHY_DRV_ODT_40. + +- rockchip,phy_ddr3_odt : When dram type is DDR3, this parameter define the + phy side odt strength, default value is + PHY_DRV_ODT_240. + +- rockchip,lpddr3_odt_disb_freq : When dram type is LPDDR3, this parameter defined + then odt disable frequency in MHz (Mega Hz), + when ddr frequency less then ddr3_odt_disb_freq, + the odt on dram side and controller side are + both disabled. + +- rockchip,lpddr3_drv : When dram type is LPDDR3, this parameter define + the dram side driver stength in ohm, default + value is LP3_DS_34ohm. + +- rockchip,lpddr3_odt : When dram type is LPDDR3, this parameter define + the dram side ODT stength in ohm, default value + is LP3_ODT_240ohm. + +- rockchip,phy_lpddr3_ca_drv : When dram type is LPDDR3, this parameter define + the phy side CA line(incluing command line, + address line and clock line) driver strength. + default value is PHY_DRV_ODT_40. + +- rockchip,phy_lpddr3_dq_drv : When dram type is LPDDR3, this parameter define + the phy side DQ line(incluing DQS/DQ/DM line) + driver strength. default value is + PHY_DRV_ODT_40. + +- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define + the phy side odt strength, default value is + PHY_DRV_ODT_240. + +- rockchip,lpddr4_odt_disb_freq : When dram type is LPDDR4, this parameter + defined the odt disable frequency in + MHz (Mega Hz), when ddr frequency less then + ddr3_odt_disb_freq, the odt on dram side and + controller side are both disabled. + +- rockchip,lpddr4_drv : When dram type is LPDDR4, this parameter define + the dram side driver stength in ohm, default + value is LP4_PDDS_60ohm. + +- rockchip,lpddr4_dq_odt : When dram type is LPDDR4, this parameter define + the dram side ODT on dqs/dq line stength in ohm, + default value is LP4_DQ_ODT_40ohm. + +- rockchip,lpddr4_ca_odt : When dram type is LPDDR4, this parameter define + the dram side ODT on ca line stength in ohm, + default value is LP4_CA_ODT_40ohm. + +- rockchip,phy_lpddr4_ca_drv : When dram type is LPDDR4, this parameter define + the phy side CA line(incluing command address + line) driver strength. default value is + PHY_DRV_ODT_40. + +- rockchip,phy_lpddr4_ck_cs_drv : When dram type is LPDDR4, this parameter define + the phy side clock line and cs line driver + strength. default value is PHY_DRV_ODT_80. + +- rockchip,phy_lpddr4_dq_drv : When dram type is LPDDR4, this parameter define + the phy side DQ line(incluing DQS/DQ/DM line) + driver strength. default value is PHY_DRV_ODT_80. + +- rockchip,phy_lpddr4_odt : When dram type is LPDDR4, this parameter define + the phy side odt strength, default value is + PHY_DRV_ODT_60. + +Example: + dmc_opp_table: dmc_opp_table { + compatible = "operating-points-v2"; + + opp00 { + opp-hz = /bits/ 64 <300000000>; + opp-microvolt = <900000>; + }; + opp01 { + opp-hz = /bits/ 64 <666000000>; + opp-microvolt = <900000>; + }; + }; + + dmc: dmc { + compatible = "rockchip,rk3399-dmc"; + devfreq-events = <&dfi>; + interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru SCLK_DDRCLK>; + clock-names = "dmc_clk"; + operating-points-v2 = <&dmc_opp_table>; + center-supply = <&ppvar_centerlogic>; + upthreshold = <15>; + downdifferential = <10>; + rockchip,ddr3_speed_bin = <21>; + rockchip,pd_idle = <0x40>; + rockchip,sr_idle = <0x2>; + rockchip,sr_mc_gate_idle = <0x3>; + rockchip,srpd_lite_idle = <0x4>; + rockchip,standby_idle = <0x2000>; + rockchip,dram_dll_dis_freq = <300>; + rockchip,phy_dll_dis_freq = <125>; + rockchip,auto_pd_dis_freq = <666>; + rockchip,ddr3_odt_dis_freq = <333>; + rockchip,ddr3_drv = <DDR3_DS_40ohm>; + rockchip,ddr3_odt = <DDR3_ODT_120ohm>; + rockchip,phy_ddr3_ca_drv = <PHY_DRV_ODT_40>; + rockchip,phy_ddr3_dq_drv = <PHY_DRV_ODT_40>; + rockchip,phy_ddr3_odt = <PHY_DRV_ODT_240>; + rockchip,lpddr3_odt_dis_freq = <333>; + rockchip,lpddr3_drv = <LP3_DS_34ohm>; + rockchip,lpddr3_odt = <LP3_ODT_240ohm>; + rockchip,phy_lpddr3_ca_drv = <PHY_DRV_ODT_40>; + rockchip,phy_lpddr3_dq_drv = <PHY_DRV_ODT_40>; + rockchip,phy_lpddr3_odt = <PHY_DRV_ODT_240>; + rockchip,lpddr4_odt_dis_freq = <333>; + rockchip,lpddr4_drv = <LP4_PDDS_60ohm>; + rockchip,lpddr4_dq_odt = <LP4_DQ_ODT_40ohm>; + rockchip,lpddr4_ca_odt = <LP4_CA_ODT_40ohm>; + rockchip,phy_lpddr4_ca_drv = <PHY_DRV_ODT_40>; + rockchip,phy_lpddr4_ck_cs_drv = <PHY_DRV_ODT_80>; + rockchip,phy_lpddr4_dq_drv = <PHY_DRV_ODT_80>; + rockchip,phy_lpddr4_odt = <PHY_DRV_ODT_60>; + status = "disabled"; + }; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a4f4d693e2c1..25037ded3683 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -698,6 +698,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. loops can be debugged more effectively on production systems. + clocksource.arm_arch_timer.fsl-a008585= + [ARM64] + Format: <bool> + Enable/disable the workaround of Freescale/NXP + erratum A-008585. This can be useful for KVM + guests, if the guest device tree doesn't show the + erratum. If unspecified, the workaround is + enabled based on the device tree. + clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index 477927becacb..ea8d7b4e53f0 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,6 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); +DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); +DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); static_branch_likely() static_branch_unlikely() @@ -140,6 +142,13 @@ static_branch_inc(), will change the branch back to true. Likewise, if the key is initialized false, a 'static_branch_inc()', will change the branch to true. And then a 'static_branch_dec()', will again make the branch false. +Where an array of keys is required, it can be defined as: + + DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); + +or: + + DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); 4) Architecture level code patching interface, 'jump labels' diff --git a/MAINTAINERS b/MAINTAINERS index 01bff8ea28d8..c8926c1acd22 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -913,15 +913,17 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon <will.deacon@arm.com> -R: Mark Rutland <mark.rutland@arm.com> +M: Mark Rutland <mark.rutland@arm.com> S: Maintained +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) F: arch/arm*/kernel/perf_* F: arch/arm/oprofile/common.c F: arch/arm*/kernel/hw_breakpoint.c F: arch/arm*/include/asm/hw_breakpoint.h F: arch/arm*/include/asm/perf_event.h -F: drivers/perf/arm_pmu.c +F: drivers/perf/* F: include/linux/perf/arm_pmu.h +F: Documentation/devicetree/bindings/arm/pmu.txt ARM PORT M: Russell King <linux@armlinux.org.uk> @@ -3282,6 +3284,7 @@ L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) +F: Documentation/cpu-freq/ F: drivers/cpufreq/ F: include/linux/cpufreq.h @@ -8745,7 +8748,7 @@ F: drivers/oprofile/ F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) -M: Mark Fasheh <mfasheh@suse.com> +M: Mark Fasheh <mfasheh@versity.com> M: Joel Becker <jlbec@evilplan.org> L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://ocfs2.wiki.kernel.org @@ -11626,7 +11629,7 @@ F: Documentation/devicetree/bindings/thermal/ THERMAL/CPU_COOLING M: Amit Daniel Kachhap <amit.kachhap@gmail.com> M: Viresh Kumar <viresh.kumar@linaro.org> -M: Javi Merino <javi.merino@arm.com> +M: Javi Merino <javi.merino@kernel.org> L: linux-pm@vger.kernel.org S: Supported F: Documentation/thermal/cpu-cooling-api.txt @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Psychotic Stoned Sheep # *DOCUMENTATION* diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a9c4e48bb7ec..b2113c24850c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1,6 +1,7 @@ config ARM bool default y + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index af11c2f8f3b7..fc6d541549a2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -779,7 +779,7 @@ __armv7_mmu_cache_on: orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #3 << 0 @ use only ttbr0 + bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 01986deef7c5..36cc7cc012f9 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_CPUFREQ_DT=y CONFIG_CPU_IDLE=y CONFIG_ARM_EXYNOS_CPUIDLE=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ea3566fb92e2..58459105cadc 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_QORIQ_CPUFREQ=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? */ +}; + +#endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index d009f7911ffc..bf02dbd9ccda 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -111,7 +111,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) /* The ARM override for dma_max_pfn() */ static inline unsigned long dma_max_pfn(struct device *dev) { - return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask); + return dma_to_pfn(dev, *dev->dma_mask); } #define dma_max_pfn(dev) dma_max_pfn(dev) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 40ecd5f514a2..f676febbb270 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -88,6 +88,8 @@ void __init arm_dt_init_cpu_maps(void) return; for_each_child_of_node(cpus, cpu) { + const __be32 *cell; + int prop_bytes; u32 hwid; if (of_node_cmp(cpu->type, "cpu")) @@ -99,7 +101,8 @@ void __init arm_dt_init_cpu_maps(void) * properties is considered invalid to build the * cpu_logical_map. */ - if (of_property_read_u32(cpu, "reg", &hwid)) { + cell = of_get_property(cpu, "reg", &prop_bytes); + if (!cell || prop_bytes < sizeof(*cell)) { pr_debug(" * %s missing reg property\n", cpu->full_name); of_node_put(cpu); @@ -107,10 +110,15 @@ void __init arm_dt_init_cpu_maps(void) } /* - * 8 MSBs must be set to 0 in the DT since the reg property + * Bits n:24 must be set to 0 in the DT since the reg property * defines the MPIDR[23:0]. */ - if (hwid & ~MPIDR_HWID_BITMASK) { + do { + hwid = be32_to_cpu(*cell++); + prop_bytes -= sizeof(*cell); + } while (!hwid && prop_bytes > 0); + + if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) { of_node_put(cpu); return; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 15063851cd10..b9423491b9d7 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -596,12 +596,6 @@ static struct attribute_group armv7_pmuv1_events_attr_group = { .attrs = armv7_pmuv1_event_attrs, }; -static const struct attribute_group *armv7_pmuv1_attr_groups[] = { - &armv7_pmuv1_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); @@ -653,12 +647,6 @@ static struct attribute_group armv7_pmuv2_events_attr_group = { .attrs = armv7_pmuv2_event_attrs, }; -static const struct attribute_group *armv7_pmuv2_attr_groups[] = { - &armv7_pmuv2_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -1208,7 +1196,10 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1217,7 +1208,10 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1226,7 +1220,10 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1236,7 +1233,10 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1246,7 +1246,10 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1256,7 +1259,10 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1264,7 +1270,10 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return ret; } diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 994e971a8538..a0affd14086a 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -270,7 +270,7 @@ static bool tk_is_cntvct(const struct timekeeper *tk) if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) return false; - if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + if (!tk->tkr_mono.clock->archdata.vdso_direct) return false; return true; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bc3f00f586f1..17c14a1d9112 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -102,10 +103,8 @@ config ARM64 select NO_BOOTMEM select OF select OF_EARLY_FLATTREE - select OF_NUMA if NUMA && OF select OF_RESERVED_MEM select PCI_ECAM if ACPI - select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY select SPARSE_IRQ @@ -122,6 +121,9 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config DEBUG_RODATA + def_bool y + config ARM64_PAGE_SHIFT int default 16 if ARM64_64K_PAGES @@ -415,18 +417,13 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - depends on MODULES default y - select ARM64_MODULE_CMODEL_LARGE + select ARM64_MODULE_CMODEL_LARGE if MODULES help - This option builds kernel modules using the large memory model in - order to avoid the use of the ADRP instruction, which can cause - a subsequent memory access to use an incorrect address on Cortex-A53 - parts up to r0p4. - - Note that the kernel itself must be linked with a version of ld - which fixes potentially affected ADRP instructions through the - use of veneers. + This option links the kernel with '--fix-cortex-a53-843419' and + builds modules using the large memory model in order to avoid the use + of the ADRP instruction, which can cause a subsequent memory access + to use an incorrect address on Cortex-A53 parts up to r0p4. If unsure, say Y. @@ -582,7 +579,8 @@ config HOTPLUG_CPU # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support" - depends on SMP + select ACPI_NUMA if ACPI + select OF_NUMA help Enable NUMA (Non Uniform Memory Access) support. @@ -603,11 +601,18 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on !HIBERNATION def_bool y config ARCH_HAS_HOLES_MEMORYMODEL diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 0cc758cdd0dc..b661fe742615 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -49,16 +49,6 @@ config DEBUG_SET_MODULE_RONX If in doubt, say Y. -config DEBUG_RODATA - bool "Make kernel text and rodata read-only" - default y - help - If this is set, kernel text and rodata will be made read-only. This - is to help catch accidental or malicious attempts to change the - kernel's executable code. - - If in doubt, say Y. - config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA bool "Align linker sections up to SECTION_SIZE" diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index be5d824ebdba..4a4318bc219a 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -159,7 +159,6 @@ config ARCH_TEGRA select CLKSRC_MMIO select CLKSRC_OF select GENERIC_CLOCKEVENTS - select HAVE_CLK select PINCTRL select RESET_CONTROLLER help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5b54f8c021d8..ab51aed6b6c1 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,6 +18,14 @@ ifneq ($(CONFIG_RELOCATABLE),) LDFLAGS_vmlinux += -pie -Bsymbolic endif +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifeq ($(call ld-option, --fix-cortex-a53-843419),) +$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) + else +LDFLAGS_vmlinux += --fix-cortex-a53-843419 + endif +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions @@ -38,10 +46,12 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB LD += -EB +UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +UTS_MACHINE := aarch64 endif CHECKFLAGS += -D__aarch64__ diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index f43d2c44c765..44e1d7f10add 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h @@ -10,7 +9,6 @@ generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h -generic-y += ftrace.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -27,12 +25,10 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h generic-y += mutex.h -generic-y += pci.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += sections.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h @@ -45,7 +41,6 @@ generic-y += swab.h generic-y += switch_to.h generic-y += termbits.h generic-y += termios.h -generic-y += topology.h generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 5420cb0fcb3e..e517088d635f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,7 +12,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include <linux/mm.h> +#include <linux/memblock.h> #include <linux/psci.h> #include <asm/cputype.h> @@ -32,7 +32,11 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - if (!page_is_ram(phys >> PAGE_SHIFT)) + /* + * EFI's reserve_regions() call adds memory with the WB attribute + * to memblock via early_init_dt_add_memory_arch(). + */ + if (!memblock_is_memory(phys)) return ioremap(phys, size); return ioremap_cache(phys, size); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 8746ff6abd77..55101bd86b98 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -2,6 +2,7 @@ #define __ASM_ALTERNATIVE_H #include <asm/cpufeature.h> +#include <asm/insn.h> #ifndef __ASSEMBLY__ @@ -90,34 +91,55 @@ void apply_alternatives(void *start, size_t length); .endm /* - * Begin an alternative code sequence. + * Alternative sequences + * + * The code for the case where the capability is not present will be + * assembled and linked as normal. There are no restrictions on this + * code. + * + * The code for the case where the capability is present will be + * assembled into a special section to be used for dynamic patching. + * Code for that case must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. * - * The code that follows this macro will be assembled and linked as - * normal. There are no restrictions on this code. + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ + +/* + * Begin an alternative code sequence. */ .macro alternative_if_not cap + .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: .endm +.macro alternative_if cap + .set .Lasm_alt_mode, 1 + .pushsection .altinstructions, "a" + altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f + .popsection + .pushsection .altinstr_replacement, "ax" + .align 2 /* So GAS knows label 661 is suitably aligned */ +661: +.endm + /* - * Provide the alternative code sequence. - * - * The code that follows this macro is assembled into a special - * section to be used for dynamic patching. Code that follows this - * macro must: - * - * 1. Be exactly the same length (in bytes) as the default code - * sequence. - * - * 2. Not contain a branch target that is used outside of the - * alternative sequence it is defined in (branches into an - * alternative sequence are not fixed up). + * Provide the other half of the alternative code sequence. */ .macro alternative_else -662: .pushsection .altinstr_replacement, "ax" +662: + .if .Lasm_alt_mode==0 + .pushsection .altinstr_replacement, "ax" + .else + .popsection + .endif 663: .endm @@ -125,11 +147,25 @@ void apply_alternatives(void *start, size_t length); * Complete an alternative code sequence. */ .macro alternative_endif -664: .popsection +664: + .if .Lasm_alt_mode==0 + .popsection + .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endm +/* + * Provides a trivial alternative or default sequence consisting solely + * of NOPs. The number of NOPs is chosen automatically to match the + * previous case. + */ +.macro alternative_else_nop_endif +alternative_else + nops (662b-661b) / AARCH64_INSN_SIZE +alternative_endif +.endm + #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index fbe0ca31a99c..eaa5bbe3fa87 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -20,13 +20,55 @@ #define __ASM_ARCH_TIMER_H #include <asm/barrier.h> +#include <asm/sysreg.h> #include <linux/bug.h> #include <linux/init.h> +#include <linux/jump_label.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> +#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) +extern struct static_key_false arch_timer_read_ool_enabled; +#define needs_fsl_a008585_workaround() \ + static_branch_unlikely(&arch_timer_read_ool_enabled) +#else +#define needs_fsl_a008585_workaround() false +#endif + +u32 __fsl_a008585_read_cntp_tval_el0(void); +u32 __fsl_a008585_read_cntv_tval_el0(void); +u64 __fsl_a008585_read_cntvct_el0(void); + +/* + * The number of retries is an arbitrary value well beyond the highest number + * of iterations the loop has been observed to take. + */ +#define __fsl_a008585_read_reg(reg) ({ \ + u64 _old, _new; \ + int _retries = 200; \ + \ + do { \ + _old = read_sysreg(reg); \ + _new = read_sysreg(reg); \ + _retries--; \ + } while (unlikely(_old != _new) && _retries); \ + \ + WARN_ON_ONCE(!_retries); \ + _new; \ +}) + +#define arch_timer_reg_read_stable(reg) \ +({ \ + u64 _val; \ + if (needs_fsl_a008585_workaround()) \ + _val = __fsl_a008585_read_##reg(); \ + else \ + _val = read_sysreg(reg); \ + _val; \ +}) + /* * These register accessors are marked inline so the compiler can * nicely work out which register we want, and chuck away the rest of @@ -38,19 +80,19 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_tval_el0); break; } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_tval_el0); break; } } @@ -61,48 +103,38 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) static __always_inline u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { - u32 val; - if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntp_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); - break; + return arch_timer_reg_read_stable(cntp_tval_el0); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntv_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); - break; + return arch_timer_reg_read_stable(cntv_tval_el0); } } - return val; + BUG(); } static inline u32 arch_timer_get_cntfrq(void) { - u32 val; - asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); - return val; + return read_sysreg(cntfrq_el0); } static inline u32 arch_timer_get_cntkctl(void) { - u32 cntkctl; - asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - return cntkctl; + return read_sysreg(cntkctl_el1); } static inline void arch_timer_set_cntkctl(u32 cntkctl) { - asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); + write_sysreg(cntkctl, cntkctl_el1); } static inline u64 arch_counter_get_cntpct(void) @@ -116,12 +148,8 @@ static inline u64 arch_counter_get_cntpct(void) static inline u64 arch_counter_get_cntvct(void) { - u64 cval; - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); - - return cval; + return arch_timer_reg_read_stable(cntvct_el0); } static inline int arch_timer_arch_init(void) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d5025c69ca81..28bfe6132eb6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -87,6 +87,15 @@ .endm /* + * NOP sequence + */ + .macro nops, num + .rept \num + nop + .endr + .endm + +/* * Emit an entry into the exception table */ .macro _asm_extable, from, to @@ -216,11 +225,26 @@ lr .req x30 // link register .macro mmid, rd, rn ldr \rd, [\rn, #MM_CONTEXT_ID] .endm +/* + * read_ctr - read CTR_EL0. If the system has mismatched + * cache line sizes, provide the system wide safe value + * from arm64_ftr_reg_ctrel0.sys_val + */ + .macro read_ctr, reg +alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE + mrs \reg, ctr_el0 // read CTR + nop +alternative_else + ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL +alternative_endif + .endm + /* - * dcache_line_size - get the minimum D-cache line size from the CTR register. + * raw_dcache_line_size - get the minimum D-cache line size on this CPU + * from the CTR register. */ - .macro dcache_line_size, reg, tmp + .macro raw_dcache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR ubfm \tmp, \tmp, #16, #19 // cache line size encoding mov \reg, #4 // bytes per word @@ -228,9 +252,20 @@ lr .req x30 // link register .endm /* - * icache_line_size - get the minimum I-cache line size from the CTR register. + * dcache_line_size - get the safe D-cache line size across all CPUs */ - .macro icache_line_size, reg, tmp + .macro dcache_line_size, reg, tmp + read_ctr \tmp + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * raw_icache_line_size - get the minimum I-cache line size on this CPU + * from the CTR register. + */ + .macro raw_icache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR and \tmp, \tmp, #0xf // cache line size encoding mov \reg, #4 // bytes per word @@ -238,6 +273,16 @@ lr .req x30 // link register .endm /* + * icache_line_size - get the safe I-cache line size across all CPUs + */ + .macro icache_line_size, reg, tmp + read_ctr \tmp + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_idmap_t0sz, valreg, tmpreg diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index b5890be8f257..7457ce082b5f 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -86,8 +86,8 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(add_return##name), \ + __LL_SC_ATOMIC(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ @@ -112,8 +112,8 @@ static inline void atomic_and(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(and), + __LL_SC_ATOMIC(and) + __nops(1), /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") @@ -130,8 +130,8 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_and##name), \ + __LL_SC_ATOMIC(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ @@ -156,8 +156,8 @@ static inline void atomic_sub(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(sub), + __LL_SC_ATOMIC(sub) + __nops(1), /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") @@ -174,9 +174,8 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ @@ -203,8 +202,8 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_sub##name), \ + __LL_SC_ATOMIC(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ @@ -284,8 +283,8 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(add_return##name), \ + __LL_SC_ATOMIC64(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ @@ -310,8 +309,8 @@ static inline void atomic64_and(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(and), + __LL_SC_ATOMIC64(and) + __nops(1), /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") @@ -328,8 +327,8 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_and##name), \ + __LL_SC_ATOMIC64(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ @@ -354,8 +353,8 @@ static inline void atomic64_sub(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(sub), + __LL_SC_ATOMIC64(sub) + __nops(1), /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") @@ -372,9 +371,8 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC64(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ @@ -401,8 +399,8 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_sub##name), \ + __LL_SC_ATOMIC64(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ @@ -426,13 +424,8 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" __LL_SC_ATOMIC64(dec_if_positive) - " nop\n" - " nop\n" - " nop\n" - " nop\n" - " nop", + __nops(6), /* LSE atomics */ "1: ldr x30, %[v]\n" " subs %[ret], x30, #1\n" @@ -464,9 +457,8 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_CMPXCHG(name) \ - " nop", \ + __LL_SC_CMPXCHG(name) \ + __nops(2), \ /* LSE atomics */ \ " mov " #w "30, %" #w "[old]\n" \ " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ @@ -517,10 +509,8 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - " nop\n" \ - " nop\n" \ - __LL_SC_CMPXCHG_DBL(name), \ + __LL_SC_CMPXCHG_DBL(name) \ + __nops(3), \ /* LSE atomics */ \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 4eea7f618dce..4e0497f581a0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,9 @@ #ifndef __ASSEMBLY__ +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) asm volatile(__nops(n)) + #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") #define wfi() asm volatile("wfi" : : : "memory") diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index c64268dbff64..2e5fb976a572 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma, */ extern void __dma_map_area(const void *, size_t, int); extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_range(const void *, const void *); +extern void __dma_flush_area(const void *, size_t); /* * Copy user data from/to a page which is mapped into a different diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm64/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? */ +}; + +#endif diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index bd86a79491bc..91b26d26af8a 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -43,10 +43,8 @@ static inline unsigned long __xchg_case_##name(unsigned long x, \ " cbnz %w1, 1b\n" \ " " #mb, \ /* LSE atomics */ \ - " nop\n" \ - " nop\n" \ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ - " nop\n" \ + __nops(3) \ " " #nop_lse) \ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ : "r" (x) \ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 7099f26e3702..758d74fedfad 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -9,6 +9,8 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H +#include <linux/jump_label.h> + #include <asm/hwcap.h> #include <asm/sysreg.h> @@ -37,8 +39,9 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 -#define ARM64_NCAPS 15 +#define ARM64_NCAPS 16 #ifndef __ASSEMBLY__ @@ -63,7 +66,7 @@ struct arm64_ftr_bits { enum ftr_type type; u8 shift; u8 width; - s64 safe_val; /* safe value for discrete features */ + s64 safe_val; /* safe value for FTR_EXACT features */ }; /* @@ -72,13 +75,14 @@ struct arm64_ftr_bits { * @sys_val Safe value across the CPUs (system view) */ struct arm64_ftr_reg { - u32 sys_id; - const char *name; - u64 strict_mask; - u64 sys_val; - struct arm64_ftr_bits *ftr_bits; + const char *name; + u64 strict_mask; + u64 sys_val; + const struct arm64_ftr_bits *ftr_bits; }; +extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; + /* scope of capability check */ enum { SCOPE_SYSTEM, @@ -109,6 +113,7 @@ struct arm64_cpu_capabilities { }; extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; bool this_cpu_has_cap(unsigned int cap); @@ -121,16 +126,21 @@ static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return test_bit(num, cpu_hwcaps); + if (__builtin_constant_p(num)) + return static_branch_unlikely(&cpu_hwcap_keys[num]); + else + return test_bit(num, cpu_hwcaps); } static inline void cpus_set_cap(unsigned int num) { - if (num >= ARM64_NCAPS) + if (num >= ARM64_NCAPS) { pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", num, ARM64_NCAPS); - else + } else { __set_bit(num, cpu_hwcaps); + static_branch_enable(&cpu_hwcap_keys[num]); + } } static inline int __attribute_const__ @@ -157,7 +167,7 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } -static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } @@ -170,7 +180,7 @@ cpuid_feature_extract_field(u64 features, int field, bool sign) cpuid_feature_extract_unsigned_field(features, field); } -static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) { return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign); } @@ -193,11 +203,11 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); -void check_local_cpu_errata(void); -void __init enable_errata_workarounds(void); +void check_local_cpu_capabilities(void); -void verify_local_cpu_errata(void); -void verify_local_cpu_capabilities(void); +void update_cpu_errata_workarounds(void); +void __init enable_errata_workarounds(void); +void verify_local_cpu_errata_workarounds(void); u64 read_system_reg(u32 id); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9d9fd4b9a72e..26a68ddb11c1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -93,11 +93,7 @@ #include <asm/sysreg.h> -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \ - __val; \ -}) +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) /* * The CPU ID never changes at run time, so we might as well tell the diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h index 65e0190e97c8..836b05630003 100644 --- a/arch/arm64/include/asm/dcc.h +++ b/arch/arm64/include/asm/dcc.h @@ -21,21 +21,16 @@ #define __ASM_DCC_H #include <asm/barrier.h> +#include <asm/sysreg.h> static inline u32 __dcc_getstatus(void) { - u32 ret; - - asm volatile("mrs %0, mdccsr_el0" : "=r" (ret)); - - return ret; + return read_sysreg(mdccsr_el0); } static inline char __dcc_getchar(void) { - char c; - - asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c)); + char c = read_sysreg(dbgdtrrx_el0); isb(); return c; @@ -47,8 +42,7 @@ static inline void __dcc_putchar(char c) * The typecast is to make absolutely certain that 'c' is * zero-extended. */ - asm volatile("msr dbgdtrtx_el0, %0" - : : "r" ((unsigned long)(unsigned char)c)); + write_sysreg((unsigned char)c, dbgdtrtx_el0); isb(); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index f772e15c4766..d14c478976d0 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -78,6 +78,23 @@ #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR (UL(1) << 6) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_S1PTW (UL(1) << 7) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV (UL(1) << 24) #define ESR_ELx_SAS_SHIFT (22) #define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) @@ -86,16 +103,9 @@ #define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) #define ESR_ELx_SF (UL(1) << 15) #define ESR_ELx_AR (UL(1) << 14) -#define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_CM (UL(1) << 8) -#define ESR_ELx_S1PTW (UL(1) << 7) -#define ESR_ELx_WNR (UL(1) << 6) -#define ESR_ELx_FSC (0x3F) -#define ESR_ELx_FSC_TYPE (0x3C) -#define ESR_ELx_FSC_EXTABT (0x10) -#define ESR_ELx_FSC_ACCESS (0x08) -#define ESR_ELx_FSC_FAULT (0x04) -#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -109,6 +119,62 @@ ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ ((imm) & 0xffff)) +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 115ea2a64520..9510ace570e2 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> +#include <asm/sysreg.h> #include <asm/virt.h> #ifdef __KERNEL__ @@ -98,18 +99,18 @@ static inline void decode_ctrl_reg(u32 reg, #define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP) /* Debug register names. */ -#define AARCH64_DBG_REG_NAME_BVR "bvr" -#define AARCH64_DBG_REG_NAME_BCR "bcr" -#define AARCH64_DBG_REG_NAME_WVR "wvr" -#define AARCH64_DBG_REG_NAME_WCR "wcr" +#define AARCH64_DBG_REG_NAME_BVR bvr +#define AARCH64_DBG_REG_NAME_BCR bcr +#define AARCH64_DBG_REG_NAME_WVR wvr +#define AARCH64_DBG_REG_NAME_WCR wcr /* Accessor macros for the debug registers. */ #define AARCH64_DBG_READ(N, REG, VAL) do {\ - asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\ + VAL = read_sysreg(dbg##REG##N##_el1);\ } while (0) #define AARCH64_DBG_WRITE(N, REG, VAL) do {\ - asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\ + write_sysreg(VAL, dbg##REG##N##_el1);\ } while (0) struct task_struct; @@ -141,8 +142,6 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) } #endif -extern struct pmu perf_ops_bp; - /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1dbaa901d7e5..bc853663dd51 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -246,7 +246,8 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } -__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) @@ -318,6 +319,11 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) bool aarch64_insn_is_nop(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); +static inline bool aarch64_insn_is_adr_adrp(u32 insn) +{ + return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn); +} + int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); @@ -398,6 +404,9 @@ int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); +s32 aarch64_insn_adrp_get_offset(u32 insn); +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset); + bool aarch32_insn_is_wide(u32 insn); #define A32_RN_OFFSET 16 diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 9b6e408cfa51..0bba427bb4c2 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -40,25 +40,25 @@ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { - asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writew __raw_writew static inline void __raw_writew(u16 val, volatile void __iomem *addr) { - asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writel __raw_writel static inline void __raw_writel(u32 val, volatile void __iomem *addr) { - asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writeq __raw_writeq static inline void __raw_writeq(u64 val, volatile void __iomem *addr) { - asm volatile("str %0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_readb __raw_readb @@ -184,17 +184,6 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) #define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) - -/* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p - #include <asm-generic/io.h> /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b6bb83400cd8..dff109871f2a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -99,14 +99,10 @@ .macro kern_hyp_va reg alternative_if_not ARM64_HAS_VIRT_HOST_EXTN and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK -alternative_else - nop -alternative_endif -alternative_if_not ARM64_HYP_OFFSET_LOW - nop -alternative_else +alternative_else_nop_endif +alternative_if ARM64_HYP_OFFSET_LOW and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK -alternative_endif +alternative_else_nop_endif .endm #else diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 31b73227b41f..ba62df8c6e35 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -214,7 +214,7 @@ static inline void *phys_to_virt(phys_addr_t x) #ifndef CONFIG_SPARSEMEM_VMEMMAP #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) #define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) @@ -222,11 +222,15 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) -#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ +#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ + PHYS_OFFSET) >> PAGE_SHIFT) #endif #endif +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) + #include <asm-generic/memory_model.h> #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b1892a0dbcb0..a50185375f09 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,22 +27,17 @@ #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> +#include <asm/sysreg.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PID_IN_CONTEXTIDR -static inline void contextidr_thread_switch(struct task_struct *next) -{ - asm( - " msr contextidr_el1, %0\n" - " isb" - : - : "r" (task_pid_nr(next))); -} -#else static inline void contextidr_thread_switch(struct task_struct *next) { + if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) + return; + + write_sysreg(task_pid_nr(next), contextidr_el1); + isb(); } -#endif /* * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. @@ -51,11 +46,8 @@ static inline void cpu_set_reserved_ttbr0(void) { unsigned long ttbr = virt_to_phys(empty_zero_page); - asm( - " msr ttbr0_el1, %0 // set TTBR0\n" - " isb" - : - : "r" (ttbr)); + write_sysreg(ttbr, ttbr0_el1); + isb(); } /* @@ -81,13 +73,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) if (!__cpu_uses_extended_idmap()) return; - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + tcr = read_sysreg(tcr_el1); + tcr &= ~TCR_T0SZ_MASK; + tcr |= t0sz << TCR_T0SZ_OFFSET; + write_sysreg(tcr, tcr_el1); + isb(); } #define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index c3ae239db3ee..eb0c2bd90de9 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -208,6 +208,7 @@ #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 +#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 39f5252673f7..2142c7726e76 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -70,12 +70,13 @@ #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -84,7 +85,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e20bd431184a..ffbb9a520563 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) +#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -84,8 +84,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_global(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) @@ -155,6 +155,16 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pte_t pte_clear_rdonly(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); +} + +static inline pte_t pte_mkpresent(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_VALID)); +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); @@ -168,7 +178,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. */ - if (pte_valid_not_user(pte)) { + if (pte_valid_global(pte)) { dsb(ishst); isb(); } @@ -202,7 +212,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; - if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte, addr); } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ace0a96e7d6e..df2e53d3a969 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,7 +37,6 @@ #include <asm/ptrace.h> #include <asm/types.h> -#ifdef __KERNEL__ #define STACK_TOP_MAX TASK_SIZE_64 #ifdef CONFIG_COMPAT #define AARCH32_VECTORS_BASE 0xffff0000 @@ -49,7 +48,6 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) -#endif /* __KERNEL__ */ struct debug_info { /* Have we suspended stepping by a debugger? */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h new file mode 100644 index 000000000000..4e7e7067afdb --- /dev/null +++ b/arch/arm64/include/asm/sections.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SECTIONS_H +#define __ASM_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char __alt_instructions[], __alt_instructions_end[]; +extern char __exception_text_start[], __exception_text_end[]; +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; +extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +extern char __hyp_text_start[], __hyp_text_end[]; +extern char __idmap_text_start[], __idmap_text_end[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __mmuoff_data_start[], __mmuoff_data_end[]; + +#endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 89206b568cd4..cae331d553f8 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -66,8 +66,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" nop\n" -" nop\n", + __nops(2), /* LSE atomics */ " mov %w1, %w0\n" " cas %w0, %w0, %2\n" @@ -99,9 +98,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) /* LSE atomics */ " mov %w2, %w5\n" " ldadda %w2, %w0, %3\n" -" nop\n" -" nop\n" -" nop\n" + __nops(3) ) /* Did we get the lock? */ @@ -165,8 +162,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) " stlrh %w1, %0", /* LSE atomics */ " mov %w1, #1\n" - " nop\n" - " staddlh %w1, %0") + " staddlh %w1, %0\n" + __nops(1)) : "=Q" (lock->owner), "=&r" (tmp) : : "memory"); @@ -212,7 +209,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 1b\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 2b\n" - " nop", + __nops(1), /* LSE atomics */ "1: mov %w0, wzr\n" "2: casa %w0, %w2, %1\n" @@ -241,8 +238,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) /* LSE atomics */ " mov %w0, wzr\n" " casa %w0, %w2, %1\n" - " nop\n" - " nop") + __nops(2)) : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -290,8 +286,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " add %w0, %w0, #1\n" " tbnz %w0, #31, 1b\n" " stxr %w1, %w0, %2\n" - " nop\n" - " cbnz %w1, 2b", + " cbnz %w1, 2b\n" + __nops(1), /* LSE atomics */ "1: wfe\n" "2: ldxr %w0, %2\n" @@ -317,9 +313,8 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b", /* LSE atomics */ " movn %w0, #0\n" - " nop\n" - " nop\n" - " staddl %w0, %2") + " staddl %w0, %2\n" + __nops(2)) : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : : "memory"); @@ -344,7 +339,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " tbnz %w1, #31, 1f\n" " casa %w0, %w1, %2\n" " sbc %w1, %w1, %w0\n" - " nop\n" + __nops(1) "1:") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 024d623f662e..b8a313fd7a09 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -47,4 +47,7 @@ int swsusp_arch_resume(void); int arch_hibernation_header_save(void *addr, unsigned int max_size); int arch_hibernation_header_restore(void *addr); +/* Used to resume on the CPU we hibernated on */ +int hibernate_resume_nonboot_cpu_disable(void); + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cc06794b7346..e8d46e8e6079 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -100,6 +100,7 @@ /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_UCT (1 << 15) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) @@ -253,16 +254,6 @@ asm( " .endm\n" ); -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - asm volatile("mrs %0, sctlr_el1" : "=r" (val)); - val &= ~clear; - val |= set; - asm volatile("msr sctlr_el1, %0" : : "r" (val)); -} - /* * Unlike read_cpuid, calls to read_sysreg are never expected to be * optimized away or replaced with synthetic values. @@ -273,12 +264,41 @@ static inline void config_sctlr_el1(u32 clear, u32 set) __val; \ }) +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ #define write_sysreg(v, r) do { \ u64 __val = (u64)v; \ - asm volatile("msr " __stringify(r) ", %0" \ - : : "r" (__val)); \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ +} while (0) + +/* + * For registers without architectural names, or simply unsupported by + * GAS. + */ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \ } while (0) +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + val = read_sysreg(sctlr_el1); + val &= ~clear; + val |= set; + write_sysreg(val, sctlr_el1); +} + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 57f110bea6a8..bc812435bc76 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,12 +56,6 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) -#define UDBG_UNDEFINED (1 << 0) -#define UDBG_SYSCALL (1 << 1) -#define UDBG_BADABORT (1 << 2) -#define UDBG_SEGV (1 << 3) -#define UDBG_BUS (1 << 4) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index abd64bd1f6d9..e9ea5a6bd449 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -75,6 +75,9 @@ static inline struct thread_info *current_thread_info(void) __attribute_const__; /* * struct thread_info can be accessed directly via sp_el0. + * + * We don't use read_sysreg() as we want the compiler to cache the value where + * possible. */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b460ae28e346..deab52374119 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -25,6 +25,24 @@ #include <asm/cputype.h> /* + * Raw TLBI operations. + * + * Where necessary, use the __tlbi() macro to avoid asm() + * boilerplate. Drivers and most kernel code should use the TLB + * management routines in preference to the macro below. + * + * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending + * on whether a particular TLBI operation takes an argument or + * not. The macros handles invoking the asm with or without the + * register argument as appropriate. + */ +#define __TLBI_0(op, arg) asm ("tlbi " #op) +#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg)) +#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) + +#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) + +/* * TLB Management * ============== * @@ -66,7 +84,7 @@ static inline void local_flush_tlb_all(void) { dsb(nshst); - asm("tlbi vmalle1"); + __tlbi(vmalle1); dsb(nsh); isb(); } @@ -74,7 +92,7 @@ static inline void local_flush_tlb_all(void) static inline void flush_tlb_all(void) { dsb(ishst); - asm("tlbi vmalle1is"); + __tlbi(vmalle1is); dsb(ish); isb(); } @@ -84,7 +102,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) unsigned long asid = ASID(mm) << 48; dsb(ishst); - asm("tlbi aside1is, %0" : : "r" (asid)); + __tlbi(aside1is, asid); dsb(ish); } @@ -94,7 +112,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vale1is, %0" : : "r" (addr)); + __tlbi(vale1is, addr); dsb(ish); } @@ -122,9 +140,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { if (last_level) - asm("tlbi vale1is, %0" : : "r"(addr)); + __tlbi(vale1is, addr); else - asm("tlbi vae1is, %0" : : "r"(addr)); + __tlbi(vae1is, addr); } dsb(ish); } @@ -149,7 +167,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vaae1is, %0" : : "r"(addr)); + __tlbi(vaae1is, addr); dsb(ish); isb(); } @@ -163,7 +181,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - asm("tlbi vae1is, %0" : : "r" (addr)); + __tlbi(vae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 9cd03f3e812f..02e9035b0685 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -19,6 +19,7 @@ #define __ASM_TRAP_H #include <linux/list.h> +#include <asm/sections.h> struct pt_regs; @@ -39,9 +40,6 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { - extern char __irqentry_text_start[]; - extern char __irqentry_text_end[]; - return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } @@ -54,8 +52,6 @@ static inline int __in_irqentry_text(unsigned long ptr) static inline int in_exception_text(unsigned long ptr) { - extern char __exception_text_start[]; - extern char __exception_text_end[]; int in; in = ptr >= (unsigned long)&__exception_text_start && diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 1788545f25bc..fea10736b11f 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -45,6 +45,8 @@ #ifndef __ASSEMBLY__ #include <asm/ptrace.h> +#include <asm/sections.h> +#include <asm/sysreg.h> /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -75,10 +77,7 @@ static inline bool is_hyp_mode_mismatched(void) static inline bool is_kernel_in_hyp_mode(void) { - u64 el; - - asm("mrs %0, CurrentEL" : "=r" (el)); - return el == CurrentEL_EL2; + return read_sysreg(CurrentEL) == CurrentEL_EL2; } #ifdef CONFIG_ARM64_VHE @@ -87,14 +86,6 @@ extern void verify_cpu_run_el(void); static inline void verify_cpu_run_el(void) {} #endif -/* The section containing the hypervisor idmap text */ -extern char __hyp_idmap_text_start[]; -extern char __hyp_idmap_text_end[]; - -/* The section containing the hypervisor text */ -extern char __hyp_text_start[]; -extern char __hyp_text_end[]; - #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 14f7b651c787..7d66bbaafc0c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -10,6 +10,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg +CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"' + # Object file lists. arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index f85149cc7c71..f01fab637dab 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -105,8 +105,10 @@ int __init arm64_acpi_numa_init(void) int ret; ret = acpi_numa_init(); - if (ret) + if (ret) { + pr_info("Failed to initialise from firmware\n"); return ret; + } return srat_disabled() ? -EINVAL : 0; } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d2ee1b21a10d..06d650f61da7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -25,14 +25,13 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/insn.h> +#include <asm/sections.h> #include <linux/stop_machine.h> #define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -59,6 +58,8 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) BUG(); } +#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) + static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) { u32 insn; @@ -80,6 +81,25 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) offset = target - (unsigned long)insnptr; insn = aarch64_set_branch_offset(insn, offset); } + } else if (aarch64_insn_is_adrp(insn)) { + s32 orig_offset, new_offset; + unsigned long target; + + /* + * If we're replacing an adrp instruction, which uses PC-relative + * immediate addressing, adjust the offset to reflect the new + * PC. adrp operates on 4K aligned addresses. + */ + orig_offset = aarch64_insn_adrp_get_offset(insn); + target = align_down(altinsnptr, SZ_4K) + orig_offset; + new_offset = target - align_down(insnptr, SZ_4K); + insn = aarch64_insn_adrp_set_offset(insn, new_offset); + } else if (aarch64_insn_uses_literal(insn)) { + /* + * Disallow patching unhandled instructions using PC relative + * literal addresses + */ + BUG(); } return insn; @@ -124,8 +144,8 @@ static int __apply_alternatives_multi_stop(void *unused) { static int patched = 0; struct alt_region region = { - .begin = __alt_instructions, - .end = __alt_instructions_end, + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, }; /* We always have a CPU 0 at this point (__init) */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 05070b72fc28..4a2f0f0fef32 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <linux/dma-mapping.h> #include <linux/kvm_host.h> #include <linux/suspend.h> +#include <asm/cpufeature.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> @@ -145,5 +146,6 @@ int main(void) DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); + DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); return 0; } diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index b8629d52fba9..9617301f76b5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -39,7 +39,7 @@ static inline enum cache_type get_cache_type(int level) if (level > MAX_CACHE_LEVEL) return CACHE_TYPE_NOCACHE; - asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr)); + clidr = read_sysreg(clidr_el1); return CLIDR_CTYPE(clidr, level); } @@ -55,11 +55,9 @@ u64 __attribute_const__ cache_get_ccsidr(u64 csselr) WARN_ON(preemptible()); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); return ccsidr; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 82b0fc2e637b..0150394f4cab 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,21 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) entry->midr_range_max); } +static bool +has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != + (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); +} + +static void cpu_enable_trap_ctr_access(void *__unused) +{ + /* Clear SCTLR_EL1.UCT */ + config_sctlr_el1(SCTLR_EL1_UCT, 0); +} + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ @@ -108,6 +123,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Mismatched cache line size", + .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, + .matches = has_mismatched_cache_line_size, + .def_scope = SCOPE_LOCAL_CPU, + .enable = cpu_enable_trap_ctr_access, + }, + { } }; @@ -116,7 +138,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { * and the related information is freed soon after. If the new CPU requires * an errata not detected at boot, fail this CPU. */ -void verify_local_cpu_errata(void) +void verify_local_cpu_errata_workarounds(void) { const struct arm64_cpu_capabilities *caps = arm64_errata; @@ -131,7 +153,7 @@ void verify_local_cpu_errata(void) } } -void check_local_cpu_errata(void) +void update_cpu_errata_workarounds(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index c7cfb8fe06f9..e137ceaf5016 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,7 @@ */ #include <linux/acpi.h> +#include <linux/cache.h> #include <linux/errno.h> #include <linux/of.h> #include <linux/string.h> @@ -28,7 +29,7 @@ extern const struct cpu_operations smp_spin_table_ops; extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; -const struct cpu_operations *cpu_ops[NR_CPUS]; +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 62272eac1352..d577f263cc4a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -46,6 +46,9 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcap_keys); + #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ @@ -74,7 +77,7 @@ static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); -static struct arm64_ftr_bits ftr_id_aa64isar0[] = { +static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), @@ -87,7 +90,7 @@ static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), @@ -101,7 +104,7 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -119,7 +122,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), @@ -130,7 +133,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -139,7 +142,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_ctr[] = { +static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ @@ -147,15 +150,21 @@ static struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will - * make use of *minLine + * make use of *minLine. + * If we have differing I-cache policies, report it as the weakest - AIVIVT. */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr0[] = { +struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { + .name = "SYS_CTR_EL0", + .ftr_bits = ftr_ctr +}; + +static const struct arm64_ftr_bits ftr_id_mmfr0[] = { S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ @@ -167,7 +176,7 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -178,14 +187,14 @@ static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_mvfr2[] = { +static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_dczid[] = { +static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ @@ -193,7 +202,7 @@ static struct arm64_ftr_bits ftr_dczid[] = { }; -static struct arm64_ftr_bits ftr_id_isar5[] = { +static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), @@ -204,14 +213,14 @@ static struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr4[] = { +static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_pfr0[] = { +static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ @@ -220,7 +229,7 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_dfr0[] = { +static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -238,7 +247,7 @@ static struct arm64_ftr_bits ftr_id_dfr0[] = { * 0. Covers the following 32bit registers: * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ -static struct arm64_ftr_bits ftr_generic_32bits[] = { +static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -250,29 +259,32 @@ static struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic[] = { +static const struct arm64_ftr_bits ftr_generic[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic32[] = { +static const struct arm64_ftr_bits ftr_generic32[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_aa64raz[] = { +static const struct arm64_ftr_bits ftr_aa64raz[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) \ - { \ - .sys_id = id, \ +#define ARM64_FTR_REG(id, table) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ .name = #id, \ .ftr_bits = &((table)[0]), \ - } + }} -static struct arm64_ftr_reg arm64_ftr_regs[] = { +static const struct __ftr_reg_entry { + u32 sys_id; + struct arm64_ftr_reg *reg; +} arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), @@ -315,7 +327,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ - ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), /* Op1 = 3, CRn = 14, CRm = 0 */ @@ -324,7 +336,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { static int search_cmp_ftr_reg(const void *id, const void *regp) { - return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; + return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id; } /* @@ -339,14 +351,20 @@ static int search_cmp_ftr_reg(const void *id, const void *regp) */ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) { - return bsearch((const void *)(unsigned long)sys_id, + const struct __ftr_reg_entry *ret; + + ret = bsearch((const void *)(unsigned long)sys_id, arm64_ftr_regs, ARRAY_SIZE(arm64_ftr_regs), sizeof(arm64_ftr_regs[0]), search_cmp_ftr_reg); + if (ret) + return ret->reg; + return NULL; } -static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, + s64 ftr_val) { u64 mask = arm64_ftr_mask(ftrp); @@ -355,7 +373,8 @@ static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val return reg; } -static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, + s64 cur) { s64 ret = 0; @@ -376,27 +395,13 @@ static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) return ret; } -static int __init sort_cmp_ftr_regs(const void *a, const void *b) -{ - return ((const struct arm64_ftr_reg *)a)->sys_id - - ((const struct arm64_ftr_reg *)b)->sys_id; -} - -static void __init swap_ftr_regs(void *a, void *b, int size) -{ - struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; - *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; - *(struct arm64_ftr_reg *)b = tmp; -} - static void __init sort_ftr_regs(void) { - /* Keep the array sorted so that we can do the binary search */ - sort(arm64_ftr_regs, - ARRAY_SIZE(arm64_ftr_regs), - sizeof(arm64_ftr_regs[0]), - sort_cmp_ftr_regs, - swap_ftr_regs); + int i; + + /* Check that the array is sorted so that we can do the binary search */ + for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++) + BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); } /* @@ -407,7 +412,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); BUG_ON(!reg); @@ -464,7 +469,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) { - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); @@ -1004,23 +1009,33 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) * cannot do anything to fix it up and could cause unexpected failures. So * we park the CPU. */ -void verify_local_cpu_capabilities(void) +static void verify_local_cpu_capabilities(void) { + verify_local_cpu_errata_workarounds(); + verify_local_cpu_features(arm64_features); + verify_local_elf_hwcaps(arm64_elf_hwcaps); + if (system_supports_32bit_el0()) + verify_local_elf_hwcaps(compat_elf_hwcaps); +} +void check_local_cpu_capabilities(void) +{ + /* + * All secondary CPUs should conform to the early CPU features + * in use by the kernel based on boot CPU. + */ check_early_cpu_features(); /* - * If we haven't computed the system capabilities, there is nothing - * to verify. + * If we haven't finalised the system capabilities, this CPU gets + * a chance to update the errata work arounds. + * Otherwise, this CPU should verify that it has all the system + * advertised capabilities. */ if (!sys_caps_initialised) - return; - - verify_local_cpu_errata(); - verify_local_cpu_features(arm64_features); - verify_local_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) - verify_local_elf_hwcaps(compat_elf_hwcaps); + update_cpu_errata_workarounds(); + else + verify_local_cpu_capabilities(); } static void __init setup_feature_capabilities(void) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index ed1b84fe6925..b3d5b3e8fbcb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -363,8 +363,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) } cpuinfo_detect_icache_policy(info); - - check_local_cpu_errata(); } void cpuinfo_store_cpu(void) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 91fff48d0f57..73ae90ef434c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -46,16 +46,14 @@ static void mdscr_write(u32 mdscr) { unsigned long flags; local_dbg_save(flags); - asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + write_sysreg(mdscr, mdscr_el1); local_dbg_restore(flags); } NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { - u32 mdscr; - asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); - return mdscr; + return read_sysreg(mdscr_el1); } NOKPROBE_SYMBOL(mdscr_read); @@ -132,36 +130,18 @@ NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. */ -static void clear_os_lock(void *unused) +static int clear_os_lock(unsigned int cpu) { - asm volatile("msr oslar_el1, %0" : : "r" (0)); -} - -static int os_lock_notify(struct notifier_block *self, - unsigned long action, void *data) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - clear_os_lock(NULL); - return NOTIFY_OK; + write_sysreg(0, oslar_el1); + isb(); + return 0; } -static struct notifier_block os_lock_nb = { - .notifier_call = os_lock_notify, -}; - static int debug_monitors_init(void) { - cpu_notifier_register_begin(); - - /* Clear the OS lock. */ - on_each_cpu(clear_os_lock, NULL, 1); - isb(); - - /* Register hotplug handler. */ - __register_cpu_notifier(&os_lock_nb); - - cpu_notifier_register_done(); - return 0; + return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, + "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING", + clear_os_lock, NULL); } postcore_initcall(debug_monitors_init); @@ -254,7 +234,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; if (user_mode(regs)) { - send_user_sigtrap(TRAP_HWBKPT); + send_user_sigtrap(TRAP_TRACE); /* * ptrace will disable single step unless explicitly @@ -382,7 +362,7 @@ NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, - TRAP_HWBKPT, "single-step handler"); + TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler"); return 0; @@ -435,8 +415,10 @@ NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) { - set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); - set_regs_spsr_ss(task_pt_regs(task)); + struct thread_info *ti = task_thread_info(task); + + if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_enable_single_step); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 441420ca7d08..223d54a4d66b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -104,7 +104,7 @@ str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 str x20, [tsk, #TI_ADDR_LIMIT] - ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 @@ -150,13 +150,7 @@ ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 -alternative_if_not ARM64_WORKAROUND_845719 - nop - nop -#ifdef CONFIG_PID_IN_CONTEXTIDR - nop -#endif -alternative_else +alternative_if ARM64_WORKAROUND_845719 tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 @@ -165,7 +159,7 @@ alternative_else msr contextidr_el1, xzr #endif 1: -alternative_endif +alternative_else_nop_endif #endif .endif msr elr_el1, x21 // set up the return data @@ -707,18 +701,13 @@ ret_fast_syscall_trace: * Ok, we need to do extra processing, enter the slow path. */ work_pending: - tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ mov x0, sp // 'regs' - enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume - b ret_to_user -work_resched: #ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off // the IRQs are off here, inform the tracing code + bl trace_hardirqs_on // enabled while in userspace #endif - bl schedule - + ldr x1, [tsk, #TI_FLAGS] // re-check for single-step + b finish_ret_to_user /* * "slow" syscall return path. */ @@ -727,6 +716,7 @@ ret_to_user: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending +finish_ret_to_user: enable_step_tsk x1, x2 kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3e7b050e99dc..427f6d3f084c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -208,13 +208,23 @@ efi_header_end: __INIT + /* + * The following callee saved general purpose registers are used on the + * primary lowlevel boot path: + * + * Register Scope Purpose + * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 + * x23 stext() .. start_kernel() physical misalignment/KASLR offset + * x28 __create_page_tables() callee preserved temp register + * x19/x20 __primary_switch() callee preserved temp registers + */ ENTRY(stext) bl preserve_boot_args - bl el2_setup // Drop to EL1, w20=cpu_boot_mode - adrp x24, __PHYS_OFFSET - and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 + bl el2_setup // Drop to EL1, w0=cpu_boot_mode + adrp x23, __PHYS_OFFSET + and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag - bl __create_page_tables // x25=TTBR0, x26=TTBR1 + bl __create_page_tables /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for * details. @@ -222,9 +232,7 @@ ENTRY(stext) * the TCR will have been set. */ bl __cpu_setup // initialise processor - adr_l x27, __primary_switch // address to jump to after - // MMU has been enabled - b __enable_mmu + b __primary_switch ENDPROC(stext) /* @@ -311,23 +319,21 @@ ENDPROC(preserve_boot_args) * been enabled */ __create_page_tables: - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential * dirty cache lines being evicted. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -340,7 +346,7 @@ __create_page_tables: /* * Create the identity mapping. */ - mov x0, x25 // idmap_pg_dir + adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) #ifndef CONFIG_ARM64_VA_BITS_48 @@ -390,7 +396,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - mov x0, x26 // swapper_pg_dir + adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 @@ -405,8 +411,8 @@ __create_page_tables: * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE dmb sy bl __inval_cache_range @@ -416,14 +422,27 @@ ENDPROC(__create_page_tables) /* * The following fragment of code is executed with the MMU enabled. + * + * x0 = __PHYS_OFFSET */ - .set initial_sp, init_thread_union + THREAD_START_SP __primary_switched: - mov x28, lr // preserve LR + adrp x4, init_thread_union + add sp, x4, #THREAD_SIZE + msr sp_el0, x4 // Save thread_info + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb + stp xzr, x30, [sp, #-16]! + mov x29, sp + + str_l x21, __fdt_pointer, x5 // Save FDT pointer + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x0 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -432,17 +451,6 @@ __primary_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW - adr_l sp, initial_sp, x4 - mov x4, sp - and x4, x4, #~(THREAD_SIZE - 1) - msr sp_el0, x4 // Save thread_info - str_l x21, __fdt_pointer, x5 // Save FDT pointer - - ldr_l x4, kimage_vaddr // Save the offset between - sub x4, x4, x24 // the kernel virtual and - str_l x4, kimage_voffset, x5 // physical mappings - - mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif @@ -454,8 +462,8 @@ __primary_switched: bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset - ret x28 // we must enable KASLR, return - // to __enable_mmu() + ldp x29, x30, [sp], #16 // we must enable KASLR, return + ret // to __primary_switch() 0: #endif b start_kernel @@ -465,7 +473,7 @@ ENDPROC(__primary_switched) * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region */ - .section ".text","ax" + .section ".idmap.text","ax" ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET @@ -490,7 +498,7 @@ CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 - mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret @@ -586,7 +594,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems cbz x2, install_el2_stub - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb ret @@ -601,7 +609,7 @@ install_el2_stub: PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) @@ -611,27 +619,39 @@ ENDPROC(el2_setup) */ set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode - cmp w20, #BOOT_CPU_MODE_EL2 + cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: str w20, [x1] // This CPU has booted in EL1 +1: str w0, [x1] // This CPU has booted in EL1 dmb sy dc ivac, x1 // Invalidate potentially stale cache line ret ENDPROC(set_cpu_boot_mode_flag) /* + * These values are written with the MMU off, but read with the MMU on. + * Writers will invalidate the corresponding address, discarding up to a + * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures + * sufficient alignment that the CWG doesn't overlap another section. + */ + .pushsection ".mmuoff.data.write", "aw" +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 .long BOOT_CPU_MODE_EL1 +/* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + */ +ENTRY(__early_cpu_boot_status) + .long 0 + .popsection /* @@ -639,7 +659,7 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl el2_setup // Drop to EL1, w0=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK @@ -666,12 +686,10 @@ secondary_startup: /* * Common entry point for secondary CPUs. */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - - adr_l x27, __secondary_switch // address to jump to after enabling the MMU - b __enable_mmu + bl __enable_mmu + ldr x8, =__secondary_switched + br x8 ENDPROC(secondary_startup) __secondary_switched: @@ -706,33 +724,27 @@ ENDPROC(__secondary_switched) dc ivac, \tmp1 // Invalidate potentially stale cache line .endm - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT -ENTRY(__early_cpu_boot_status) - .long 0 - .popsection - /* * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. - * x27 = *virtual* address to jump to upon completion * - * Other registers depend on the function called upon completion. + * Returns to the caller via x30/lr. This requires the caller to be covered + * by the .idmap.text section. * * Checks if the selected granule size is supported by the CPU. * If it isn't, park the CPU */ - .section ".idmap.text", "ax" ENTRY(__enable_mmu) - mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support update_early_cpu_boot_status 0, x1, x2 - msr ttbr0_el1, x25 // load TTBR0 - msr ttbr1_el1, x26 // load TTBR1 + adrp x1, idmap_pg_dir + adrp x2, swapper_pg_dir + msr ttbr0_el1, x1 // load TTBR0 + msr ttbr1_el1, x2 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -744,29 +756,7 @@ ENTRY(__enable_mmu) ic iallu dsb nsh isb -#ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - blr x27 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - msr sctlr_el1, x22 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb -#endif - br x27 + ret ENDPROC(__enable_mmu) __no_granule_support: @@ -775,11 +765,11 @@ __no_granule_support: 1: wfe wfi - b 1b + b 1b ENDPROC(__no_granule_support) -__primary_switch: #ifdef CONFIG_RELOCATABLE +__relocate_kernel: /* * Iterate over each entry in the relocation table, and apply the * relocations in place. @@ -801,14 +791,46 @@ __primary_switch: add x13, x13, x23 // relocate str x13, [x11, x23] b 0b +1: ret +ENDPROC(__relocate_kernel) +#endif -1: +__primary_switch: +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + + bl __enable_mmu +#ifdef CONFIG_RELOCATABLE + bl __relocate_kernel +#ifdef CONFIG_RANDOMIZE_BASE ldr x8, =__primary_switched - br x8 -ENDPROC(__primary_switch) + adrp x0, __PHYS_OFFSET + blr x8 -__secondary_switch: - ldr x8, =__secondary_switched + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x20 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic iallu // flush instructions fetched + dsb nsh // via old mapping + isb + + bl __relocate_kernel +#endif +#endif + ldr x8, =__primary_switched + adrp x0, __PHYS_OFFSET br x8 -ENDPROC(__secondary_switch) +ENDPROC(__primary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 46f29b6560ec..e56d848b6466 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -36,8 +36,8 @@ .macro break_before_make_ttbr_switch zero_page, page_table msr ttbr1_el1, \zero_page isb - tlbi vmalle1is - dsb ish + tlbi vmalle1 + dsb nsh msr ttbr1_el1, \page_table isb .endm @@ -96,7 +96,7 @@ ENTRY(swsusp_arch_suspend_exit) add x1, x10, #PAGE_SIZE /* Clean the copied page to PoU - based on flush_icache_range() */ - dcache_line_size x2, x3 + raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 2: dc cvau, x4 /* clean D line / unified line */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 65d81f965e74..d55a7b09959b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -15,9 +15,9 @@ * License terms: GNU General Public License (GPL) version 2 */ #define pr_fmt(x) "hibernate: " x +#include <linux/cpu.h> #include <linux/kvm_host.h> #include <linux/mm.h> -#include <linux/notifier.h> #include <linux/pm.h> #include <linux/sched.h> #include <linux/suspend.h> @@ -26,6 +26,7 @@ #include <asm/barrier.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #include <asm/irqflags.h> #include <asm/memory.h> #include <asm/mmu_context.h> @@ -34,6 +35,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/sections.h> #include <asm/smp.h> +#include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/sysreg.h> #include <asm/virt.h> @@ -54,12 +56,6 @@ extern int in_suspend; /* Do we need to reset el2? */ #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) -/* - * Start/end of the hibernate exit code, this must be copied to a 'safe' - * location in memory, and executed from there. - */ -extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; - /* temporary el2 vectors in the __hibernate_exit_text section. */ extern char hibernate_el2_vectors[]; @@ -67,6 +63,12 @@ extern char hibernate_el2_vectors[]; extern char __hyp_stub_vectors[]; /* + * The logical cpu number we should resume on, initialised to a non-cpu + * number. + */ +static int sleep_cpu = -EINVAL; + +/* * Values that may not change over hibernate/resume. We put the build number * and date in here so that we guarantee not to resume with a different * kernel. @@ -88,6 +90,8 @@ static struct arch_hibernate_hdr { * re-configure el2. */ phys_addr_t __hyp_stub_vectors; + + u64 sleep_cpu_mpidr; } resume_hdr; static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) @@ -130,12 +134,22 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) else hdr->__hyp_stub_vectors = 0; + /* Save the mpidr of the cpu we called cpu_suspend() on... */ + if (sleep_cpu < 0) { + pr_err("Failing to hibernate on an unkown CPU.\n"); + return -ENODEV; + } + hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu); + pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + return 0; } EXPORT_SYMBOL(arch_hibernation_header_save); int arch_hibernation_header_restore(void *addr) { + int ret; struct arch_hibernate_hdr_invariants invariants; struct arch_hibernate_hdr *hdr = addr; @@ -145,6 +159,24 @@ int arch_hibernation_header_restore(void *addr) return -EINVAL; } + sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr); + pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + if (sleep_cpu < 0) { + pr_crit("Hibernated on a CPU not known to this kernel!\n"); + sleep_cpu = -EINVAL; + return -EINVAL; + } + if (!cpu_online(sleep_cpu)) { + pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); + ret = cpu_up(sleep_cpu); + if (ret) { + pr_err("Failed to bring hibernate-CPU up!\n"); + sleep_cpu = -EINVAL; + return ret; + } + } + resume_hdr = *hdr; return 0; @@ -241,6 +273,7 @@ out: return rc; } +#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) int swsusp_arch_suspend(void) { @@ -256,10 +289,16 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { - /* Clean kernel to PoC for secondary core startup */ - __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + /* Clean kernel core startup/idle code to PoC*/ + dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); + dcache_clean_range(__idmap_text_start, __idmap_text_end); + + /* Clean kvm setup code to PoC? */ + if (el2_reset_needed()) + dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); /* * Tell the hibernation core that we've just restored @@ -267,6 +306,7 @@ int swsusp_arch_suspend(void) */ in_suspend = 0; + sleep_cpu = -EINVAL; __cpu_suspend_exit(); } @@ -275,6 +315,33 @@ int swsusp_arch_suspend(void) return ret; } +static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + + if (pte_valid(pte)) { + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, pte_clear_rdonly(pte)); + } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + /* + * debug_pagealloc will removed the PTE_VALID bit if + * the page isn't in use by the resume kernel. It may have + * been in use by the original kernel, in which case we need + * to put it back in our copy to do the restore. + * + * Before marking this entry valid, check the pfn should + * be mapped. + */ + BUG_ON(!pfn_valid(pte_pfn(pte))); + + set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + } +} + static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, unsigned long end) { @@ -290,13 +357,7 @@ static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, src_pte = pte_offset_kernel(src_pmd, start); do { - if (!pte_none(*src_pte)) - /* - * Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. - */ - set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + _copy_pte(dst_pte, src_pte, addr); } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); return 0; @@ -483,27 +544,12 @@ out: return rc; } -static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, - unsigned long action, void *ptr) +int hibernate_resume_nonboot_cpu_disable(void) { - if (action == PM_HIBERNATION_PREPARE && - cpumask_first(cpu_online_mask) != 0) { - pr_warn("CPU0 is offline.\n"); - return notifier_from_errno(-ENODEV); + if (sleep_cpu < 0) { + pr_err("Failing to resume from hibernate on an unkown CPU.\n"); + return -ENODEV; } - return NOTIFY_OK; -} - -static int __init check_boot_cpu_online_init(void) -{ - /* - * Set this pm_notifier callback with a lower priority than - * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be - * called earlier to disable cpu hotplug before the cpu online check. - */ - pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); - - return 0; + return freeze_secondary_cpus(sleep_cpu); } -core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 26a6bf77d272..948b73148d56 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -857,7 +857,7 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void hw_breakpoint_reset(void *unused) +static int hw_breakpoint_reset(unsigned int cpu) { int i; struct perf_event **slots; @@ -888,28 +888,14 @@ static void hw_breakpoint_reset(void *unused) write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); } } -} -static int hw_breakpoint_reset_notify(struct notifier_block *self, - unsigned long action, - void *hcpu) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { - local_irq_disable(); - hw_breakpoint_reset(NULL); - local_irq_enable(); - } - return NOTIFY_OK; + return 0; } -static struct notifier_block hw_breakpoint_reset_nb = { - .notifier_call = hw_breakpoint_reset_notify, -}; - #ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); #else -static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { } #endif @@ -919,36 +905,34 @@ static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ static int __init arch_hw_breakpoint_init(void) { + int ret; + core_num_brps = get_num_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); - cpu_notifier_register_begin(); - - /* - * Reset the breakpoint resources. We assume that a halting - * debugger will leave the world in a nice state for us. - */ - smp_call_function(hw_breakpoint_reset, NULL, 1); - hw_breakpoint_reset(NULL); - /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-breakpoint handler"); hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler"); - /* Register hotplug notifier. */ - __register_cpu_notifier(&hw_breakpoint_reset_nb); - - cpu_notifier_register_done(); + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, + "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING", + hw_breakpoint_reset, NULL); + if (ret) + pr_err("failed to register CPU hotplug notifier: %d\n", ret); /* Register cpu_suspend hw breakpoint restore hook */ cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); - return 0; + return ret; } arch_initcall(arch_hw_breakpoint_init); diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 63f9432d05e8..6f2ac4fc66ca 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); - else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + else if (!module) page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; @@ -1202,6 +1202,19 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +s32 aarch64_insn_adrp_get_offset(u32 insn) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12; +} + +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, + offset >> 12); +} + /* * Extract the Op/CR data from a msr/mrs instruction. */ diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index b05469173ba5..769f24ef628c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include <linux/cache.h> #include <linux/crc32.h> #include <linux/init.h> #include <linux/libfdt.h> @@ -20,7 +21,7 @@ #include <asm/pgtable.h> #include <asm/sections.h> -u64 __read_mostly module_alloc_base; +u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 838ccf123307..a9310a69fffd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/sysreg.h> #include <asm/virt.h> +#include <linux/acpi.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> @@ -190,13 +191,23 @@ #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED /* PMUv3 HW events mapping. */ + +/* + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Undefined events will + * be disabled at run-time. + */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; /* ARM Cortex-A53 HW events mapping. */ @@ -258,6 +269,15 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, @@ -523,12 +543,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; -static const struct attribute_group *armv8_pmuv3_attr_groups[] = { - &armv8_pmuv3_events_attr_group, - &armv8_pmuv3_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -905,9 +919,22 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + int hw_event_id; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + if (hw_event_id < 0) + return hw_event_id; + + /* disable micro/arch events not supported by this PMU */ + if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && + !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return -EOPNOTSUPP; + } + + return hw_event_id; } static int armv8_a53_map_event(struct perf_event *event) @@ -985,7 +1012,10 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -994,7 +1024,10 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1003,7 +1036,10 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1012,7 +1048,10 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1021,7 +1060,10 @@ static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1030,7 +1072,10 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_brcm_vulcan"; cpu_pmu->map_event = armv8_vulcan_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1044,21 +1089,32 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; +/* + * Non DT systems have their micro/arch events probed at run-time. + * A fairly complete list of generic events are provided and ones that + * aren't supported by the current PMU are disabled. + */ +static const struct pmu_probe_info armv8_pmu_probe_table[] = { + PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ + { /* sentinel value */ } +}; + static int armv8_pmu_device_probe(struct platform_device *pdev) { - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); + if (acpi_disabled) + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + NULL); + + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + armv8_pmu_probe_table); } static struct platform_driver armv8_pmu_driver = { .driver = { - .name = "armv8-pmu", + .name = ARMV8_PMU_PDEV_NAME, .of_match_table = armv8_pmu_of_device_ids, }, .probe = armv8_pmu_device_probe, }; -static int __init register_armv8_pmu_driver(void) -{ - return platform_driver_register(&armv8_pmu_driver); -} -device_initcall(register_armv8_pmu_driver); +builtin_platform_driver(armv8_pmu_driver); diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 37e47a9d617e..d1731bf977ef 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> +#include <linux/kallsyms.h> #include <asm/kprobes.h> #include <asm/insn.h> #include <asm/sections.h> @@ -122,7 +123,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) static bool __kprobes is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) { - while (scan_start > scan_end) { + while (scan_start >= scan_end) { /* * atomic region starts from exclusive load and ends with * exclusive store. @@ -142,33 +143,30 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { enum kprobe_insn decoded; kprobe_opcode_t insn = le32_to_cpu(*addr); - kprobe_opcode_t *scan_start = addr - 1; - kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - struct module *mod; -#endif - - if (addr >= (kprobe_opcode_t *)_text && - scan_end < (kprobe_opcode_t *)_text) - scan_end = (kprobe_opcode_t *)_text; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - else { - preempt_disable(); - mod = __module_address((unsigned long)addr); - if (mod && within_module_init((unsigned long)addr, mod) && - !within_module_init((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->init_layout.base; - else if (mod && within_module_core((unsigned long)addr, mod) && - !within_module_core((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->core_layout.base; - preempt_enable(); + kprobe_opcode_t *scan_end = NULL; + unsigned long size = 0, offset = 0; + + /* + * If there's a symbol defined in front of and near enough to + * the probe address assume it is the entry point to this + * code and use it to further limit how far back we search + * when determining if we're in an atomic sequence. If we could + * not find any symbol skip the atomic test altogether as we + * could otherwise end up searching irrelevant text/literals. + * KPROBES depends on KALLSYMS so this last case should never + * happen. + */ + if (kallsyms_lookup_size_offset((unsigned long) addr, &size, &offset)) { + if (offset < (MAX_ATOMIC_CONTEXT_SIZE*sizeof(kprobe_opcode_t))) + scan_end = addr - (offset / sizeof(kprobe_opcode_t)); + else + scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } -#endif decoded = arm_probe_decode_insn(insn, asi); - if (decoded == INSN_REJECTED || - is_probed_address_atomic(scan_start, scan_end)) - return INSN_REJECTED; + if (decoded != INSN_REJECTED && scan_end) + if (is_probed_address_atomic(addr - 1, scan_end)) + return INSN_REJECTED; return decoded; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c6b0f40620d8..f5077ea7af6d 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -19,7 +19,7 @@ #include <linux/kasan.h> #include <linux/kernel.h> #include <linux/kprobes.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> #include <linux/stringify.h> @@ -31,7 +31,7 @@ #include <asm/insn.h> #include <asm/uaccess.h> #include <asm/irq.h> -#include <asm-generic/sections.h> +#include <asm/sections.h> #include "decode-insn.h" @@ -166,13 +166,18 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * The D-flag (Debug mask) is set (masked) upon debug exception entry. - * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive - * probe i.e. when probe hit from kprobe handler context upon - * executing the pre/post handlers. In this case we return with - * D-flag clear so that single-stepping can be carried-out. - * - * Leave D-flag set in all other cases. + * When PSTATE.D is set (masked), then software step exceptions can not be + * generated. + * SPSR's D bit shows the value of PSTATE.D immediately before the + * exception was taken. PSTATE.D is set while entering into any exception + * mode, however software clears it for any normal (none-debug-exception) + * mode in the exception entry. Therefore, when we are entering into kprobe + * breakpoint handler from any normal mode then SPSR.D bit is already + * cleared, however it is set when we are entering from any debug exception + * mode. + * Since we always need to generate single step exception after a kprobe + * breakpoint exception therefore we need to clear it unconditionally, when + * we become sure that the current breakpoint exception is for kprobe. */ static void __kprobes spsr_set_debug_flag(struct pt_regs *regs, int mask) @@ -245,10 +250,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, set_ss_context(kcb, slot); /* mark pending ss */ - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 0); - else - WARN_ON(regs->pstate & PSR_D_BIT); + spsr_set_debug_flag(regs, 0); /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); @@ -333,8 +335,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) BUG(); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); @@ -457,9 +457,6 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) kprobes_restore_local_irqflag(kcb, regs); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); - post_kprobe_handler(kcb, regs); } @@ -543,9 +540,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) bool arch_within_kprobe_blacklist(unsigned long addr) { - extern char __idmap_text_start[], __idmap_text_end[]; - extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - if ((addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6cd2612236dc..a4f5f766af08 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -202,7 +202,7 @@ void show_regs(struct pt_regs * regs) static void tls_thread_flush(void) { - asm ("msr tpidr_el0, xzr"); + write_sysreg(0, tpidr_el0); if (is_compat_task()) { current->thread.tp_value = 0; @@ -213,7 +213,7 @@ static void tls_thread_flush(void) * with a stale shadow state during context switch. */ barrier(); - asm ("msr tpidrro_el0, xzr"); + write_sysreg(0, tpidrro_el0); } } @@ -253,7 +253,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * Read the current TLS pointer from tpidr_el0 as it may be * out-of-sync with the saved value. */ - asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p))); + *task_user_tls(p) = read_sysreg(tpidr_el0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -289,17 +289,15 @@ static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + tpidr = read_sysreg(tpidr_el0); *task_user_tls(current) = tpidr; tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? next->thread.tp_value : 0; - asm( - " msr tpidr_el0, %0\n" - " msr tpidrro_el0, %1" - : : "r" (tpidr), "r" (tpidrro)); + write_sysreg(tpidr, tpidr_el0); + write_sysreg(tpidrro, tpidrro_el0); } /* Restore the UAO state depending on next's addr_limit */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 51b73cdde287..ce704a4aeadd 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -34,7 +34,7 @@ ENTRY(arm64_relocate_new_kernel) /* Setup the list loop variables. */ mov x17, x1 /* x17 = kimage_start */ mov x16, x0 /* x16 = kimage_head */ - dcache_line_size x15, x0 /* x15 = dcache line size */ + raw_dcache_line_size x15, x0 /* x15 = dcache line size */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 536dce22fe76..f534f492a268 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -206,10 +206,15 @@ static void __init request_standard_resources(void) for_each_memblock(memory, region) { res = alloc_bootmem_low(sizeof(*res)); - res->name = "System RAM"; + if (memblock_is_nomap(region)) { + res->name = "reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); @@ -228,7 +233,7 @@ void __init setup_arch(char **cmdline_p) { pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); - sprintf(init_utsname()->machine, ELF_PLATFORM); + sprintf(init_utsname()->machine, UTS_MACHINE); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8eafdbc7cb8..404dd67080b9 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -402,15 +402,31 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } - - if (thread_flags & _TIF_FOREIGN_FPSTATE) - fpsimd_restore_current_state(); + /* + * The assembly code enters us with IRQs off, but it hasn't + * informed the tracing code of that for efficiency reasons. + * Update the trace code with the current status. + */ + trace_hardirqs_off(); + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + + if (thread_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } + local_irq_disable(); + thread_flags = READ_ONCE(current_thread_info()->flags); + } while (thread_flags & _TIF_WORK_MASK); } diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ccf79d849e0a..b8799e7c79de 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -73,10 +73,9 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] /* find the mpidr_hash */ - ldr x1, =sleep_save_stash - ldr x1, [x1] + ldr_l x1, sleep_save_stash mrs x7, mpidr_el1 - ldr x9, =mpidr_hash + adr_l x9, mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] /* * Following code relies on the struct mpidr_hash @@ -95,36 +94,30 @@ ENTRY(__cpu_suspend_enter) mov x0, #1 ret ENDPROC(__cpu_suspend_enter) - .ltorg + .pushsection ".idmap.text", "ax" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ - adr_l lr, __enable_mmu /* __cpu_setup will return here */ - adr_l x27, _resume_switched /* __enable_mmu will branch here */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir - b __cpu_setup -ENDPROC(cpu_resume) - - .pushsection ".idmap.text", "ax" -_resume_switched: + bl __enable_mmu ldr x8, =_cpu_resume br x8 -ENDPROC(_resume_switched) +ENDPROC(cpu_resume) .ltorg .popsection ENTRY(_cpu_resume) mrs x1, mpidr_el1 - adrp x8, mpidr_hash - add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address - /* retrieve mpidr_hash members to compute the hash */ + adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address + + /* retrieve mpidr_hash members to compute the hash */ ldr x2, [x8, #MPIDR_HASH_MASK] ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 - /* x7 contains hash index, let's use it to grab context pointer */ + + /* x7 contains hash index, let's use it to grab context pointer */ ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3ff173e92582..d3f151cfd4a1 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -233,7 +233,7 @@ asmlinkage void secondary_start_kernel(void) * this CPU ticks all of those. If it doesn't, the CPU will * fail to come online. */ - verify_local_cpu_capabilities(); + check_local_cpu_capabilities(); if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); @@ -431,8 +431,19 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code. + */ + jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); + /* + * Run the errata work around checks on the boot CPU, once we have + * initialised the cpu feature infrastructure from + * cpuinfo_store_boot_cpu() above. + */ + update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -613,6 +624,7 @@ static void __init of_parse_and_init_cpus(void) } bootcpu_valid = true; + early_map_cpu_to_node(0, of_node_to_nid(dn)); /* * cpu_logical_map has already been diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 18a71bcd26ee..9a00eee9acc8 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -29,7 +29,8 @@ #include <asm/smp_plat.h> extern void secondary_holding_pen(void); -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; +volatile unsigned long __section(".mmuoff.data.read") +secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d9751a4769e7..c2efddfca18c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -43,6 +43,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; unsigned long irq_stack_ptr; + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -67,7 +70,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (tsk && tsk->ret_stack && + if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { /* * This is a case where function graph tracer has @@ -152,6 +155,27 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + data.no_sched_functions = 0; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif + + walk_stackframe(current, &frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { struct stack_trace_data data; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index b616e365cee3..ad734142070d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -23,8 +23,8 @@ unsigned long *sleep_save_stash; * time the notifier runs debug exceptions might have been enabled already, * with HW breakpoints registers content still in an unknown state. */ -static void (*hw_breakpoint_restore)(void *); -void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static int (*hw_breakpoint_restore)(unsigned int); +void __init cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { /* Prevent multiple restore hook initializations */ if (WARN_ON(hw_breakpoint_restore)) @@ -34,6 +34,8 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) void notrace __cpu_suspend_exit(void) { + unsigned int cpu = smp_processor_id(); + /* * We are resuming from reset with the idmap active in TTBR0_EL1. * We must uninstall the idmap and restore the expected MMU @@ -45,7 +47,7 @@ void notrace __cpu_suspend_exit(void) * Restore per-cpu offset before any kernel * subsystem relying on it has a chance to run. */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * Restore HW breakpoint registers to sane values @@ -53,7 +55,7 @@ void notrace __cpu_suspend_exit(void) * through local_dbg_restore. */ if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + hw_breakpoint_restore(cpu); } /* diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 28c511b06edf..abaf582fc7a8 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -94,7 +94,7 @@ long compat_arm_syscall(struct pt_regs *regs) * See comment in tls_thread_flush. */ barrier(); - asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); + write_sysreg(regs->regs[0], tpidrro_el0); return 0; default: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e04f83873af7..5ff020f8fb7f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -142,6 +142,11 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long irq_stack_ptr; int skip; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -151,11 +156,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) else irq_stack_ptr = 0; - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (!tsk) - tsk = current; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -447,36 +447,29 @@ void cpu_enable_cache_maint_trap(void *__unused) : "=r" (res) \ : "r" (address), "i" (-EFAULT) ) -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int ret; - - /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ - if ((esr & 0x01fffc01) == 0x0012dc00) { - int rt = (esr >> 5) & 0x1f; - int crm = (esr >> 1) & 0x0f; + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + int ret = 0; - address = (rt == 31) ? 0 : regs->regs[rt]; + address = (rt == 31) ? 0 : regs->regs[rt]; - switch (crm) { - case 11: /* DC CVAU, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 10: /* DC CVAC, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 14: /* DC CIVAC */ - __user_cache_maint("dc civac", address, ret); - break; - case 5: /* IC IVAU */ - __user_cache_maint("ic ivau", address, ret); - break; - default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); - return; - } - } else { + switch (crm) { + case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_IC_IVAU: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); return; } @@ -487,6 +480,48 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val; + regs->pc += 4; +} + +struct sys64_hook { + unsigned int esr_mask; + unsigned int esr_val; + void (*handler)(unsigned int esr, struct pt_regs *regs); +}; + +static struct sys64_hook sys64_hooks[] = { + { + .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, + .handler = user_cache_maint_handler, + }, + { + /* Trap read access to CTR_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, + .handler = ctr_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook; + + for (hook = sys64_hooks; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} + long compat_arm_syscall(struct pt_regs *regs); asmlinkage long do_ni_syscall(struct pt_regs *regs) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 076312b17d4f..a2c2478e7d78 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,12 +18,13 @@ * Author: Will Deacon <will.deacon@arm.com> */ -#include <linux/kernel.h> +#include <linux/cache.h> #include <linux/clocksource.h> #include <linux/elf.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/gfp.h> +#include <linux/kernel.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/signal.h> @@ -37,8 +38,7 @@ #include <asm/vdso_datapage.h> extern char vdso_start, vdso_end; -static unsigned long vdso_pages; -static struct page **vdso_pagelist; +static unsigned long vdso_pages __ro_after_init; /* * The vDSO data page. @@ -53,9 +53,9 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Create and map the vectors page for AArch32 tasks. */ -static struct page *vectors_page[1]; +static struct page *vectors_page[1] __ro_after_init; -static int alloc_vectors_page(void) +static int __init alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; @@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { + static const struct vm_special_mapping spec = { .name = "[vectors]", .pages = vectors_page, @@ -110,11 +110,19 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ -static struct vm_special_mapping vdso_spec[2]; +static struct vm_special_mapping vdso_spec[2] __ro_after_init = { + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + }, +}; static int __init vdso_init(void) { int i; + struct page **vdso_pagelist; if (memcmp(&vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); @@ -138,16 +146,8 @@ static int __init vdso_init(void) for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); - /* Populate the special mapping structures */ - vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist, - }; - - vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vdso]", - .pages = &vdso_pagelist[1], - }; + vdso_spec[0].pages = &vdso_pagelist[0]; + vdso_spec[1].pages = &vdso_pagelist[1]; return 0; } @@ -201,7 +201,7 @@ up_fail: */ void update_vsyscall(struct timekeeper *tk) { - u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; ++vdso_data->tb_seq_count; smp_wmb(); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 659963d40bb4..5ce9b2929e0d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -185,6 +185,25 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + /* + * Data written with the MMU off but read with the MMU on requires + * cache lines to be invalidated, discarding up to a Cache Writeback + * Granule (CWG) of data from the cache. Keep the section that + * requires this type of maintenance to be in its own Cache Writeback + * Granule (CWG) area so the cache maintenance operations don't + * interfere with adjacent data. + */ + .mmuoff.data.write : ALIGN(SZ_2K) { + __mmuoff_data_start = .; + *(.mmuoff.data.write) + } + . = ALIGN(SZ_2K); + .mmuoff.data.read : { + *(.mmuoff.data.read) + __mmuoff_data_end = .; + } + PECOFF_EDATA_PADDING _edata = .; diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7ce931565151..2726635dceba 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -46,10 +46,6 @@ alternative_if_not ARM64_HAS_VIRT_HOST_EXTN hvc #0 ldr lr, [sp], #16 ret -alternative_else +alternative_else_nop_endif b __vhe_hyp_call - nop - nop - nop -alternative_endif ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e51367d159d0..f302fdb3a030 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -36,6 +36,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_mmu.h> #include <asm/perf_event.h> +#include <asm/sysreg.h> #include <trace/events/kvm.h> @@ -67,11 +68,9 @@ static u32 get_ccsidr(u32 csselr) /* Make sure noone else changes CSSELR during this! */ local_irq_disable(); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); local_irq_enable(); return ccsidr; @@ -174,9 +173,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u32 val; - asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); - p->regval = val; + p->regval = read_sysreg(dbgauthstatus_el1); return true; } } @@ -429,10 +426,7 @@ static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 amair; - - asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); - vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; + vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); } static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -456,8 +450,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; - asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr)); - /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN + pmcr = read_sysreg(pmcr_el0); + /* + * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN * except PMCR.E resetting to zero. */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) @@ -557,9 +552,9 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (!(p->Op2 & 1)) - asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid0_el0); else - asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid1_el0); p->regval = pmceid; @@ -1833,11 +1828,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, static void get_##reg(struct kvm_vcpu *v, \ const struct sys_reg_desc *r) \ { \ - u64 val; \ - \ - asm volatile("mrs %0, " __stringify(reg) "\n" \ - : "=r" (val)); \ - ((struct sys_reg_desc *)r)->val = val; \ + ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ } FUNCTION_INVARIANT(midr_el1) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index ed90578fa120..46af7186bca6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -26,6 +26,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/sysreg.h> #include <linux/init.h> #include "sys_regs.h" @@ -43,10 +44,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu, static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 actlr; - - asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); - vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; + vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); } /* diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 4c1e700840b6..c3cd65e31814 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -29,14 +29,11 @@ * x1 - src */ ENTRY(copy_page) -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH # Prefetch two cache lines ahead. prfm pldl1strm, [x1, #128] prfm pldl1strm, [x1, #256] -alternative_endif +alternative_else_nop_endif ldp x2, x3, [x1] ldp x4, x5, [x1, #16] @@ -52,11 +49,9 @@ alternative_endif 1: subs x18, x18, #128 -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] -alternative_endif +alternative_else_nop_endif stnp x2, x3, [x0] ldp x2, x3, [x1] diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 07d7352d7c38..58b5a906ff78 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -105,19 +105,20 @@ ENTRY(__clean_dcache_area_pou) ENDPROC(__clean_dcache_area_pou) /* - * __inval_cache_range(start, end) - * - start - start address of region - * - end - end address of region + * __dma_inv_area(start, size) + * - start - virtual start address of region + * - size - size in question */ -ENTRY(__inval_cache_range) +__dma_inv_area: + add x1, x1, x0 /* FALLTHROUGH */ /* - * __dma_inv_range(start, end) - * - start - virtual start address of region - * - end - virtual end address of region + * __inval_cache_range(start, end) + * - start - start address of region + * - end - end address of region */ -__dma_inv_range: +ENTRY(__inval_cache_range) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -136,46 +137,43 @@ __dma_inv_range: dsb sy ret ENDPIPROC(__inval_cache_range) -ENDPROC(__dma_inv_range) +ENDPROC(__dma_inv_area) + +/* + * __clean_dcache_area_poc(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoC. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_poc) + /* FALLTHROUGH */ /* - * __dma_clean_range(start, end) + * __dma_clean_area(start, size) * - start - virtual start address of region - * - end - virtual end address of region + * - size - size in question */ -__dma_clean_range: - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc cvac, x0 -alternative_else - dc civac, x0 -alternative_endif - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy +__dma_clean_area: + dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPROC(__dma_clean_range) +ENDPIPROC(__clean_dcache_area_poc) +ENDPROC(__dma_clean_area) /* - * __dma_flush_range(start, end) + * __dma_flush_area(start, size) + * + * clean & invalidate D / U line + * * - start - virtual start address of region - * - end - virtual end address of region + * - size - size in question */ -ENTRY(__dma_flush_range) - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D / U line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy +ENTRY(__dma_flush_area) + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -184,10 +182,9 @@ ENDPIPROC(__dma_flush_range) * - dir - DMA direction */ ENTRY(__dma_map_area) - add x1, x1, x0 cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_range - b __dma_clean_range + b.eq __dma_inv_area + b __dma_clean_area ENDPIPROC(__dma_map_area) /* @@ -197,8 +194,7 @@ ENDPIPROC(__dma_map_area) * - dir - DMA direction */ ENTRY(__dma_unmap_area) - add x1, x1, x0 cmp w2, #DMA_TO_DEVICE - b.ne __dma_inv_range + b.ne __dma_inv_area ret ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c4284c432ae8..bdacead5b802 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -20,6 +20,7 @@ #include <linux/gfp.h> #include <linux/acpi.h> #include <linux/bootmem.h> +#include <linux/cache.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -30,7 +31,7 @@ #include <asm/cacheflush.h> -static int swiotlb __read_mostly; +static int swiotlb __ro_after_init; static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) @@ -168,7 +169,7 @@ static void *__dma_alloc(struct device *dev, size_t size, return ptr; /* remove any dirty cache lines on the kernel alias */ - __dma_flush_range(ptr, ptr + size); + __dma_flush_area(ptr, size); /* create a coherent mapping */ page = virt_to_page(ptr); @@ -387,7 +388,7 @@ static int __init atomic_pool_init(void) void *page_addr = page_address(page); memset(page_addr, 0, atomic_pool_size); - __dma_flush_range(page_addr, page_addr + atomic_pool_size); + __dma_flush_area(page_addr, atomic_pool_size); atomic_pool = gen_pool_create(PAGE_SHIFT, -1); if (!atomic_pool) @@ -548,7 +549,7 @@ fs_initcall(dma_debug_do_init); /* Thankfully, all cache ops are by VA so we can ignore phys here */ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) { - __dma_flush_range(virt, virt + PAGE_SIZE); + __dma_flush_area(virt, PAGE_SIZE); } static void *__iommu_alloc_attrs(struct device *dev, size_t size, diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 81acd4706878..c9f118cd812b 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -2,7 +2,7 @@ * Based on arch/arm/mm/extable.c */ -#include <linux/module.h> +#include <linux/extable.h> #include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 05d2bd776c69..53d9159662fe 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -18,7 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/module.h> +#include <linux/extable.h> #include <linux/signal.h> #include <linux/mm.h> #include <linux/hardirq.h> @@ -251,8 +251,7 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, good_area: /* * Check that the permissions on the VMA allow for the fault which - * occurred. If we encountered a write or exec fault, we must have - * appropriate permissions, otherwise we allow any permission. + * occurred. */ if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; @@ -288,7 +287,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 43a76b07eb32..8377329d8c97 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -25,8 +25,6 @@ #include <asm/cachetype.h> #include <asm/tlbflush.h> -#include "mm.h" - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bbb7ee76e319..21c489bdeb4e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -23,6 +23,7 @@ #include <linux/swap.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/cache.h> #include <linux/mman.h> #include <linux/nodemask.h> #include <linux/initrd.h> @@ -34,6 +35,7 @@ #include <linux/dma-contiguous.h> #include <linux/efi.h> #include <linux/swiotlb.h> +#include <linux/vmalloc.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -47,16 +49,14 @@ #include <asm/tlb.h> #include <asm/alternative.h> -#include "mm.h" - /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() * executes, which assigns it its actual value. So use a default value * that cannot be mistaken for a real physical address. */ -s64 memstart_addr __read_mostly = -1; -phys_addr_t arm64_dma_phys_limit __read_mostly; +s64 memstart_addr __ro_after_init = -1; +phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_BLK_DEV_INITRD static int __init early_initrd(char *p) @@ -485,7 +485,12 @@ void free_initmem(void) { free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)), 0, "unused kernel"); - fixup_init(); + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. + */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h deleted file mode 100644 index 71fe98985455..000000000000 --- a/arch/arm64/mm/mm.h +++ /dev/null @@ -1,2 +0,0 @@ - -void fixup_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4989948d1feb..05615a3fdc6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cache.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -42,11 +43,9 @@ #include <asm/memblock.h> #include <asm/mmu_context.h> -#include "mm.h" - u64 idmap_t0sz = TCR_T0SZ(VA_BITS); -u64 kimage_voffset __read_mostly; +u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); /* @@ -399,16 +398,6 @@ void mark_rodata_ro(void) section_size, PAGE_KERNEL_RO); } -void fixup_init(void) -{ - /* - * Unmap the __init region but leave the VM area in place. This - * prevents the region from being reused for kernel modules, which - * is not supported by kallsyms. - */ - unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); -} - static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma) { diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 5bb15eab6f00..778a985c8a70 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define pr_fmt(fmt) "NUMA: " fmt + #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/memblock.h> @@ -24,6 +26,7 @@ #include <linux/of.h> #include <asm/acpi.h> +#include <asm/sections.h> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -38,10 +41,9 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) { - pr_info("%s\n", "NUMA turned off"); + if (!strncmp(opt, "off", 3)) numa_off = true; - } + return 0; } early_param("numa", numa_parse_early_param); @@ -93,7 +95,6 @@ void numa_clear_node(unsigned int cpu) */ static void __init setup_node_to_cpumask_map(void) { - unsigned int cpu; int node; /* setup nr_node_ids if not done yet */ @@ -106,11 +107,8 @@ static void __init setup_node_to_cpumask_map(void) cpumask_clear(node_to_cpumask_map[node]); } - for_each_possible_cpu(cpu) - set_cpu_numa_node(cpu, NUMA_NO_NODE); - /* cpumask_of_node() will now work */ - pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids); + pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } /* @@ -118,18 +116,77 @@ static void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(unsigned int cpu) { - map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]); + map_cpu_to_node(cpu, cpu_to_node_map[cpu]); } void __init early_map_cpu_to_node(unsigned int cpu, int nid) { /* fallback to node 0 */ - if (nid < 0 || nid >= MAX_NUMNODES) + if (nid < 0 || nid >= MAX_NUMNODES || numa_off) nid = 0; cpu_to_node_map[cpu] = nid; + + /* + * We should set the numa node of cpu0 as soon as possible, because it + * has already been set up online before. cpu_to_node(0) will soon be + * called. + */ + if (!cpu) + set_cpu_numa_node(cpu, nid); +} + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init early_cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(from, to); } +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + int nid = early_cpu_to_node(cpu); + + return memblock_virt_alloc_try_nid(size, align, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + /** * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk @@ -145,13 +202,13 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { - pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", + pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); - pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", + pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n", start, (end - 1), nid); return ret; } @@ -166,19 +223,18 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, - (end_pfn << PAGE_SHIFT) - 1); + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); /* report and initialize */ - pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n", + pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) - pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid); + pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); @@ -235,8 +291,7 @@ static int __init numa_alloc_distance(void) numa_distance[i * numa_distance_cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - pr_debug("NUMA: Initialized distance table, cnt=%d\n", - numa_distance_cnt); + pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt); return 0; } @@ -257,20 +312,20 @@ static int __init numa_alloc_distance(void) void __init numa_set_distance(int from, int to, int distance) { if (!numa_distance) { - pr_warn_once("NUMA: Warning: distance table not allocated yet\n"); + pr_warn_once("Warning: distance table not allocated yet\n"); return; } if (from >= numa_distance_cnt || to >= numa_distance_cnt || from < 0 || to < 0) { - pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", from, to, distance); return; } if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", from, to, distance); return; } @@ -297,7 +352,7 @@ static int __init numa_register_nodes(void) /* Check that valid nid is set to memblks */ for_each_memblock(memory, mblk) if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) { - pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", mblk->nid, mblk->base, mblk->base + mblk->size - 1); return -EINVAL; @@ -335,8 +390,10 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) return ret; - if (nodes_empty(numa_nodes_parsed)) + if (nodes_empty(numa_nodes_parsed)) { + pr_info("No NUMA configuration found\n"); return -EINVAL; + } ret = numa_register_nodes(); if (ret < 0) @@ -344,10 +401,6 @@ static int __init numa_init(int (*init_func)(void)) setup_node_to_cpumask_map(); - /* init boot processor */ - cpu_to_node_map[0] = 0; - map_cpu_to_node(0, 0); - return 0; } @@ -367,10 +420,8 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - else - pr_info("No NUMA configuration found\n"); - pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index ca6d268e3313..8def55e7249b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -139,4 +139,43 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) __pgprot(0), __pgprot(PTE_VALID)); } -#endif +#ifdef CONFIG_HIBERNATION +/* + * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function + * is used to determine if a linear map page has been marked as not-valid by + * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. + * This is based on kern_addr_valid(), which almost does what we need. + * + * Because this is only called on the kernel linear map, p?d_sect() implies + * p?d_present(). When debug_pagealloc is enabled, sections mappings are + * disabled. + */ +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return false; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return false; + if (pud_sect(*pud)) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + if (pmd_sect(*pmd)) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_valid(*pte); +} +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index ae11d4e03d0e..371c5f03a170 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,8 +26,6 @@ #include <asm/page.h> #include <asm/tlbflush.h> -#include "mm.h" - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9d37e967fa19..352c73b6a59e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend) * * x0: Address of context pointer */ + .pushsection ".idmap.text", "ax" ENTRY(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] @@ -120,6 +121,7 @@ ENTRY(cpu_do_resume) isb ret ENDPROC(cpu_do_resume) + .popsection #endif /* @@ -134,17 +136,12 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 - ret - nop - nop - nop -alternative_else +alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu dsb nsh isb +alternative_else_nop_endif ret -alternative_endif ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" @@ -181,6 +178,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. */ + .pushsection ".idmap.text", "ax" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -266,3 +264,4 @@ ENDPROC(__cpu_setup) crval: .word 0xfcffffff // clear .word 0x34d5d91d // set + .popsection diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5a9b87b7993e..c1eb1ff7c800 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1619,6 +1619,12 @@ static int __init octeon_irq_init_gpio( return -ENOMEM; } + /* + * Clear the OF_POPULATED flag that was set by of_irq_init() + * so that all GPIO devices will be probed. + */ + of_node_clear_flag(gpio_node, OF_POPULATED); + return 0; } /* diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index d7b99180c6e1..1910223a9c02 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -146,7 +146,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,cpu_mask_nr_tbl + # open coded PTR_LA t1, cpu_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, cpu_mask_nr_tbl + lui t1, %hi(cpu_mask_nr_tbl) + addiu t1, %lo(cpu_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, cpu_mask_nr_tbl + .set push + .set noat + lui t1, %highest(cpu_mask_nr_tbl) + lui AT, %hi(cpu_mask_nr_tbl) + daddiu t1, t1, %higher(cpu_mask_nr_tbl) + daddiu AT, AT, %lo(cpu_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 1: lw t2,(t1) nop and t2,t0 @@ -195,7 +213,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,asic_mask_nr_tbl + # open coded PTR_LA t1,asic_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, asic_mask_nr_tbl + lui t1, %hi(asic_mask_nr_tbl) + addiu t1, %lo(asic_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, asic_mask_nr_tbl + .set push + .set noat + lui t1, %highest(asic_mask_nr_tbl) + lui AT, %hi(asic_mask_nr_tbl) + daddiu t1, t1, %higher(asic_mask_nr_tbl) + daddiu AT, AT, %lo(asic_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 2: lw t2,(t1) nop and t2,t0 diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 58e7874e9347..4fafeefe65c2 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -458,10 +458,21 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; + uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + if (mips_cm_present()) { + /* + * We presume that all cores in the system will have the same + * number of VP(E)s, and if that ever changes then this will + * need revisiting. + */ + cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK; + return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1; + } + if (IS_ENABLED(CONFIG_SMP)) return smp_num_siblings; diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index def9d8d13f6e..7dd2dd47909a 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -660,8 +660,6 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) -/* FTLB probability bits for R6 */ -#define MIPS_CONF7_FTLBP_SHIFT (18) /* WatchLo* register definitions */ #define MIPS_WATCHLO_IRW (_ULCAST_(0x7) << 0) diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h index 34c325c674c4..70a4a2f173ff 100644 --- a/arch/mips/include/asm/uprobes.h +++ b/arch/mips/include/asm/uprobes.h @@ -36,7 +36,6 @@ struct arch_uprobe { unsigned long resume_epc; u32 insn[2]; u32 ixol[2]; - union mips_instruction orig_inst[MAX_UINSN_BYTES / 4]; }; struct arch_uprobe_task { diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index a88d44247cc8..dd3175442c9e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -352,7 +352,12 @@ __setup("nohtw", htw_disable); static int mips_ftlb_disabled; static int mips_has_ftlb_configured; -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable); +enum ftlb_flags { + FTLB_EN = 1 << 0, + FTLB_SET_PROB = 1 << 1, +}; + +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags); static int __init ftlb_disable(char *s) { @@ -371,8 +376,6 @@ static int __init ftlb_disable(char *s) return 1; } - back_to_back_c0_hazard(); - config4 = read_c0_config4(); /* Check that FTLB has been disabled */ @@ -531,7 +534,7 @@ static unsigned int calculate_ftlb_probability(struct cpuinfo_mips *c) return 3; } -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) { unsigned int config; @@ -542,33 +545,33 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) case CPU_P6600: /* proAptiv & related cores use Config6 to enable the FTLB */ config = read_c0_config6(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); - if (enable) - /* Enable FTLB */ - write_c0_config6(config | - (calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT) - | MIPS_CONF6_FTLBEN); + + if (flags & FTLB_EN) + config |= MIPS_CONF6_FTLBEN; else - /* Disable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBEN); + config &= ~MIPS_CONF6_FTLBEN; + + if (flags & FTLB_SET_PROB) { + config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config |= calculate_ftlb_probability(c) + << MIPS_CONF6_FTLBP_SHIFT; + } + + write_c0_config6(config); + back_to_back_c0_hazard(); break; case CPU_I6400: - /* I6400 & related cores use Config7 to configure FTLB */ - config = read_c0_config7(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT); - write_c0_config7(config | (calculate_ftlb_probability(c) - << MIPS_CONF7_FTLBP_SHIFT)); - break; + /* There's no way to disable the FTLB */ + if (!(flags & FTLB_EN)) + return 1; + return 0; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); /* Loongson-3 cores use Config6 to enable the FTLB */ config = read_c0_config6(); - if (enable) + if (flags & FTLB_EN) /* Enable FTLB */ write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); else @@ -788,6 +791,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) PAGE_SIZE, config4); /* Switch FTLB off */ set_ftlb_enable(c, 0); + mips_ftlb_disabled = 1; break; } c->tlbsizeftlbsets = 1 << @@ -852,7 +856,7 @@ static void decode_configs(struct cpuinfo_mips *c) c->scache.flags = MIPS_CACHE_NOT_PRESENT; /* Enable FTLB if present and not disabled */ - set_ftlb_enable(c, !mips_ftlb_disabled); + set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN); ok = decode_config0(c); /* Read Config registers. */ BUG_ON(!ok); /* Arch spec violation! */ @@ -902,6 +906,9 @@ static void decode_configs(struct cpuinfo_mips *c) } } + /* configure the FTLB write probability */ + set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) | FTLB_SET_PROB); + mips_probe_watch_registers(c); #ifndef CONFIG_MIPS_CPS diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 17326a90d53c..dc0b29612891 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -142,9 +142,8 @@ LEAF(__r4k_wait) PTR_LA k1, __r4k_wait ori k0, 0x1f /* 32 byte rollback region */ xori k0, 0x1f - bne k0, k1, 9f + bne k0, k1, \handler MTC0 k0, CP0_EPC -9: .set pop .endm diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 3be0e6ba2797..0d57909d9026 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -764,7 +764,6 @@ static void __init arch_mem_init(char **cmdline_p) device_tree_init(); sparse_init(); plat_swiotlb_setup(); - paging_init(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); /* Tell bootmem about cma reserved memblock section */ @@ -877,6 +876,7 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); cpu_cache_init(); + paging_init(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e9d9fc6c754c..6183ad84cc73 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -513,7 +513,7 @@ static void cps_cpu_die(unsigned int cpu) * in which case the CPC will refuse to power down the core. */ do { - mips_cm_lock_other(core, vpe_id); + mips_cm_lock_other(core, 0); mips_cpc_lock_other(core); stat = read_cpc_co_stat_conf(); stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK; diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 1149b30c9aeb..4c7c1558944a 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -157,7 +157,6 @@ bool is_trap_insn(uprobe_opcode_t *insn) int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - union mips_instruction insn; /* * Now find the EPC where to resume after the breakpoint has been @@ -168,10 +167,10 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) unsigned long epc; epc = regs->cp0_epc; - __compute_return_epc_for_insn(regs, insn); + __compute_return_epc_for_insn(regs, + (union mips_instruction) aup->insn[0]); aup->resume_epc = regs->cp0_epc; } - utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; regs->cp0_epc = current->utask->xol_vaddr; @@ -257,7 +256,7 @@ unsigned long arch_uretprobe_hijack_return_addr( ra = regs->regs[31]; /* Replace the return address with the trampoline address */ - regs->regs[31] = ra; + regs->regs[31] = trampoline_vaddr; return ra; } @@ -280,24 +279,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } -/** - * set_orig_insn - Restore the original instruction. - * @mm: the probed process address space. - * @auprobe: arch specific probepoint information. - * @vaddr: the virtual address to insert the opcode. - * - * For mm @mm, restore the original opcode (opcode) at @vaddr. - * Return 0 (success) or a negative errno. - * - * This overrides the weak version in kernel/events/uprobes.c. - */ -int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, - unsigned long vaddr) -{ - return uprobe_write_opcode(mm, vaddr, - *(uprobe_opcode_t *)&auprobe->orig_inst[0].word); -} - void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2c3749d98f04..72f7478ee068 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -440,6 +440,9 @@ static inline void mem_init_free_highmem(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; + if (cpu_has_dc_aliases) + return; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { struct page *page = pfn_to_page(tmp); diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index ec5b21678fad..7e7364b0501e 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -39,6 +39,9 @@ #include <linux/console.h> #endif +#define ROCIT_CONFIG_GEN0 0x1f403000 +#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7) + extern void malta_be_init(void); extern int malta_be_handler(struct pt_regs *regs, int is_fixup); @@ -107,6 +110,8 @@ static void __init fd_activate(void) static int __init plat_enable_iocoherency(void) { int supported = 0; + u32 cfg; + if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) { BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN; @@ -129,7 +134,8 @@ static int __init plat_enable_iocoherency(void) } else if (mips_cm_numiocu() != 0) { /* Nothing special needs to be done to enable coherency */ pr_info("CMP IOCU detected\n"); - if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) { + cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0)); + if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) { pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n"); return 0; } diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 26d9e7726867..ce2233f7e662 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask); int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) +void smp_fill_in_cpu_possible_map(void); void smp_fill_in_sib_core_maps(void); void cpu_play_dead(void); @@ -72,6 +73,7 @@ void __cpu_die(unsigned int cpu); #define smp_fill_in_sib_core_maps() do { } while (0) #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) +#define smp_fill_in_cpu_possible_map() do { } while (0) #endif /* !(CONFIG_SMP) */ diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 599f1207eed2..6b7331d198e9 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -31,6 +31,7 @@ #include <linux/initrd.h> #include <linux/module.h> #include <linux/start_kernel.h> +#include <linux/bootmem.h> #include <asm/io.h> #include <asm/processor.h> @@ -50,6 +51,8 @@ #include <asm/elf.h> #include <asm/mdesc.h> #include <asm/cacheflush.h> +#include <asm/dma.h> +#include <asm/irq.h> #ifdef CONFIG_IP_PNP #include <net/ipconfig.h> @@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } +void __init alloc_irqstack_bootmem(void) +{ + unsigned int i, node; + + for_each_possible_cpu(i) { + node = cpu_to_node(i); + + softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + } +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -650,6 +669,13 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); + smp_fill_in_cpu_possible_map(); + /* + * Once the OF device tree and MDESC have been setup and nr_cpus has + * been parsed, we know the list of possible cpus. Therefore we can + * allocate the IRQ stacks. + */ + alloc_irqstack_bootmem(); } extern int stop_a_enabled; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8a6151a628ce..d3035ba6cd31 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void) xcall_deliver_impl = hypervisor_xcall_deliver; } +void __init smp_fill_in_cpu_possible_map(void) +{ + int possible_cpus = num_possible_cpus(); + int i; + + if (possible_cpus > nr_cpu_ids) + possible_cpus = nr_cpu_ids; + + for (i = 0; i < possible_cpus; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); +} + void smp_fill_in_sib_core_maps(void) { unsigned int i; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e16fdd28a931..3f291d8c57f7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -484,6 +484,7 @@ good_area: tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 65457c9f1365..7ac6b62fb7c1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1160,7 +1160,7 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } -static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) return i; } -static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, - int index) +static void __init find_numa_latencies_for_group(struct mdesc_handle *md, + u64 grp, int index) { u64 arc; @@ -2081,7 +2081,6 @@ void __init paging_init(void) { unsigned long end_pfn, shift, phys_base; unsigned long real_end, i; - int node; setup_page_offset(); @@ -2250,21 +2249,6 @@ void __init paging_init(void) /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); - /* Once the OF device tree and MDESC have been setup, we know - * the list of possible cpus. Therefore we can allocate the - * IRQ stacks. - */ - for_each_possible_cpu(i) { - node = cpu_to_node(i); - - softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - } - kernel_physical_mapping_init(); { diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 6725ed45580e..f2b77112e9d8 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -469,8 +469,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d172c619c449..02fff3ebfb87 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1002,7 +1002,6 @@ ENTRY(error_entry) testb $3, CS+8(%rsp) jz .Lerror_kernelspace -.Lerror_entry_from_usermode_swapgs: /* * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. @@ -1045,7 +1044,8 @@ ENTRY(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - jmp .Lerror_entry_from_usermode_swapgs + SWAPGS + jmp .Lerror_entry_done .Lbstep_iret: /* Fix truncated RIP */ diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 4f741192846d..3dab75f2a673 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,7 +22,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); - if (hdr->e_type != ET_DYN) + if (GET_LE(&hdr->e_type) != ET_DYN) fail("input is not a shared object\n"); /* Walk the segment table. */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..dee8a70382ba 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { - this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); + this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); } /* Set in this cpu's CR4. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 809eda03c527..bcc9ccc220c9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -804,21 +804,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; + if (have_cpuid_p()) { + cpu_detect(c); + get_cpu_vendor(c); + get_cpu_cap(c); - cpu_detect(c); - get_cpu_vendor(c); - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); - c->cpu_index = 0; - filter_cpuid_features(c, false); + c->cpu_index = 0; + filter_cpuid_features(c, false); - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); + } setup_force_cpu_cap(X86_FEATURE_ALWAYS); fpu__init_system(c); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa60f5f5a16..98c9cd6f3b5d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1137,9 +1137,7 @@ void __init setup_arch(char **cmdline_p) * auditing all the early-boot CR4 manipulation would be needed to * rule it out. */ - if (boot_cpu_data.cpuid_level >= 0) - /* A CPU has %cr4 if and only if it has CPUID. */ - mmu_cr4_features = __read_cr4(); + mmu_cr4_features = __read_cr4_safe(); memblock_set_current_limit(get_max_mapped()); diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 80cc7c089a15..e1d5ea6d5e40 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -94,54 +94,50 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } -static int xlat_status(void *buf, unsigned int cmd) +static int xlat_status(void *buf, unsigned int cmd, u32 status) { struct nd_cmd_clear_error *clear_err; struct nd_cmd_ars_status *ars_status; - struct nd_cmd_ars_start *ars_start; - struct nd_cmd_ars_cap *ars_cap; u16 flags; switch (cmd) { case ND_CMD_ARS_CAP: - ars_cap = buf; - if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + if ((status & 0xffff) == NFIT_ARS_CAP_NONE) return -ENOTTY; /* Command failed */ - if (ars_cap->status & 0xffff) + if (status & 0xffff) return -EIO; /* No supported scan types for this range */ flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; - if ((ars_cap->status >> 16 & flags) == 0) + if ((status >> 16 & flags) == 0) return -ENOTTY; break; case ND_CMD_ARS_START: - ars_start = buf; /* ARS is in progress */ - if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + if ((status & 0xffff) == NFIT_ARS_START_BUSY) return -EBUSY; /* Command failed */ - if (ars_start->status & 0xffff) + if (status & 0xffff) return -EIO; break; case ND_CMD_ARS_STATUS: ars_status = buf; /* Command failed */ - if (ars_status->status & 0xffff) + if (status & 0xffff) return -EIO; /* Check extended status (Upper two bytes) */ - if (ars_status->status == NFIT_ARS_STATUS_DONE) + if (status == NFIT_ARS_STATUS_DONE) return 0; /* ARS is in progress */ - if (ars_status->status == NFIT_ARS_STATUS_BUSY) + if (status == NFIT_ARS_STATUS_BUSY) return -EBUSY; /* No ARS performed for the current boot */ - if (ars_status->status == NFIT_ARS_STATUS_NONE) + if (status == NFIT_ARS_STATUS_NONE) return -EAGAIN; /* @@ -149,19 +145,19 @@ static int xlat_status(void *buf, unsigned int cmd) * agent wants the scan to stop. If we didn't overflow * then just continue with the returned results. */ - if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (status == NFIT_ARS_STATUS_INTR) { if (ars_status->flags & NFIT_ARS_F_OVERFLOW) return -ENOSPC; return 0; } /* Unknown status */ - if (ars_status->status >> 16) + if (status >> 16) return -EIO; break; case ND_CMD_CLEAR_ERROR: clear_err = buf; - if (clear_err->status & 0xffff) + if (status & 0xffff) return -EIO; if (!clear_err->cleared) return -EIO; @@ -172,6 +168,9 @@ static int xlat_status(void *buf, unsigned int cmd) break; } + /* all other non-zero status results in an error */ + if (status) + return -EIO; return 0; } @@ -186,10 +185,10 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_pkg *call_pkg = NULL; const char *cmd_name, *dimm_name; unsigned long cmd_mask, dsm_mask; + u32 offset, fw_status = 0; acpi_handle handle; unsigned int func; const u8 *uuid; - u32 offset; int rc, i; func = cmd; @@ -317,6 +316,15 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, out_obj->buffer.pointer + offset, out_size); offset += out_size; } + + /* + * Set fw_status for all the commands with a known format to be + * later interpreted by xlat_status(). + */ + if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR) + || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR))) + fw_status = *(u32 *) out_obj->buffer.pointer; + if (offset + in_buf.buffer.length < buf_len) { if (i >= 1) { /* @@ -325,7 +333,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, */ rc = buf_len - offset - in_buf.buffer.length; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -335,7 +343,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, } else { rc = 0; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } out: diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a1f2aff33997..e023066e4215 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -45,7 +45,7 @@ static DEFINE_MUTEX(gpd_list_lock); * and checks that the PM domain pointer is a real generic PM domain. * Any failure results in NULL being returned. */ -struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev) +static struct generic_pm_domain *genpd_lookup_dev(struct device *dev) { struct generic_pm_domain *genpd = NULL, *gpd; @@ -586,7 +586,7 @@ static int __init genpd_poweroff_unused(void) } late_initcall(genpd_poweroff_unused); -#ifdef CONFIG_PM_SLEEP +#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_GENERIC_DOMAINS_OF) /** * pm_genpd_present - Check if the given PM domain has been initialized. @@ -606,6 +606,10 @@ static bool pm_genpd_present(const struct generic_pm_domain *genpd) return false; } +#endif + +#ifdef CONFIG_PM_SLEEP + static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, struct device *dev) { @@ -613,9 +617,8 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, } /** - * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. + * genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. - * @timed: True if latency measurements are allowed. * * Check if the given PM domain can be powered off (during system suspend or * hibernation) and do that if so. Also, in that case propagate to its masters. @@ -625,8 +628,7 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, * executed sequentially, so it is guaranteed that it will never run twice in * parallel). */ -static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd, - bool timed) +static void genpd_sync_poweroff(struct generic_pm_domain *genpd) { struct gpd_link *link; @@ -639,28 +641,26 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd, /* Choose the deepest state when suspending */ genpd->state_idx = genpd->state_count - 1; - genpd_power_off(genpd, timed); + genpd_power_off(genpd, false); genpd->status = GPD_STATE_POWER_OFF; list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); - pm_genpd_sync_poweroff(link->master, timed); + genpd_sync_poweroff(link->master); } } /** - * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters. + * genpd_sync_poweron - Synchronously power on a PM domain and its masters. * @genpd: PM domain to power on. - * @timed: True if latency measurements are allowed. * * This function is only called in "noirq" and "syscore" stages of system power * transitions, so it need not acquire locks (all of the "noirq" callbacks are * executed sequentially, so it is guaranteed that it will never run twice in * parallel). */ -static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd, - bool timed) +static void genpd_sync_poweron(struct generic_pm_domain *genpd) { struct gpd_link *link; @@ -668,11 +668,11 @@ static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd, return; list_for_each_entry(link, &genpd->slave_links, slave_node) { - pm_genpd_sync_poweron(link->master, timed); + genpd_sync_poweron(link->master); genpd_sd_counter_inc(link->master); } - genpd_power_on(genpd, timed); + genpd_power_on(genpd, false); genpd->status = GPD_STATE_ACTIVE; } @@ -784,7 +784,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) * the same PM domain, so it is not necessary to use locking here. */ genpd->suspended_count++; - pm_genpd_sync_poweroff(genpd, true); + genpd_sync_poweroff(genpd); return 0; } @@ -814,7 +814,7 @@ static int pm_genpd_resume_noirq(struct device *dev) * guaranteed that this function will never run twice in parallel for * the same PM domain, so it is not necessary to use locking here. */ - pm_genpd_sync_poweron(genpd, true); + genpd_sync_poweron(genpd); genpd->suspended_count--; if (genpd->dev_ops.stop && genpd->dev_ops.start) @@ -902,12 +902,12 @@ static int pm_genpd_restore_noirq(struct device *dev) if (genpd->suspended_count++ == 0) /* * The boot kernel might put the domain into arbitrary state, - * so make it appear as powered off to pm_genpd_sync_poweron(), + * so make it appear as powered off to genpd_sync_poweron(), * so that it tries to power it on in case it was really off. */ genpd->status = GPD_STATE_POWER_OFF; - pm_genpd_sync_poweron(genpd, true); + genpd_sync_poweron(genpd); if (genpd->dev_ops.stop && genpd->dev_ops.start) ret = pm_runtime_force_resume(dev); @@ -962,9 +962,9 @@ static void genpd_syscore_switch(struct device *dev, bool suspend) if (suspend) { genpd->suspended_count++; - pm_genpd_sync_poweroff(genpd, false); + genpd_sync_poweroff(genpd); } else { - pm_genpd_sync_poweron(genpd, false); + genpd_sync_poweron(genpd); genpd->suspended_count--; } } @@ -1056,14 +1056,8 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } -/** - * __pm_genpd_add_device - Add a device to an I/O PM domain. - * @genpd: PM domain to add the device to. - * @dev: Device to be added. - * @td: Set of PM QoS timing parameters to attach to the device. - */ -int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct gpd_timing_data *td) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct gpd_timing_data *td) { struct generic_pm_domain_data *gpd_data; int ret = 0; @@ -1103,15 +1097,28 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, return ret; } -EXPORT_SYMBOL_GPL(__pm_genpd_add_device); /** - * pm_genpd_remove_device - Remove a device from an I/O PM domain. - * @genpd: PM domain to remove the device from. - * @dev: Device to be removed. + * __pm_genpd_add_device - Add a device to an I/O PM domain. + * @genpd: PM domain to add the device to. + * @dev: Device to be added. + * @td: Set of PM QoS timing parameters to attach to the device. */ -int pm_genpd_remove_device(struct generic_pm_domain *genpd, - struct device *dev) +int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct gpd_timing_data *td) +{ + int ret; + + mutex_lock(&gpd_list_lock); + ret = genpd_add_device(genpd, dev, td); + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(__pm_genpd_add_device); + +static int genpd_remove_device(struct generic_pm_domain *genpd, + struct device *dev) { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; @@ -1119,10 +1126,6 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, dev_dbg(dev, "%s()\n", __func__); - if (!genpd || genpd != pm_genpd_lookup_dev(dev)) - return -EINVAL; - - /* The above validation also means we have existing domain_data. */ pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); @@ -1154,15 +1157,24 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, return ret; } -EXPORT_SYMBOL_GPL(pm_genpd_remove_device); /** - * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. - * @genpd: Master PM domain to add the subdomain to. - * @subdomain: Subdomain to be added. + * pm_genpd_remove_device - Remove a device from an I/O PM domain. + * @genpd: PM domain to remove the device from. + * @dev: Device to be removed. */ -int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *subdomain) +int pm_genpd_remove_device(struct generic_pm_domain *genpd, + struct device *dev) +{ + if (!genpd || genpd != genpd_lookup_dev(dev)) + return -EINVAL; + + return genpd_remove_device(genpd, dev); +} +EXPORT_SYMBOL_GPL(pm_genpd_remove_device); + +static int genpd_add_subdomain(struct generic_pm_domain *genpd, + struct generic_pm_domain *subdomain) { struct gpd_link *link, *itr; int ret = 0; @@ -1205,6 +1217,23 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, kfree(link); return ret; } + +/** + * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. + * @genpd: Master PM domain to add the subdomain to. + * @subdomain: Subdomain to be added. + */ +int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, + struct generic_pm_domain *subdomain) +{ + int ret; + + mutex_lock(&gpd_list_lock); + ret = genpd_add_subdomain(genpd, subdomain); + mutex_unlock(&gpd_list_lock); + + return ret; +} EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); /** @@ -1278,27 +1307,17 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->device_count = 0; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = true; + genpd->provider = NULL; + genpd->has_provider = false; genpd->domain.ops.runtime_suspend = genpd_runtime_suspend; genpd->domain.ops.runtime_resume = genpd_runtime_resume; genpd->domain.ops.prepare = pm_genpd_prepare; - genpd->domain.ops.suspend = pm_generic_suspend; - genpd->domain.ops.suspend_late = pm_generic_suspend_late; genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq; - genpd->domain.ops.resume_early = pm_generic_resume_early; - genpd->domain.ops.resume = pm_generic_resume; - genpd->domain.ops.freeze = pm_generic_freeze; - genpd->domain.ops.freeze_late = pm_generic_freeze_late; genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; - genpd->domain.ops.thaw_early = pm_generic_thaw_early; - genpd->domain.ops.thaw = pm_generic_thaw; - genpd->domain.ops.poweroff = pm_generic_poweroff; - genpd->domain.ops.poweroff_late = pm_generic_poweroff_late; genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; - genpd->domain.ops.restore_early = pm_generic_restore_early; - genpd->domain.ops.restore = pm_generic_restore; genpd->domain.ops.complete = pm_genpd_complete; if (genpd->flags & GENPD_FLAG_PM_CLK) { @@ -1328,7 +1347,71 @@ int pm_genpd_init(struct generic_pm_domain *genpd, } EXPORT_SYMBOL_GPL(pm_genpd_init); +static int genpd_remove(struct generic_pm_domain *genpd) +{ + struct gpd_link *l, *link; + + if (IS_ERR_OR_NULL(genpd)) + return -EINVAL; + + mutex_lock(&genpd->lock); + + if (genpd->has_provider) { + mutex_unlock(&genpd->lock); + pr_err("Provider present, unable to remove %s\n", genpd->name); + return -EBUSY; + } + + if (!list_empty(&genpd->master_links) || genpd->device_count) { + mutex_unlock(&genpd->lock); + pr_err("%s: unable to remove %s\n", __func__, genpd->name); + return -EBUSY; + } + + list_for_each_entry_safe(link, l, &genpd->slave_links, slave_node) { + list_del(&link->master_node); + list_del(&link->slave_node); + kfree(link); + } + + list_del(&genpd->gpd_list_node); + mutex_unlock(&genpd->lock); + cancel_work_sync(&genpd->power_off_work); + pr_debug("%s: removed %s\n", __func__, genpd->name); + + return 0; +} + +/** + * pm_genpd_remove - Remove a generic I/O PM domain + * @genpd: Pointer to PM domain that is to be removed. + * + * To remove the PM domain, this function: + * - Removes the PM domain as a subdomain to any parent domains, + * if it was added. + * - Removes the PM domain from the list of registered PM domains. + * + * The PM domain will only be removed, if the associated provider has + * been removed, it is not a parent to any other PM domain and has no + * devices associated with it. + */ +int pm_genpd_remove(struct generic_pm_domain *genpd) +{ + int ret; + + mutex_lock(&gpd_list_lock); + ret = genpd_remove(genpd); + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pm_genpd_remove); + #ifdef CONFIG_PM_GENERIC_DOMAINS_OF + +typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, + void *data); + /* * Device Tree based PM domain providers. * @@ -1340,8 +1423,8 @@ EXPORT_SYMBOL_GPL(pm_genpd_init); * maps a PM domain specifier retrieved from the device tree to a PM domain. * * Two simple mapping functions have been provided for convenience: - * - __of_genpd_xlate_simple() for 1:1 device tree node to PM domain mapping. - * - __of_genpd_xlate_onecell() for mapping of multiple PM domains per node by + * - genpd_xlate_simple() for 1:1 device tree node to PM domain mapping. + * - genpd_xlate_onecell() for mapping of multiple PM domains per node by * index. */ @@ -1366,7 +1449,7 @@ static LIST_HEAD(of_genpd_providers); static DEFINE_MUTEX(of_genpd_mutex); /** - * __of_genpd_xlate_simple() - Xlate function for direct node-domain mapping + * genpd_xlate_simple() - Xlate function for direct node-domain mapping * @genpdspec: OF phandle args to map into a PM domain * @data: xlate function private data - pointer to struct generic_pm_domain * @@ -1374,7 +1457,7 @@ static DEFINE_MUTEX(of_genpd_mutex); * have their own device tree nodes. The private data of xlate function needs * to be a valid pointer to struct generic_pm_domain. */ -struct generic_pm_domain *__of_genpd_xlate_simple( +static struct generic_pm_domain *genpd_xlate_simple( struct of_phandle_args *genpdspec, void *data) { @@ -1382,10 +1465,9 @@ struct generic_pm_domain *__of_genpd_xlate_simple( return ERR_PTR(-EINVAL); return data; } -EXPORT_SYMBOL_GPL(__of_genpd_xlate_simple); /** - * __of_genpd_xlate_onecell() - Xlate function using a single index. + * genpd_xlate_onecell() - Xlate function using a single index. * @genpdspec: OF phandle args to map into a PM domain * @data: xlate function private data - pointer to struct genpd_onecell_data * @@ -1394,7 +1476,7 @@ EXPORT_SYMBOL_GPL(__of_genpd_xlate_simple); * A single cell is used as an index into an array of PM domains specified in * the genpd_onecell_data struct when registering the provider. */ -struct generic_pm_domain *__of_genpd_xlate_onecell( +static struct generic_pm_domain *genpd_xlate_onecell( struct of_phandle_args *genpdspec, void *data) { @@ -1414,16 +1496,15 @@ struct generic_pm_domain *__of_genpd_xlate_onecell( return genpd_data->domains[idx]; } -EXPORT_SYMBOL_GPL(__of_genpd_xlate_onecell); /** - * __of_genpd_add_provider() - Register a PM domain provider for a node + * genpd_add_provider() - Register a PM domain provider for a node * @np: Device node pointer associated with the PM domain provider. * @xlate: Callback for decoding PM domain from phandle arguments. * @data: Context pointer for @xlate callback. */ -int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, - void *data) +static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, + void *data) { struct of_genpd_provider *cp; @@ -1442,7 +1523,83 @@ int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, return 0; } -EXPORT_SYMBOL_GPL(__of_genpd_add_provider); + +/** + * of_genpd_add_provider_simple() - Register a simple PM domain provider + * @np: Device node pointer associated with the PM domain provider. + * @genpd: Pointer to PM domain associated with the PM domain provider. + */ +int of_genpd_add_provider_simple(struct device_node *np, + struct generic_pm_domain *genpd) +{ + int ret = -EINVAL; + + if (!np || !genpd) + return -EINVAL; + + mutex_lock(&gpd_list_lock); + + if (pm_genpd_present(genpd)) + ret = genpd_add_provider(np, genpd_xlate_simple, genpd); + + if (!ret) { + genpd->provider = &np->fwnode; + genpd->has_provider = true; + } + + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple); + +/** + * of_genpd_add_provider_onecell() - Register a onecell PM domain provider + * @np: Device node pointer associated with the PM domain provider. + * @data: Pointer to the data associated with the PM domain provider. + */ +int of_genpd_add_provider_onecell(struct device_node *np, + struct genpd_onecell_data *data) +{ + unsigned int i; + int ret = -EINVAL; + + if (!np || !data) + return -EINVAL; + + mutex_lock(&gpd_list_lock); + + for (i = 0; i < data->num_domains; i++) { + if (!data->domains[i]) + continue; + if (!pm_genpd_present(data->domains[i])) + goto error; + + data->domains[i]->provider = &np->fwnode; + data->domains[i]->has_provider = true; + } + + ret = genpd_add_provider(np, genpd_xlate_onecell, data); + if (ret < 0) + goto error; + + mutex_unlock(&gpd_list_lock); + + return 0; + +error: + while (i--) { + if (!data->domains[i]) + continue; + data->domains[i]->provider = NULL; + data->domains[i]->has_provider = false; + } + + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell); /** * of_genpd_del_provider() - Remove a previously registered PM domain provider @@ -1451,10 +1608,21 @@ EXPORT_SYMBOL_GPL(__of_genpd_add_provider); void of_genpd_del_provider(struct device_node *np) { struct of_genpd_provider *cp; + struct generic_pm_domain *gpd; + mutex_lock(&gpd_list_lock); mutex_lock(&of_genpd_mutex); list_for_each_entry(cp, &of_genpd_providers, link) { if (cp->node == np) { + /* + * For each PM domain associated with the + * provider, set the 'has_provider' to false + * so that the PM domain can be safely removed. + */ + list_for_each_entry(gpd, &gpd_list, gpd_list_node) + if (gpd->provider == &np->fwnode) + gpd->has_provider = false; + list_del(&cp->link); of_node_put(cp->node); kfree(cp); @@ -1462,11 +1630,12 @@ void of_genpd_del_provider(struct device_node *np) } } mutex_unlock(&of_genpd_mutex); + mutex_unlock(&gpd_list_lock); } EXPORT_SYMBOL_GPL(of_genpd_del_provider); /** - * of_genpd_get_from_provider() - Look-up PM domain + * genpd_get_from_provider() - Look-up PM domain * @genpdspec: OF phandle args to use for look-up * * Looks for a PM domain provider under the node specified by @genpdspec and if @@ -1476,7 +1645,7 @@ EXPORT_SYMBOL_GPL(of_genpd_del_provider); * Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR() * on failure. */ -struct generic_pm_domain *of_genpd_get_from_provider( +static struct generic_pm_domain *genpd_get_from_provider( struct of_phandle_args *genpdspec) { struct generic_pm_domain *genpd = ERR_PTR(-ENOENT); @@ -1499,7 +1668,109 @@ struct generic_pm_domain *of_genpd_get_from_provider( return genpd; } -EXPORT_SYMBOL_GPL(of_genpd_get_from_provider); + +/** + * of_genpd_add_device() - Add a device to an I/O PM domain + * @genpdspec: OF phandle args to use for look-up PM domain + * @dev: Device to be added. + * + * Looks-up an I/O PM domain based upon phandle args provided and adds + * the device to the PM domain. Returns a negative error code on failure. + */ +int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) +{ + struct generic_pm_domain *genpd; + int ret; + + mutex_lock(&gpd_list_lock); + + genpd = genpd_get_from_provider(genpdspec); + if (IS_ERR(genpd)) { + ret = PTR_ERR(genpd); + goto out; + } + + ret = genpd_add_device(genpd, dev, NULL); + +out: + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(of_genpd_add_device); + +/** + * of_genpd_add_subdomain - Add a subdomain to an I/O PM domain. + * @parent_spec: OF phandle args to use for parent PM domain look-up + * @subdomain_spec: OF phandle args to use for subdomain look-up + * + * Looks-up a parent PM domain and subdomain based upon phandle args + * provided and adds the subdomain to the parent PM domain. Returns a + * negative error code on failure. + */ +int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, + struct of_phandle_args *subdomain_spec) +{ + struct generic_pm_domain *parent, *subdomain; + int ret; + + mutex_lock(&gpd_list_lock); + + parent = genpd_get_from_provider(parent_spec); + if (IS_ERR(parent)) { + ret = PTR_ERR(parent); + goto out; + } + + subdomain = genpd_get_from_provider(subdomain_spec); + if (IS_ERR(subdomain)) { + ret = PTR_ERR(subdomain); + goto out; + } + + ret = genpd_add_subdomain(parent, subdomain); + +out: + mutex_unlock(&gpd_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); + +/** + * of_genpd_remove_last - Remove the last PM domain registered for a provider + * @provider: Pointer to device structure associated with provider + * + * Find the last PM domain that was added by a particular provider and + * remove this PM domain from the list of PM domains. The provider is + * identified by the 'provider' device structure that is passed. The PM + * domain will only be removed, if the provider associated with domain + * has been removed. + * + * Returns a valid pointer to struct generic_pm_domain on success or + * ERR_PTR() on failure. + */ +struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) +{ + struct generic_pm_domain *gpd, *genpd = ERR_PTR(-ENOENT); + int ret; + + if (IS_ERR_OR_NULL(np)) + return ERR_PTR(-EINVAL); + + mutex_lock(&gpd_list_lock); + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (gpd->provider == &np->fwnode) { + ret = genpd_remove(gpd); + genpd = ret ? ERR_PTR(ret) : gpd; + break; + } + } + mutex_unlock(&gpd_list_lock); + + return genpd; +} +EXPORT_SYMBOL_GPL(of_genpd_remove_last); /** * genpd_dev_pm_detach - Detach a device from its PM domain. @@ -1515,14 +1786,14 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) unsigned int i; int ret = 0; - pd = pm_genpd_lookup_dev(dev); - if (!pd) + pd = dev_to_genpd(dev); + if (IS_ERR(pd)) return; dev_dbg(dev, "removing from PM domain %s\n", pd->name); for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { - ret = pm_genpd_remove_device(pd, dev); + ret = genpd_remove_device(pd, dev); if (ret != -EAGAIN) break; @@ -1596,9 +1867,11 @@ int genpd_dev_pm_attach(struct device *dev) return -ENOENT; } - pd = of_genpd_get_from_provider(&pd_args); + mutex_lock(&gpd_list_lock); + pd = genpd_get_from_provider(&pd_args); of_node_put(pd_args.np); if (IS_ERR(pd)) { + mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); return -EPROBE_DEFER; @@ -1607,13 +1880,14 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { - ret = pm_genpd_add_device(pd, dev); + ret = genpd_add_device(pd, dev, NULL); if (ret != -EAGAIN) break; mdelay(i); cond_resched(); } + mutex_unlock(&gpd_list_lock); if (ret < 0) { dev_err(dev, "failed to add to PM domain %s: %d", @@ -1636,7 +1910,7 @@ EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); /*** debugfs support ***/ -#ifdef CONFIG_PM_ADVANCED_DEBUG +#ifdef CONFIG_DEBUG_FS #include <linux/pm.h> #include <linux/device.h> #include <linux/debugfs.h> @@ -1784,4 +2058,4 @@ static void __exit pm_genpd_debug_exit(void) debugfs_remove_recursive(pm_genpd_debugfs_dir); } __exitcall(pm_genpd_debug_exit); -#endif /* CONFIG_PM_ADVANCED_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index df0c70963d9e..4c7c6da7a989 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -584,7 +584,6 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) struct clk *clk; unsigned long freq, old_freq; unsigned long u_volt, u_volt_min, u_volt_max; - unsigned long ou_volt, ou_volt_min, ou_volt_max; int ret; if (unlikely(!target_freq)) { @@ -620,11 +619,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) } old_opp = _find_freq_ceil(opp_table, &old_freq); - if (!IS_ERR(old_opp)) { - ou_volt = old_opp->u_volt; - ou_volt_min = old_opp->u_volt_min; - ou_volt_max = old_opp->u_volt_max; - } else { + if (IS_ERR(old_opp)) { dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", __func__, old_freq, PTR_ERR(old_opp)); } @@ -683,7 +678,8 @@ restore_freq: restore_voltage: /* This shouldn't harm even if the voltages weren't updated earlier */ if (!IS_ERR(old_opp)) - _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max); + _set_opp_voltage(dev, reg, old_opp->u_volt, + old_opp->u_volt_min, old_opp->u_volt_max); return ret; } diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 1dfd3dd92624..5552211e6fcd 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -71,8 +71,18 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, u32 version; int ret; - if (!opp_table->supported_hw) - return true; + if (!opp_table->supported_hw) { + /* + * In the case that no supported_hw has been set by the + * platform but there is an opp-supported-hw value set for + * an OPP then the OPP should not be enabled as there is + * no way to see if the hardware supports it. + */ + if (of_find_property(np, "opp-supported-hw", NULL)) + return false; + else + return true; + } while (count--) { ret = of_property_read_u32_index(np, "opp-supported-hw", count, diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 567788664723..8a753fd5b79d 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -305,6 +305,16 @@ config ARM_ARCH_TIMER_EVTSTREAM This must be disabled for hardware validation purposes to detect any hardware anomalies of missing events. +config FSL_ERRATUM_A008585 + bool "Workaround for Freescale/NXP Erratum A-008585" + default y + depends on ARM_ARCH_TIMER && ARM64 + help + This option enables a workaround for Freescale/NXP Erratum + A-008585 ("ARM generic timer may contain an erroneous + value"). The workaround will only be active if the + fsl,erratum-a008585 property is found in the timer node. + config ARM_GLOBAL_TIMER bool "Support for the ARM global timer" if COMPILE_TEST select CLKSRC_OF if OF diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 57700541f951..73c487da6d2a 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -94,6 +94,43 @@ early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); * Architected system timer support. */ +#ifdef CONFIG_FSL_ERRATUM_A008585 +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); + +static int fsl_a008585_enable = -1; + +static int __init early_fsl_a008585_cfg(char *buf) +{ + int ret; + bool val; + + ret = strtobool(buf, &val); + if (ret) + return ret; + + fsl_a008585_enable = val; + return 0; +} +early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg); + +u32 __fsl_a008585_read_cntp_tval_el0(void) +{ + return __fsl_a008585_read_reg(cntp_tval_el0); +} + +u32 __fsl_a008585_read_cntv_tval_el0(void) +{ + return __fsl_a008585_read_reg(cntv_tval_el0); +} + +u64 __fsl_a008585_read_cntvct_el0(void) +{ + return __fsl_a008585_read_reg(cntvct_el0); +} +EXPORT_SYMBOL(__fsl_a008585_read_cntvct_el0); +#endif /* CONFIG_FSL_ERRATUM_A008585 */ + static __always_inline void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, struct clock_event_device *clk) @@ -243,6 +280,40 @@ static __always_inline void set_next_event(const int access, unsigned long evt, arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } +#ifdef CONFIG_FSL_ERRATUM_A008585 +static __always_inline void fsl_a008585_set_next_event(const int access, + unsigned long evt, struct clock_event_device *clk) +{ + unsigned long ctrl; + u64 cval = evt + arch_counter_get_cntvct(); + + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + + if (access == ARCH_TIMER_PHYS_ACCESS) + write_sysreg(cval, cntp_cval_el0); + else if (access == ARCH_TIMER_VIRT_ACCESS) + write_sysreg(cval, cntv_cval_el0); + + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); +} + +static int fsl_a008585_set_next_event_virt(unsigned long evt, + struct clock_event_device *clk) +{ + fsl_a008585_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); + return 0; +} + +static int fsl_a008585_set_next_event_phys(unsigned long evt, + struct clock_event_device *clk) +{ + fsl_a008585_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); + return 0; +} +#endif /* CONFIG_FSL_ERRATUM_A008585 */ + static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { @@ -271,6 +342,19 @@ static int arch_timer_set_next_event_phys_mem(unsigned long evt, return 0; } +static void fsl_a008585_set_sne(struct clock_event_device *clk) +{ +#ifdef CONFIG_FSL_ERRATUM_A008585 + if (!static_branch_unlikely(&arch_timer_read_ool_enabled)) + return; + + if (arch_timer_uses_ppi == VIRT_PPI) + clk->set_next_event = fsl_a008585_set_next_event_virt; + else + clk->set_next_event = fsl_a008585_set_next_event_phys; +#endif +} + static void __arch_timer_setup(unsigned type, struct clock_event_device *clk) { @@ -299,6 +383,8 @@ static void __arch_timer_setup(unsigned type, default: BUG(); } + + fsl_a008585_set_sne(clk); } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; @@ -515,15 +601,19 @@ static void __init arch_counter_register(unsigned type) arch_timer_read_counter = arch_counter_get_cntvct; else arch_timer_read_counter = arch_counter_get_cntpct; - } else { - arch_timer_read_counter = arch_counter_get_cntvct_mem; - /* If the clocksource name is "arch_sys_counter" the - * VDSO will attempt to read the CP15-based counter. - * Ensure this does not happen when CP15-based - * counter is not available. + clocksource_counter.archdata.vdso_direct = true; + +#ifdef CONFIG_FSL_ERRATUM_A008585 + /* + * Don't use the vdso fastpath if errata require using + * the out-of-line counter accessor. */ - clocksource_counter.name = "arch_mem_counter"; + if (static_branch_unlikely(&arch_timer_read_ool_enabled)) + clocksource_counter.archdata.vdso_direct = false; +#endif + } else { + arch_timer_read_counter = arch_counter_get_cntvct_mem; } start_count = arch_timer_read_counter(); @@ -800,6 +890,15 @@ static int __init arch_timer_of_init(struct device_node *np) arch_timer_c3stop = !of_property_read_bool(np, "always-on"); +#ifdef CONFIG_FSL_ERRATUM_A008585 + if (fsl_a008585_enable < 0) + fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585"); + if (fsl_a008585_enable) { + static_branch_enable(&arch_timer_read_ool_enabled); + pr_info("Enabling workaround for FSL erratum A-008585\n"); + } +#endif + /* * If we cannot rely on firmware initializing the timer registers then * we should use the physical timers instead. diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 74919aa81dcb..d8b164a7c4e5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config CPU_FREQ_GOV_SCHEDUTIL - tristate "'schedutil' cpufreq policy governor" + bool "'schedutil' cpufreq policy governor" depends on CPU_FREQ && SMP select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK @@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL frequency tipping point is at utilization/capacity equal to 80% in both cases. - To compile this driver as a module, choose M here: the module will - be called cpufreq_schedutil. - If in doubt, say N. comment "CPU frequency scaling drivers" @@ -225,7 +222,7 @@ config CPUFREQ_DT help This adds a generic DT based cpufreq driver for frequency management. It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) - systems which share clock and voltage across all CPUs. + systems. If in doubt, say N. diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 6588ec567d93..1b2f28f69a81 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -19,10 +19,19 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/dmi.h> #include <linux/vmalloc.h> +#include <asm/unaligned.h> + #include <acpi/cppc_acpi.h> +/* Minimum struct length needed for the DMI processor entry we want */ +#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 + +/* Offest in the DMI processor structure for the max frequency */ +#define DMI_PROCESSOR_MAX_SPEED 0x14 + /* * These structs contain information parsed from per CPU * ACPI _CPC structures. @@ -32,6 +41,39 @@ */ static struct cppc_cpudata **all_cpu_data; +/* Capture the max KHz from DMI */ +static u64 cppc_dmi_max_khz; + +/* Callback function used to retrieve the max frequency from DMI */ +static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) +{ + const u8 *dmi_data = (const u8 *)dm; + u16 *mhz = (u16 *)private; + + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? val : *mhz; + } +} + +/* Look up the max frequency in DMI */ +static u64 cppc_get_dmi_max_khz(void) +{ + u16 mhz = 0; + + dmi_walk(cppc_find_dmi_mhz, &mhz); + + /* + * Real stupid fallback value, just in case there is no + * actual value set. + */ + mhz = mhz ? mhz : 1; + + return (1000 * mhz); +} + static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -42,7 +84,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, cpu = all_cpu_data[policy->cpu]; - cpu->perf_ctrls.desired_perf = target_freq; + cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz; freqs.old = policy->cur; freqs.new = target_freq; @@ -94,8 +136,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; } - policy->min = cpu->perf_caps.lowest_perf; - policy->max = cpu->perf_caps.highest_perf; + cppc_dmi_max_khz = cppc_get_dmi_max_khz(); + + policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf; + policy->max = cppc_dmi_max_khz; policy->cpuinfo.min_freq = policy->min; policy->cpuinfo.max_freq = policy->max; policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num); @@ -113,7 +157,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu->cur_policy = policy; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; + policy->cur = cppc_dmi_max_khz; + cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); if (ret) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2ee40fd360ca..71267626456b 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -11,6 +11,8 @@ #include <linux/of.h> #include <linux/platform_device.h> +#include "cpufreq-dt.h" + static const struct of_device_id machines[] __initconst = { { .compatible = "allwinner,sun4i-a10", }, { .compatible = "allwinner,sun5i-a10s", }, @@ -40,6 +42,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "samsung,exynos5250", }, #ifndef CONFIG_BL_SWITCHER { .compatible = "samsung,exynos5420", }, + { .compatible = "samsung,exynos5433", }, { .compatible = "samsung,exynos5800", }, #endif @@ -51,6 +54,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "renesas,r8a7779", }, { .compatible = "renesas,r8a7790", }, { .compatible = "renesas,r8a7791", }, + { .compatible = "renesas,r8a7792", }, { .compatible = "renesas,r8a7793", }, { .compatible = "renesas,r8a7794", }, { .compatible = "renesas,sh73a0", }, @@ -68,6 +72,8 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "sigma,tango4" }, + { .compatible = "ti,am33xx", }, + { .compatible = "ti,dra7", }, { .compatible = "ti,omap2", }, { .compatible = "ti,omap3", }, { .compatible = "ti,omap4", }, @@ -91,7 +97,8 @@ static int __init cpufreq_dt_platdev_init(void) if (!match) return -ENODEV; - return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, - NULL, 0)); + return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", + -1, match->data, + sizeof(struct cpufreq_dt_platform_data))); } device_initcall(cpufreq_dt_platdev_init); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 3957de801ae8..5c07ae05d69a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -25,6 +25,8 @@ #include <linux/slab.h> #include <linux/thermal.h> +#include "cpufreq-dt.h" + struct private_data { struct device *cpu_dev; struct thermal_cooling_device *cdev; @@ -353,6 +355,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { static int dt_cpufreq_probe(struct platform_device *pdev) { + struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); int ret; /* @@ -366,7 +369,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev) if (ret) return ret; - dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); + if (data && data->have_governor_per_policy) + dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h new file mode 100644 index 000000000000..54d774e46c43 --- /dev/null +++ b/drivers/cpufreq/cpufreq-dt.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2016 Linaro + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CPUFREQ_DT_H__ +#define __CPUFREQ_DT_H__ + +#include <linux/types.h> + +struct cpufreq_dt_platform_data { + bool have_governor_per_policy; +}; + +#endif /* __CPUFREQ_DT_H__ */ diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3dd4884c6f9e..3a64136bf21b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -916,58 +916,18 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) +static int add_cpu_dev_symlink(struct cpufreq_policy *policy, + struct device *dev) { - struct device *cpu_dev; - - pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu); - - if (!policy) - return 0; - - cpu_dev = get_cpu_device(cpu); - if (WARN_ON(!cpu_dev)) - return 0; - - return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); -} - -static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) -{ - struct device *cpu_dev; - - pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu); - - cpu_dev = get_cpu_device(cpu); - if (WARN_ON(!cpu_dev)) - return; - - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); + dev_dbg(dev, "%s: Adding symlink\n", __func__); + return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); } -/* Add/remove symlinks for all related CPUs */ -static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, + struct device *dev) { - unsigned int j; - int ret = 0; - - /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) { - ret = add_cpu_dev_symlink(policy, j); - if (ret) - break; - } - - return ret; -} - -static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) -{ - unsigned int j; - - /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) - remove_cpu_dev_symlink(policy, j); + dev_dbg(dev, "%s: Removing symlink\n", __func__); + sysfs_remove_link(&dev->kobj, "cpufreq"); } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -999,7 +959,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } - return cpufreq_add_dev_symlink(policy); + return 0; } __weak struct cpufreq_governor *cpufreq_default_governor(void) @@ -1073,13 +1033,9 @@ static void handle_update(struct work_struct *work) static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { - struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; int ret; - if (WARN_ON(!dev)) - return NULL; - policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return NULL; @@ -1133,7 +1089,6 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) down_write(&policy->rwsem); cpufreq_stats_free_table(policy); - cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; up_write(&policy->rwsem); @@ -1215,8 +1170,8 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); - /* Remember CPUs present at the policy creation time. */ - cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* Clear mask of registered CPUs */ + cpumask_clear(policy->real_cpus); } /* @@ -1331,6 +1286,8 @@ out_free_policy: return ret; } +static void cpufreq_offline(unsigned int cpu); + /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. @@ -1340,22 +1297,28 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { struct cpufreq_policy *policy; unsigned cpu = dev->id; + int ret; dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); - if (cpu_online(cpu)) - return cpufreq_online(cpu); + if (cpu_online(cpu)) { + ret = cpufreq_online(cpu); + if (ret) + return ret; + } - /* - * A hotplug notifier will follow and we will handle it as CPU online - * then. For now, just create the sysfs link, unless there is no policy - * or the link is already present. - */ + /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) return 0; - return add_cpu_dev_symlink(policy, cpu); + ret = add_cpu_dev_symlink(policy, dev); + if (ret) { + cpumask_clear_cpu(cpu, policy->real_cpus); + cpufreq_offline(cpu); + } + + return ret; } static void cpufreq_offline(unsigned int cpu) @@ -1436,7 +1399,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) cpufreq_offline(cpu); cpumask_clear_cpu(cpu, policy->real_cpus); - remove_cpu_dev_symlink(policy, cpu); + remove_cpu_dev_symlink(policy, dev); if (cpumask_empty(policy->real_cpus)) cpufreq_policy_free(policy, true); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e415349ab31b..642dd0f183a8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work) } static void dbs_update_util_handler(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be9eade147f2..806f2039571e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -181,6 +181,8 @@ struct _pid { * @cpu: CPU number for this instance data * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set + * @iowait_boost: iowait-related boost fraction + * @last_update: Time of the last update. * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @pid: Stores PID parameters for this CPU @@ -206,6 +208,7 @@ struct cpudata { struct vid_data vid; struct _pid pid; + u64 last_update; u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; @@ -216,6 +219,7 @@ struct cpudata { struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; #endif + unsigned int iowait_boost; }; static struct cpudata **all_cpu_data; @@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data; * @p_gain_pct: PID proportional gain * @i_gain_pct: PID integral gain * @d_gain_pct: PID derivative gain + * @boost_iowait: Whether or not to use iowait boosting. * * Stores per CPU model static PID configuration data. */ @@ -240,6 +245,7 @@ struct pstate_adjust_policy { int p_gain_pct; int d_gain_pct; int i_gain_pct; + bool boost_iowait; }; /** @@ -1029,7 +1035,7 @@ static struct cpu_defaults core_params = { }, }; -static struct cpu_defaults silvermont_params = { +static const struct cpu_defaults silvermont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1037,6 +1043,7 @@ static struct cpu_defaults silvermont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1050,7 +1057,7 @@ static struct cpu_defaults silvermont_params = { }, }; -static struct cpu_defaults airmont_params = { +static const struct cpu_defaults airmont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1058,6 +1065,7 @@ static struct cpu_defaults airmont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1071,7 +1079,7 @@ static struct cpu_defaults airmont_params = { }, }; -static struct cpu_defaults knl_params = { +static const struct cpu_defaults knl_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1091,7 +1099,7 @@ static struct cpu_defaults knl_params = { }, }; -static struct cpu_defaults bxt_params = { +static const struct cpu_defaults bxt_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1099,6 +1107,7 @@ static struct cpu_defaults bxt_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = core_get_max_pstate, @@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu) static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; - u64 cummulative_iowait, delta_iowait_us; - u64 delta_iowait_mperf; - u64 mperf, now; - int32_t cpu_load; + int32_t busy_frac, boost; - cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + busy_frac = div_fp(sample->mperf, sample->tsc); - /* - * Convert iowait time into number of IO cycles spent at max_freq. - * IO is considered as busy only for the cpu_load algorithm. For - * performance this is not needed since we always try to reach the - * maximum P-State, so we are already boosting the IOs. - */ - delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; - delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * - cpu->pstate.max_pstate, MSEC_PER_SEC); + boost = cpu->iowait_boost; + cpu->iowait_boost >>= 1; - mperf = cpu->sample.mperf + delta_iowait_mperf; - cpu->prev_cummulative_iowait = cummulative_iowait; + if (busy_frac < boost) + busy_frac = boost; - /* - * The load can be estimated as the ratio of the mperf counter - * running at a constant frequency during active periods - * (C0) and the time stamp counter running at the same frequency - * also during C-states. - */ - cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); - cpu->sample.busy_scaled = cpu_load; - - return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); + sample->busy_scaled = busy_frac * 100; + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1325,15 +1316,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - get_avg_frequency(cpu)); + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; + u64 delta_ns; + + if (pid_params.boost_iowait) { + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; + } + cpu->last_update = time; + } + delta_ns = time - cpu->sample.time; if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index be42f103db60..1b9bcd76c60e 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -123,7 +123,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { - dev_err(priv.dev, "Unable to get cpuclk"); + dev_err(priv.dev, "Unable to get cpuclk\n"); return PTR_ERR(priv.cpu_clk); } @@ -132,7 +132,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); if (IS_ERR(priv.ddr_clk)) { - dev_err(priv.dev, "Unable to get ddrclk"); + dev_err(priv.dev, "Unable to get ddrclk\n"); err = PTR_ERR(priv.ddr_clk); goto out_cpu; } @@ -142,7 +142,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.powersave_clk = of_clk_get_by_name(np, "powersave"); if (IS_ERR(priv.powersave_clk)) { - dev_err(priv.dev, "Unable to get powersave"); + dev_err(priv.dev, "Unable to get powersave\n"); err = PTR_ERR(priv.powersave_clk); goto out_ddr; } @@ -155,7 +155,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) if (!err) return 0; - dev_err(priv.dev, "Failed to register cpufreq driver"); + dev_err(priv.dev, "Failed to register cpufreq driver\n"); clk_disable_unprepare(priv.powersave_clk); out_ddr: diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index e8a7bf57b31b..ea7a4e1b68c2 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -105,7 +105,6 @@ static int scpi_cpufreq_remove(struct platform_device *pdev) static struct platform_driver scpi_cpufreq_platdrv = { .driver = { .name = "scpi-cpufreq", - .owner = THIS_MODULE, }, .probe = scpi_cpufreq_probe, .remove = scpi_cpufreq_remove, diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 04042038ec4b..b366e6d830ea 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -163,7 +163,7 @@ static int sti_cpufreq_set_opp_info(void) reg_fields = sti_cpufreq_match(); if (!reg_fields) { - dev_err(dev, "This SoC doesn't support voltage scaling"); + dev_err(dev, "This SoC doesn't support voltage scaling\n"); return -ENODEV; } diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 4ba3d3fe142f..f440d385ed34 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -121,6 +121,7 @@ static int __init arm_idle_init(void) dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { pr_err("Failed to allocate cpuidle device\n"); + ret = -ENOMEM; goto out_fail; } dev->cpu = cpu; diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index a5be56ec57f2..41254e702f1e 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -76,7 +76,7 @@ comment "DEVFREQ Drivers" config ARM_EXYNOS_BUS_DEVFREQ tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver" - depends on ARCH_EXYNOS + depends on ARCH_EXYNOS || COMPILE_TEST select DEVFREQ_GOV_SIMPLE_ONDEMAND select DEVFREQ_GOV_PASSIVE select DEVFREQ_EVENT_EXYNOS_PPMU @@ -91,14 +91,26 @@ config ARM_EXYNOS_BUS_DEVFREQ This does not yet operate with optimal voltages. config ARM_TEGRA_DEVFREQ - tristate "Tegra DEVFREQ Driver" - depends on ARCH_TEGRA_124_SOC - select DEVFREQ_GOV_SIMPLE_ONDEMAND - select PM_OPP - help - This adds the DEVFREQ driver for the Tegra family of SoCs. - It reads ACTMON counters of memory controllers and adjusts the - operating frequencies and voltages with OPP support. + tristate "Tegra DEVFREQ Driver" + depends on ARCH_TEGRA_124_SOC + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select PM_OPP + help + This adds the DEVFREQ driver for the Tegra family of SoCs. + It reads ACTMON counters of memory controllers and adjusts the + operating frequencies and voltages with OPP support. + +config ARM_RK3399_DMC_DEVFREQ + tristate "ARM RK3399 DMC DEVFREQ Driver" + depends on ARCH_ROCKCHIP + select DEVFREQ_EVENT_ROCKCHIP_DFI + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select PM_DEVFREQ_EVENT + select PM_OPP + help + This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller). + It sets the frequency for the memory controller and reads the usage counts + from hardware. source "drivers/devfreq/event/Kconfig" diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 09f11d9d40d5..fbff40a508a4 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o +obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra-devfreq.o # DEVFREQ Event Drivers diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index eb6f74a2b6b9..0fdae8608961 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -15,7 +15,7 @@ if PM_DEVFREQ_EVENT config DEVFREQ_EVENT_EXYNOS_NOCP tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver" - depends on ARCH_EXYNOS + depends on ARCH_EXYNOS || COMPILE_TEST select PM_OPP help This add the devfreq-event driver for Exynos SoC. It provides NoC @@ -23,11 +23,18 @@ config DEVFREQ_EVENT_EXYNOS_NOCP config DEVFREQ_EVENT_EXYNOS_PPMU tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver" - depends on ARCH_EXYNOS + depends on ARCH_EXYNOS || COMPILE_TEST select PM_OPP help This add the devfreq-event driver for Exynos SoC. It provides PPMU (Platform Performance Monitoring Unit) counters to estimate the utilization of each module. +config DEVFREQ_EVENT_ROCKCHIP_DFI + tristate "ROCKCHIP DFI DEVFREQ event Driver" + depends on ARCH_ROCKCHIP + help + This add the devfreq-event driver for Rockchip SoC. It provides DFI + (DDR Monitor Module) driver to count ddr load. + endif # PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile index 3d6afd352253..dda7090a47c6 100644 --- a/drivers/devfreq/event/Makefile +++ b/drivers/devfreq/event/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o +obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI) += rockchip-dfi.o diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 845bf25fb9fb..f55cf0eb2a66 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -406,8 +406,6 @@ static int of_get_devfreq_events(struct device_node *np, of_property_read_string(node, "event-name", &desc[j].name); j++; - - of_node_put(node); } info->desc = desc; diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c new file mode 100644 index 000000000000..43fcc5a7f515 --- /dev/null +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd + * Author: Lin Huang <hl@rock-chips.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/clk.h> +#include <linux/devfreq-event.h> +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/of.h> + +#define RK3399_DMC_NUM_CH 2 + +/* DDRMON_CTRL */ +#define DDRMON_CTRL 0x04 +#define CLR_DDRMON_CTRL (0x1f0000 << 0) +#define LPDDR4_EN (0x10001 << 4) +#define HARDWARE_EN (0x10001 << 3) +#define LPDDR3_EN (0x10001 << 2) +#define SOFTWARE_EN (0x10001 << 1) +#define SOFTWARE_DIS (0x10000 << 1) +#define TIME_CNT_EN (0x10001 << 0) + +#define DDRMON_CH0_COUNT_NUM 0x28 +#define DDRMON_CH0_DFI_ACCESS_NUM 0x2c +#define DDRMON_CH1_COUNT_NUM 0x3c +#define DDRMON_CH1_DFI_ACCESS_NUM 0x40 + +/* pmu grf */ +#define PMUGRF_OS_REG2 0x308 +#define DDRTYPE_SHIFT 13 +#define DDRTYPE_MASK 7 + +enum { + DDR3 = 3, + LPDDR3 = 6, + LPDDR4 = 7, + UNUSED = 0xFF +}; + +struct dmc_usage { + u32 access; + u32 total; +}; + +/* + * The dfi controller can monitor DDR load. It has an upper and lower threshold + * for the operating points. Whenever the usage leaves these bounds an event is + * generated to indicate the DDR frequency should be changed. + */ +struct rockchip_dfi { + struct devfreq_event_dev *edev; + struct devfreq_event_desc *desc; + struct dmc_usage ch_usage[RK3399_DMC_NUM_CH]; + struct device *dev; + void __iomem *regs; + struct regmap *regmap_pmu; + struct clk *clk; +}; + +static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + void __iomem *dfi_regs = info->regs; + u32 val; + u32 ddr_type; + + /* get ddr type */ + regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val); + ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK; + + /* clear DDRMON_CTRL setting */ + writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); + + /* set ddr type to dfi */ + if (ddr_type == LPDDR3) + writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); + else if (ddr_type == LPDDR4) + writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); + + /* enable count, use software mode */ + writel_relaxed(SOFTWARE_EN, dfi_regs + DDRMON_CTRL); +} + +static void rockchip_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + void __iomem *dfi_regs = info->regs; + + writel_relaxed(SOFTWARE_DIS, dfi_regs + DDRMON_CTRL); +} + +static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + u32 tmp, max = 0; + u32 i, busier_ch = 0; + void __iomem *dfi_regs = info->regs; + + rockchip_dfi_stop_hardware_counter(edev); + + /* Find out which channel is busier */ + for (i = 0; i < RK3399_DMC_NUM_CH; i++) { + info->ch_usage[i].access = readl_relaxed(dfi_regs + + DDRMON_CH0_DFI_ACCESS_NUM + i * 20) * 4; + info->ch_usage[i].total = readl_relaxed(dfi_regs + + DDRMON_CH0_COUNT_NUM + i * 20); + tmp = info->ch_usage[i].access; + if (tmp > max) { + busier_ch = i; + max = tmp; + } + } + rockchip_dfi_start_hardware_counter(edev); + + return busier_ch; +} + +static int rockchip_dfi_disable(struct devfreq_event_dev *edev) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + + rockchip_dfi_stop_hardware_counter(edev); + clk_disable_unprepare(info->clk); + + return 0; +} + +static int rockchip_dfi_enable(struct devfreq_event_dev *edev) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + int ret; + + ret = clk_prepare_enable(info->clk); + if (ret) { + dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ret); + return ret; + } + + rockchip_dfi_start_hardware_counter(edev); + return 0; +} + +static int rockchip_dfi_set_event(struct devfreq_event_dev *edev) +{ + return 0; +} + +static int rockchip_dfi_get_event(struct devfreq_event_dev *edev, + struct devfreq_event_data *edata) +{ + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + int busier_ch; + + busier_ch = rockchip_dfi_get_busier_ch(edev); + + edata->load_count = info->ch_usage[busier_ch].access; + edata->total_count = info->ch_usage[busier_ch].total; + + return 0; +} + +static const struct devfreq_event_ops rockchip_dfi_ops = { + .disable = rockchip_dfi_disable, + .enable = rockchip_dfi_enable, + .get_event = rockchip_dfi_get_event, + .set_event = rockchip_dfi_set_event, +}; + +static const struct of_device_id rockchip_dfi_id_match[] = { + { .compatible = "rockchip,rk3399-dfi" }, + { }, +}; + +static int rockchip_dfi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rockchip_dfi *data; + struct resource *res; + struct devfreq_event_desc *desc; + struct device_node *np = pdev->dev.of_node, *node; + + data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL); + if (!data) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->regs)) + return PTR_ERR(data->regs); + + data->clk = devm_clk_get(dev, "pclk_ddr_mon"); + if (IS_ERR(data->clk)) { + dev_err(dev, "Cannot get the clk dmc_clk\n"); + return PTR_ERR(data->clk); + }; + + /* try to find the optional reference to the pmu syscon */ + node = of_parse_phandle(np, "rockchip,pmu", 0); + if (node) { + data->regmap_pmu = syscon_node_to_regmap(node); + if (IS_ERR(data->regmap_pmu)) + return PTR_ERR(data->regmap_pmu); + } + data->dev = dev; + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->ops = &rockchip_dfi_ops; + desc->driver_data = data; + desc->name = np->name; + data->desc = desc; + + data->edev = devm_devfreq_event_add_edev(&pdev->dev, desc); + if (IS_ERR(data->edev)) { + dev_err(&pdev->dev, + "failed to add devfreq-event device\n"); + return PTR_ERR(data->edev); + } + + platform_set_drvdata(pdev, data); + + return 0; +} + +static struct platform_driver rockchip_dfi_driver = { + .probe = rockchip_dfi_probe, + .driver = { + .name = "rockchip-dfi", + .of_match_table = rockchip_dfi_id_match, + }, +}; +module_platform_driver(rockchip_dfi_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>"); +MODULE_DESCRIPTION("Rockchip DFI driver"); diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c new file mode 100644 index 000000000000..e24b73d66659 --- /dev/null +++ b/drivers/devfreq/rk3399_dmc.c @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd. + * Author: Lin Huang <hl@rock-chips.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/arm-smccc.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/devfreq.h> +#include <linux/devfreq-event.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> +#include <linux/regulator/consumer.h> +#include <linux/rwsem.h> +#include <linux/suspend.h> + +#include <soc/rockchip/rockchip_sip.h> + +struct dram_timing { + unsigned int ddr3_speed_bin; + unsigned int pd_idle; + unsigned int sr_idle; + unsigned int sr_mc_gate_idle; + unsigned int srpd_lite_idle; + unsigned int standby_idle; + unsigned int auto_pd_dis_freq; + unsigned int dram_dll_dis_freq; + unsigned int phy_dll_dis_freq; + unsigned int ddr3_odt_dis_freq; + unsigned int ddr3_drv; + unsigned int ddr3_odt; + unsigned int phy_ddr3_ca_drv; + unsigned int phy_ddr3_dq_drv; + unsigned int phy_ddr3_odt; + unsigned int lpddr3_odt_dis_freq; + unsigned int lpddr3_drv; + unsigned int lpddr3_odt; + unsigned int phy_lpddr3_ca_drv; + unsigned int phy_lpddr3_dq_drv; + unsigned int phy_lpddr3_odt; + unsigned int lpddr4_odt_dis_freq; + unsigned int lpddr4_drv; + unsigned int lpddr4_dq_odt; + unsigned int lpddr4_ca_odt; + unsigned int phy_lpddr4_ca_drv; + unsigned int phy_lpddr4_ck_cs_drv; + unsigned int phy_lpddr4_dq_drv; + unsigned int phy_lpddr4_odt; +}; + +struct rk3399_dmcfreq { + struct device *dev; + struct devfreq *devfreq; + struct devfreq_simple_ondemand_data ondemand_data; + struct clk *dmc_clk; + struct devfreq_event_dev *edev; + struct mutex lock; + struct dram_timing timing; + + /* + * DDR Converser of Frequency (DCF) is used to implement DDR frequency + * conversion without the participation of CPU, we will implement and + * control it in arm trust firmware. + */ + wait_queue_head_t wait_dcf_queue; + int irq; + int wait_dcf_flag; + struct regulator *vdd_center; + unsigned long rate, target_rate; + unsigned long volt, target_volt; + struct dev_pm_opp *curr_opp; +}; + +static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); + struct dev_pm_opp *opp; + unsigned long old_clk_rate = dmcfreq->rate; + unsigned long target_volt, target_rate; + int err; + + rcu_read_lock(); + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) { + rcu_read_unlock(); + return PTR_ERR(opp); + } + + target_rate = dev_pm_opp_get_freq(opp); + target_volt = dev_pm_opp_get_voltage(opp); + + dmcfreq->rate = dev_pm_opp_get_freq(dmcfreq->curr_opp); + dmcfreq->volt = dev_pm_opp_get_voltage(dmcfreq->curr_opp); + + rcu_read_unlock(); + + if (dmcfreq->rate == target_rate) + return 0; + + mutex_lock(&dmcfreq->lock); + + /* + * If frequency scaling from low to high, adjust voltage first. + * If frequency scaling from high to low, adjust frequency first. + */ + if (old_clk_rate < target_rate) { + err = regulator_set_voltage(dmcfreq->vdd_center, target_volt, + target_volt); + if (err) { + dev_err(dev, "Cannot to set voltage %lu uV\n", + target_volt); + goto out; + } + } + dmcfreq->wait_dcf_flag = 1; + + err = clk_set_rate(dmcfreq->dmc_clk, target_rate); + if (err) { + dev_err(dev, "Cannot to set frequency %lu (%d)\n", + target_rate, err); + regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt, + dmcfreq->volt); + goto out; + } + + /* + * Wait until bcf irq happen, it means freq scaling finish in + * arm trust firmware, use 100ms as timeout time. + */ + if (!wait_event_timeout(dmcfreq->wait_dcf_queue, + !dmcfreq->wait_dcf_flag, HZ / 10)) + dev_warn(dev, "Timeout waiting for dcf interrupt\n"); + + /* + * Check the dpll rate, + * There only two result we will get, + * 1. Ddr frequency scaling fail, we still get the old rate. + * 2. Ddr frequency scaling sucessful, we get the rate we set. + */ + dmcfreq->rate = clk_get_rate(dmcfreq->dmc_clk); + + /* If get the incorrect rate, set voltage to old value. */ + if (dmcfreq->rate != target_rate) { + dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\ + Current frequency %lu\n", target_rate, dmcfreq->rate); + regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt, + dmcfreq->volt); + goto out; + } else if (old_clk_rate > target_rate) + err = regulator_set_voltage(dmcfreq->vdd_center, target_volt, + target_volt); + if (err) + dev_err(dev, "Cannot to set vol %lu uV\n", target_volt); + + dmcfreq->curr_opp = opp; +out: + mutex_unlock(&dmcfreq->lock); + return err; +} + +static int rk3399_dmcfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); + struct devfreq_event_data edata; + int ret = 0; + + ret = devfreq_event_get_event(dmcfreq->edev, &edata); + if (ret < 0) + return ret; + + stat->current_frequency = dmcfreq->rate; + stat->busy_time = edata.load_count; + stat->total_time = edata.total_count; + + return ret; +} + +static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); + + *freq = dmcfreq->rate; + + return 0; +} + +static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = { + .polling_ms = 200, + .target = rk3399_dmcfreq_target, + .get_dev_status = rk3399_dmcfreq_get_dev_status, + .get_cur_freq = rk3399_dmcfreq_get_cur_freq, +}; + +static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); + int ret = 0; + + ret = devfreq_event_disable_edev(dmcfreq->edev); + if (ret < 0) { + dev_err(dev, "failed to disable the devfreq-event devices\n"); + return ret; + } + + ret = devfreq_suspend_device(dmcfreq->devfreq); + if (ret < 0) { + dev_err(dev, "failed to suspend the devfreq devices\n"); + return ret; + } + + return 0; +} + +static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); + int ret = 0; + + ret = devfreq_event_enable_edev(dmcfreq->edev); + if (ret < 0) { + dev_err(dev, "failed to enable the devfreq-event devices\n"); + return ret; + } + + ret = devfreq_resume_device(dmcfreq->devfreq); + if (ret < 0) { + dev_err(dev, "failed to resume the devfreq devices\n"); + return ret; + } + return ret; +} + +static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend, + rk3399_dmcfreq_resume); + +static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id) +{ + struct rk3399_dmcfreq *dmcfreq = dev_id; + struct arm_smccc_res res; + + dmcfreq->wait_dcf_flag = 0; + wake_up(&dmcfreq->wait_dcf_queue); + + /* Clear the DCF interrupt */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, + ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ, + 0, 0, 0, 0, &res); + + return IRQ_HANDLED; +} + +static int of_get_ddr_timings(struct dram_timing *timing, + struct device_node *np) +{ + int ret = 0; + + ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin", + &timing->ddr3_speed_bin); + ret |= of_property_read_u32(np, "rockchip,pd_idle", + &timing->pd_idle); + ret |= of_property_read_u32(np, "rockchip,sr_idle", + &timing->sr_idle); + ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle", + &timing->sr_mc_gate_idle); + ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle", + &timing->srpd_lite_idle); + ret |= of_property_read_u32(np, "rockchip,standby_idle", + &timing->standby_idle); + ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq", + &timing->auto_pd_dis_freq); + ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq", + &timing->dram_dll_dis_freq); + ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq", + &timing->phy_dll_dis_freq); + ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq", + &timing->ddr3_odt_dis_freq); + ret |= of_property_read_u32(np, "rockchip,ddr3_drv", + &timing->ddr3_drv); + ret |= of_property_read_u32(np, "rockchip,ddr3_odt", + &timing->ddr3_odt); + ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv", + &timing->phy_ddr3_ca_drv); + ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv", + &timing->phy_ddr3_dq_drv); + ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt", + &timing->phy_ddr3_odt); + ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq", + &timing->lpddr3_odt_dis_freq); + ret |= of_property_read_u32(np, "rockchip,lpddr3_drv", + &timing->lpddr3_drv); + ret |= of_property_read_u32(np, "rockchip,lpddr3_odt", + &timing->lpddr3_odt); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv", + &timing->phy_lpddr3_ca_drv); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv", + &timing->phy_lpddr3_dq_drv); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt", + &timing->phy_lpddr3_odt); + ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq", + &timing->lpddr4_odt_dis_freq); + ret |= of_property_read_u32(np, "rockchip,lpddr4_drv", + &timing->lpddr4_drv); + ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt", + &timing->lpddr4_dq_odt); + ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt", + &timing->lpddr4_ca_odt); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv", + &timing->phy_lpddr4_ca_drv); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv", + &timing->phy_lpddr4_ck_cs_drv); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv", + &timing->phy_lpddr4_dq_drv); + ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt", + &timing->phy_lpddr4_odt); + + return ret; +} + +static int rk3399_dmcfreq_probe(struct platform_device *pdev) +{ + struct arm_smccc_res res; + struct device *dev = &pdev->dev; + struct device_node *np = pdev->dev.of_node; + struct rk3399_dmcfreq *data; + int ret, irq, index, size; + uint32_t *timing; + struct dev_pm_opp *opp; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Cannot get the dmc interrupt resource\n"); + return -EINVAL; + } + data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); + if (!data) + return -ENOMEM; + + mutex_init(&data->lock); + + data->vdd_center = devm_regulator_get(dev, "center"); + if (IS_ERR(data->vdd_center)) { + dev_err(dev, "Cannot get the regulator \"center\"\n"); + return PTR_ERR(data->vdd_center); + } + + data->dmc_clk = devm_clk_get(dev, "dmc_clk"); + if (IS_ERR(data->dmc_clk)) { + dev_err(dev, "Cannot get the clk dmc_clk\n"); + return PTR_ERR(data->dmc_clk); + }; + + data->irq = irq; + ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0, + dev_name(dev), data); + if (ret) { + dev_err(dev, "Failed to request dmc irq: %d\n", ret); + return ret; + } + + init_waitqueue_head(&data->wait_dcf_queue); + data->wait_dcf_flag = 0; + + data->edev = devfreq_event_get_edev_by_phandle(dev, 0); + if (IS_ERR(data->edev)) + return -EPROBE_DEFER; + + ret = devfreq_event_enable_edev(data->edev); + if (ret < 0) { + dev_err(dev, "failed to enable devfreq-event devices\n"); + return ret; + } + + /* + * Get dram timing and pass it to arm trust firmware, + * the dram drvier in arm trust firmware will get these + * timing and to do dram initial. + */ + if (!of_get_ddr_timings(&data->timing, np)) { + timing = &data->timing.ddr3_speed_bin; + size = sizeof(struct dram_timing) / 4; + for (index = 0; index < size; index++) { + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index, + ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM, + 0, 0, 0, 0, &res); + if (res.a0) { + dev_err(dev, "Failed to set dram param: %ld\n", + res.a0); + return -EINVAL; + } + } + } + + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, + ROCKCHIP_SIP_CONFIG_DRAM_INIT, + 0, 0, 0, 0, &res); + + /* + * We add a devfreq driver to our parent since it has a device tree node + * with operating points. + */ + if (dev_pm_opp_of_add_table(dev)) { + dev_err(dev, "Invalid operating-points in device tree.\n"); + rcu_read_unlock(); + return -EINVAL; + } + + of_property_read_u32(np, "upthreshold", + &data->ondemand_data.upthreshold); + of_property_read_u32(np, "downdifferential", + &data->ondemand_data.downdifferential); + + data->rate = clk_get_rate(data->dmc_clk); + + rcu_read_lock(); + opp = devfreq_recommended_opp(dev, &data->rate, 0); + if (IS_ERR(opp)) { + rcu_read_unlock(); + return PTR_ERR(opp); + } + rcu_read_unlock(); + data->curr_opp = opp; + + rk3399_devfreq_dmc_profile.initial_freq = data->rate; + + data->devfreq = devfreq_add_device(dev, + &rk3399_devfreq_dmc_profile, + "simple_ondemand", + &data->ondemand_data); + if (IS_ERR(data->devfreq)) + return PTR_ERR(data->devfreq); + devm_devfreq_register_opp_notifier(dev, data->devfreq); + + data->dev = dev; + platform_set_drvdata(pdev, data); + + return 0; +} + +static const struct of_device_id rk3399dmc_devfreq_of_match[] = { + { .compatible = "rockchip,rk3399-dmc" }, + { }, +}; + +static struct platform_driver rk3399_dmcfreq_driver = { + .probe = rk3399_dmcfreq_probe, + .driver = { + .name = "rk3399-dmc-freq", + .pm = &rk3399_dmcfreq_pm, + .of_match_table = rk3399dmc_devfreq_of_match, + }, +}; +module_platform_driver(rk3399_dmcfreq_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>"); +MODULE_DESCRIPTION("RK3399 dmcfreq driver with devfreq framework"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index df7ab2458e50..39c01b942ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1708,11 +1708,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; + drm_crtc_force_disable_all(adev->ddev); /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); - drm_crtc_force_disable_all(adev->ddev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); kfree(adev->ip_block_status); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 7ea8aa7ca408..6bc712f32c8b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -175,6 +175,7 @@ struct nvkm_device_func { void (*fini)(struct nvkm_device *, bool suspend); resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar); resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar); + bool cpu_coherent; }; struct nvkm_device_quirk { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6190035edfea..864323b19cf7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -209,7 +209,8 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &drm->ttm.bdev; - nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; + if (!nvxx_device(&drm->device)->func->cpu_coherent) + nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; nvbo->page_shift = 12; if (drm->client.vm) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index b1b693219db3..62ad0300cfa5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1614,6 +1614,7 @@ nvkm_device_pci_func = { .fini = nvkm_device_pci_fini, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, + .cpu_coherent = !IS_ENABLED(CONFIG_ARM), }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 939682f18788..9b638bd905ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -245,6 +245,7 @@ nvkm_device_tegra_func = { .fini = nvkm_device_tegra_fini, .resource_addr = nvkm_device_tegra_resource_addr, .resource_size = nvkm_device_tegra_resource_size, + .cpu_coherent = false, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c index edec30fd3ecd..0a7b6ed5ed28 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c @@ -37,7 +37,10 @@ nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie) { struct nv04_fifo_chan *chan = nv04_fifo_chan(base); struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; + + mutex_lock(&chan->fifo->base.engine.subdev.mutex); nvkm_ramht_remove(imem->ramht, cookie); + mutex_unlock(&chan->fifo->base.engine.subdev.mutex); } static int diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index e6abc09b67e3..1f78ec2548ec 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3015,6 +3015,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->pdev->device == 0x6811 && rdev->pdev->revision == 0x81) max_mclk = 120000; + /* limit sclk/mclk on Jet parts for stability */ + if (rdev->pdev->device == 0x6665 && + rdev->pdev->revision == 0xc3) { + max_sclk = 75000; + max_mclk = 80000; + } if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 9688bfa92ccd..611b6b9bb3cb 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -122,7 +122,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, return 0; cmd = urb->transfer_buffer; - for (i = y; i < height ; i++) { + for (i = y; i < y + height ; i++) { const int line_offset = fb->base.pitches[0] * i; const int byte_offset = line_offset + (x * bpp); const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 5d11fea3c8ec..f3135ae22df4 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -950,6 +950,12 @@ static const struct input_device_id joydev_ids[] = { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT_MASK(EV_ABS) }, + .absbit = { BIT_MASK(ABS_Z) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, + .evbit = { BIT_MASK(EV_ABS) }, .absbit = { BIT_MASK(ABS_WHEEL) }, }, { diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index cc07ba0f044d..27fa8b87cd5f 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(struct mtd_info *mtd, int mode) unsigned long flags; u32 val; + /* Reset ECC hardware */ + davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET); + spin_lock_irqsave(&davinci_nand_lock, flags); /* Start 4-bit ECC calculation for read/write */ diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index a59361c36f40..5513bfd9cdc9 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -2169,7 +2169,7 @@ scan_tail: return 0; return_error: - if (info->dma) + if (!IS_ERR_OR_NULL(info->dma)) dma_release_channel(info->dma); if (nand_chip->ecc.priv) { nand_bch_free(nand_chip->ecc.priv); diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e21f7cc5ae4d..8d6208c0b400 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/if_arp.h> +#include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/skb.h> @@ -501,9 +502,8 @@ EXPORT_SYMBOL_GPL(can_free_echo_skb); /* * CAN device restart for bus-off recovery */ -static void can_restart(unsigned long data) +static void can_restart(struct net_device *dev) { - struct net_device *dev = (struct net_device *)data; struct can_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; struct sk_buff *skb; @@ -543,6 +543,14 @@ restart: netdev_err(dev, "Error %d during restart", err); } +static void can_restart_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); + + can_restart(priv->dev); +} + int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -556,8 +564,8 @@ int can_restart_now(struct net_device *dev) if (priv->state != CAN_STATE_BUS_OFF) return -EBUSY; - /* Runs as soon as possible in the timer context */ - mod_timer(&priv->restart_timer, jiffies); + cancel_delayed_work_sync(&priv->restart_work); + can_restart(dev); return 0; } @@ -578,8 +586,8 @@ void can_bus_off(struct net_device *dev) netif_carrier_off(dev); if (priv->restart_ms) - mod_timer(&priv->restart_timer, - jiffies + (priv->restart_ms * HZ) / 1000); + schedule_delayed_work(&priv->restart_work, + msecs_to_jiffies(priv->restart_ms)); } EXPORT_SYMBOL_GPL(can_bus_off); @@ -688,6 +696,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) return NULL; priv = netdev_priv(dev); + priv->dev = dev; if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; @@ -697,7 +706,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) priv->state = CAN_STATE_STOPPED; - init_timer(&priv->restart_timer); + INIT_DELAYED_WORK(&priv->restart_work, can_restart_work); return dev; } @@ -778,8 +787,6 @@ int open_candev(struct net_device *dev) if (!netif_carrier_ok(dev)) netif_carrier_on(dev); - setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev); - return 0; } EXPORT_SYMBOL_GPL(open_candev); @@ -794,7 +801,7 @@ void close_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - del_timer_sync(&priv->restart_timer); + cancel_delayed_work_sync(&priv->restart_work); can_flush_echo_skb(dev); } EXPORT_SYMBOL_GPL(close_candev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 8d4f8495dbb3..541456398dfb 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, static int bcmgenet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_gset(priv->phydev, cmd); } static int bcmgenet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_sset(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return phy_ethtool_get_eee(priv->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(priv->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(dev->phydev, e); + return phy_ethtool_set_eee(priv->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - return genphy_restart_aneg(dev->phydev); + struct bcmgenet_priv *priv = netdev_priv(dev); + + return genphy_restart_aneg(priv->phydev); } /* standard ethtool support functions. */ @@ -996,13 +1002,12 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { - struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(ndev->phydev); + phy_detach(priv->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) + if (!priv->phydev) val = -ENODEV; else - val = phy_mii_ioctl(dev->phydev, rq, cmd); + val = phy_mii_ioctl(priv->phydev, rq, cmd); break; default: @@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); - struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(ndev->phydev, + phy_mac_interrupt(priv->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2833,7 +2838,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(dev->phydev); + phy_start(priv->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2932,7 +2937,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(dev->phydev); + phy_stop(priv->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2958,7 +2963,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(dev->phydev); + phy_disconnect(priv->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3517,7 +3522,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(dev->phydev); + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3581,7 +3586,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); + phy_init_hw(priv->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3614,7 +3619,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(dev->phydev); + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f0868c56f05..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,6 +597,7 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; + struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e907acd81da9..457c3bc8cfff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (dev->phydev) { - phy_init_hw(dev->phydev); - phy_start_aneg(dev->phydev); + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); } } @@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(ndev->phydev, + fixed_phy_set_link_update(priv->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((phydev->supported & PHY_BASIC_FEATURES) == + if ((priv->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = dev->phydev; + phydev = priv->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev) } } + priv->phydev = phydev; + /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(phydev); + phy_disconnect(priv->phydev); return ret; } @@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - phydev->irq = PHY_IGNORE_INTERRUPT; + priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } + priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a2551bcd1027..ea967df4b202 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18122,14 +18122,14 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - /* We needn't recover from permanent error */ - if (state == pci_channel_io_frozen) - tp->pcierr_recovery = true; - /* We probably don't have netdev yet */ if (!netdev || !netif_running(netdev)) goto done; + /* We needn't recover from permanent error */ + if (state == pci_channel_io_frozen) + tp->pcierr_recovery = true; + tg3_phy_stop(tp); tg3_netif_stop(tp); @@ -18226,7 +18226,7 @@ static void tg3_io_resume(struct pci_dev *pdev) rtnl_lock(); - if (!netif_running(netdev)) + if (!netdev || !netif_running(netdev)) goto done; tg3_full_lock(tp, 0); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 01f7e811739b..692ee248e486 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -89,10 +89,10 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC, + .driver_data = FEC_QUIRK_USE_GASKET, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_HAS_RACC, + .driver_data = 0, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | @@ -180,6 +180,7 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) #define FEC_RACC_PRODIS (1 << 2) +#define FEC_RACC_SHIFT16 BIT(7) #define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS) /* @@ -945,9 +946,11 @@ fec_restart(struct net_device *ndev) #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) { - /* set RX checksum */ val = readl(fep->hwp + FEC_RACC); + /* align IP header */ + val |= FEC_RACC_SHIFT16; if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) + /* set RX checksum */ val |= FEC_RACC_OPTIONS; else val &= ~FEC_RACC_OPTIONS; @@ -1428,6 +1431,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) prefetch(skb->data - NET_IP_ALIGN); skb_put(skb, pkt_len - 4); data = skb->data; + +#if !defined(CONFIG_M5272) + if (fep->quirks & FEC_QUIRK_HAS_RACC) + data = skb_pull_inline(skb, 2); +#endif + if (!is_copybreak && need_swap) swap_buffer(data, pkt_len); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 715583f69d28..4d7bbd2df5c0 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -99,8 +99,11 @@ static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, nvdimm_map->size = size; kref_init(&nvdimm_map->kref); - if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) { + dev_err(&nvdimm_bus->dev, "failed to request %pa + %zd for %s\n", + &offset, size, dev_name(dev)); goto err_request_region; + } if (flags) nvdimm_map->mem = memremap(offset, size, flags); @@ -171,6 +174,9 @@ void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, kref_get(&nvdimm_map->kref); nvdimm_bus_unlock(dev); + if (!nvdimm_map) + return NULL; + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) return NULL; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8024a0ef86d3..0b78a8211f4a 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -52,10 +52,28 @@ struct nvdimm_drvdata { struct nd_region_data { int ns_count; int ns_active; - unsigned int flush_mask; - void __iomem *flush_wpq[0][0]; + unsigned int hints_shift; + void __iomem *flush_wpq[0]; }; +static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, + int dimm, int hint) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + return ndrd->flush_wpq[dimm * num + (hint & mask)]; +} + +static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm, + int hint, void __iomem *flush) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + ndrd->flush_wpq[dimm * num + (hint & mask)] = flush; +} + static inline struct nd_namespace_index *to_namespace_index( struct nvdimm_drvdata *ndd, int i) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e8d5ba7b29af..4c0ac4abb629 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -38,7 +38,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); - for (i = 0; i < nvdimm->num_flush; i++) { + for (i = 0; i < (1 << ndrd->hints_shift); i++) { struct resource *res = &nvdimm->flush_wpq[i]; unsigned long pfn = PHYS_PFN(res->start); void __iomem *flush_page; @@ -54,14 +54,15 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, if (j < i) flush_page = (void __iomem *) ((unsigned long) - ndrd->flush_wpq[dimm][j] & PAGE_MASK); + ndrd_get_flush_wpq(ndrd, dimm, j) + & PAGE_MASK); else flush_page = devm_nvdimm_ioremap(dev, - PHYS_PFN(pfn), PAGE_SIZE); + PFN_PHYS(pfn), PAGE_SIZE); if (!flush_page) return -ENXIO; - ndrd->flush_wpq[dimm][i] = flush_page - + (res->start & ~PAGE_MASK); + ndrd_set_flush_wpq(ndrd, dimm, i, flush_page + + (res->start & ~PAGE_MASK)); } return 0; @@ -93,7 +94,10 @@ int nd_region_activate(struct nd_region *nd_region) return -ENOMEM; dev_set_drvdata(dev, ndrd); - ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; + if (!num_flush) + return 0; + + ndrd->hints_shift = ilog2(num_flush); for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; @@ -900,8 +904,8 @@ void nvdimm_flush(struct nd_region *nd_region) */ wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) - if (ndrd->flush_wpq[i][0]) - writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); + if (ndrd_get_flush_wpq(ndrd, i, 0)) + writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); wmb(); } EXPORT_SYMBOL_GPL(nvdimm_flush); @@ -925,7 +929,7 @@ int nvdimm_has_flush(struct nd_region *nd_region) for (i = 0; i < nd_region->ndr_mappings; i++) /* flush hints present, flushing required */ - if (ndrd->flush_wpq[i][0]) + if (ndrd_get_flush_wpq(ndrd, i, 0)) return 1; /* diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index ed5a097f0801..f63d4b0deff0 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -16,6 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define pr_fmt(fmt) "OF: NUMA: " fmt + #include <linux/of.h> #include <linux/of_address.h> #include <linux/nodemask.h> @@ -49,10 +51,9 @@ static void __init of_numa_parse_cpu_nodes(void) if (r) continue; - pr_debug("NUMA: CPU on %u\n", nid); + pr_debug("CPU on %u\n", nid); if (nid >= MAX_NUMNODES) - pr_warn("NUMA: Node id %u exceeds maximum value\n", - nid); + pr_warn("Node id %u exceeds maximum value\n", nid); else node_set(nid, numa_nodes_parsed); } @@ -63,13 +64,9 @@ static int __init of_numa_parse_memory_nodes(void) struct device_node *np = NULL; struct resource rsrc; u32 nid; - int r = 0; - - for (;;) { - np = of_find_node_by_type(np, "memory"); - if (!np) - break; + int i, r; + for_each_node_by_type(np, "memory") { r = of_property_read_u32(np, "numa-node-id", &nid); if (r == -EINVAL) /* @@ -78,27 +75,23 @@ static int __init of_numa_parse_memory_nodes(void) * "numa-node-id" property */ continue; - else if (r) - /* some other error */ - break; - r = of_address_to_resource(np, 0, &rsrc); - if (r) { - pr_err("NUMA: bad reg property in memory node\n"); - break; + if (nid >= MAX_NUMNODES) { + pr_warn("Node id %u exceeds maximum value\n", nid); + r = -EINVAL; } - pr_debug("NUMA: base = %llx len = %llx, node = %u\n", - rsrc.start, rsrc.end - rsrc.start + 1, nid); - + for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) + r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); - r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); - if (r) - break; + if (!i || r) { + of_node_put(np); + pr_err("bad property in memory node\n"); + return r ? : -EINVAL; + } } - of_node_put(np); - return r; + return 0; } static int __init of_numa_parse_distance_map_v1(struct device_node *map) @@ -107,17 +100,17 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) int entry_count; int i; - pr_info("NUMA: parsing numa-distance-map-v1\n"); + pr_info("parsing numa-distance-map-v1\n"); matrix = of_get_property(map, "distance-matrix", NULL); if (!matrix) { - pr_err("NUMA: No distance-matrix property in distance-map\n"); + pr_err("No distance-matrix property in distance-map\n"); return -EINVAL; } entry_count = of_property_count_u32_elems(map, "distance-matrix"); if (entry_count <= 0) { - pr_err("NUMA: Invalid distance-matrix\n"); + pr_err("Invalid distance-matrix\n"); return -EINVAL; } @@ -132,7 +125,7 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) matrix++; numa_set_distance(nodea, nodeb, distance); - pr_debug("NUMA: distance[node%d -> node%d] = %d\n", + pr_debug("distance[node%d -> node%d] = %d\n", nodea, nodeb, distance); /* Set default distance of node B->A same as A->B */ @@ -166,8 +159,6 @@ int of_node_to_nid(struct device_node *device) np = of_node_get(device); while (np) { - struct device_node *parent; - r = of_property_read_u32(np, "numa-node-id", &nid); /* * -EINVAL indicates the property was not found, and @@ -178,22 +169,15 @@ int of_node_to_nid(struct device_node *device) if (r != -EINVAL) break; - parent = of_get_parent(np); - of_node_put(np); - np = parent; + np = of_get_next_parent(np); } if (np && r) - pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n", + pr_warn("Invalid \"numa-node-id\" property in node %s\n", np->name); of_node_put(np); - if (!r) { - if (nid >= MAX_NUMNODES) - pr_warn("NUMA: Node id %u exceeds maximum value\n", - nid); - else - return nid; - } + if (!r) + return nid; return NUMA_NO_NODE; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index f5e1008a223d..30370817bf13 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,6 +534,24 @@ static int armpmu_filter_match(struct perf_event *event) return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } +static ssize_t armpmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev)); + return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus); +} + +static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL); + +static struct attribute *armpmu_common_attrs[] = { + &dev_attr_cpus.attr, + NULL, +}; + +static struct attribute_group armpmu_common_attr_group = { + .attrs = armpmu_common_attrs, +}; + static void armpmu_init(struct arm_pmu *armpmu) { atomic_set(&armpmu->active_events, 0); @@ -549,7 +567,10 @@ static void armpmu_init(struct arm_pmu *armpmu) .stop = armpmu_stop, .read = armpmu_read, .filter_match = armpmu_filter_match, + .attr_groups = armpmu->attr_groups, }; + armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = + &armpmu_common_attr_group; } /* Set at runtime when we know what CPU type we are. */ @@ -602,7 +623,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) irqs = min(pmu_device->num_resources, num_possible_cpus()); irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { + if (irq > 0 && irq_is_percpu(irq)) { on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu_disable_percpu_irq, &irq, 1); free_percpu_irq(irq, &hw_events->percpu_pmu); @@ -616,7 +637,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); - if (irq >= 0) + if (irq > 0) free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } } @@ -638,7 +659,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) } irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { + if (irq > 0 && irq_is_percpu(irq)) { err = request_percpu_irq(irq, handler, "arm-pmu", &hw_events->percpu_pmu); if (err) { @@ -919,7 +940,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ irq = platform_get_irq(pdev, i); - if (irq >= 0) { + if (irq > 0) { bool spi = !irq_is_percpu(irq); if (i > 0 && spi != using_spi) { @@ -970,7 +991,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (cpumask_weight(&pmu->supported_cpus) == 0) { int irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) { + if (irq > 0 && irq_is_percpu(irq)) { /* If using PPIs, check the affinity of the partition */ int ret; @@ -1029,7 +1050,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); - } else { + } else if (probe_table) { cpumask_setall(&pmu->supported_cpus); ret = probe_current_pmu(pmu, probe_table); } @@ -1039,6 +1060,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, goto out_free; } + ret = cpu_pmu_init(pmu); if (ret) goto out_free; diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c index db9973bb53f1..fa0f19b975a6 100644 --- a/drivers/power/avs/smartreflex.c +++ b/drivers/power/avs/smartreflex.c @@ -136,7 +136,7 @@ static void sr_set_clk_length(struct omap_sr *sr) if (IS_ERR(fck)) { dev_err(&sr->pdev->dev, "%s: unable to get fck for device %s\n", - __func__, dev_name(&sr->pdev->dev)); + __func__, dev_name(&sr->pdev->dev)); return; } @@ -170,8 +170,8 @@ static void sr_start_vddautocomp(struct omap_sr *sr) { if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) { dev_warn(&sr->pdev->dev, - "%s: smartreflex class driver not registered\n", - __func__); + "%s: smartreflex class driver not registered\n", + __func__); return; } @@ -183,8 +183,8 @@ static void sr_stop_vddautocomp(struct omap_sr *sr) { if (!sr_class || !(sr_class->disable)) { dev_warn(&sr->pdev->dev, - "%s: smartreflex class driver not registered\n", - __func__); + "%s: smartreflex class driver not registered\n", + __func__); return; } @@ -225,9 +225,8 @@ static int sr_late_init(struct omap_sr *sr_info) error: list_del(&sr_info->node); - dev_err(&sr_info->pdev->dev, "%s: ERROR in registering" - "interrupt handler. Smartreflex will" - "not function as desired\n", __func__); + dev_err(&sr_info->pdev->dev, "%s: ERROR in registering interrupt handler. Smartreflex will not function as desired\n", + __func__); return ret; } @@ -263,7 +262,7 @@ static void sr_v1_disable(struct omap_sr *sr) if (timeout >= SR_DISABLE_TIMEOUT) dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n", - __func__); + __func__); /* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */ sr_modify_reg(sr, ERRCONFIG_V1, ERRCONFIG_MCUDISACKINTEN, @@ -308,7 +307,7 @@ static void sr_v2_disable(struct omap_sr *sr) if (timeout >= SR_DISABLE_TIMEOUT) dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n", - __func__); + __func__); /* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */ sr_write_reg(sr, IRQENABLE_CLR, IRQENABLE_MCUDISABLEACKINT); @@ -322,7 +321,7 @@ static struct omap_sr_nvalue_table *sr_retrieve_nvalue_row( if (!sr->nvalue_table) { dev_warn(&sr->pdev->dev, "%s: Missing ntarget value table\n", - __func__); + __func__); return NULL; } @@ -356,8 +355,8 @@ int sr_configure_errgen(struct omap_sr *sr) u8 senp_shift, senn_shift; if (!sr) { - pr_warn("%s: NULL omap_sr from %pF\n", __func__, - (void *)_RET_IP_); + pr_warn("%s: NULL omap_sr from %pF\n", + __func__, (void *)_RET_IP_); return -EINVAL; } @@ -387,8 +386,8 @@ int sr_configure_errgen(struct omap_sr *sr) vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2; break; default: - dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex" - "module without specifying the ip\n", __func__); + dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n", + __func__); return -EINVAL; } @@ -423,8 +422,8 @@ int sr_disable_errgen(struct omap_sr *sr) u32 vpboundint_en, vpboundint_st; if (!sr) { - pr_warn("%s: NULL omap_sr from %pF\n", __func__, - (void *)_RET_IP_); + pr_warn("%s: NULL omap_sr from %pF\n", + __func__, (void *)_RET_IP_); return -EINVAL; } @@ -440,8 +439,8 @@ int sr_disable_errgen(struct omap_sr *sr) vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2; break; default: - dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex" - "module without specifying the ip\n", __func__); + dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n", + __func__); return -EINVAL; } @@ -478,8 +477,8 @@ int sr_configure_minmax(struct omap_sr *sr) u8 senp_shift, senn_shift; if (!sr) { - pr_warn("%s: NULL omap_sr from %pF\n", __func__, - (void *)_RET_IP_); + pr_warn("%s: NULL omap_sr from %pF\n", + __func__, (void *)_RET_IP_); return -EINVAL; } @@ -504,8 +503,8 @@ int sr_configure_minmax(struct omap_sr *sr) senp_shift = SRCONFIG_SENPENABLE_V2_SHIFT; break; default: - dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex" - "module without specifying the ip\n", __func__); + dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n", + __func__); return -EINVAL; } @@ -537,8 +536,8 @@ int sr_configure_minmax(struct omap_sr *sr) IRQENABLE_MCUBOUNDSINT | IRQENABLE_MCUDISABLEACKINT); break; default: - dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex" - "module without specifying the ip\n", __func__); + dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n", + __func__); return -EINVAL; } @@ -563,16 +562,16 @@ int sr_enable(struct omap_sr *sr, unsigned long volt) int ret; if (!sr) { - pr_warn("%s: NULL omap_sr from %pF\n", __func__, - (void *)_RET_IP_); + pr_warn("%s: NULL omap_sr from %pF\n", + __func__, (void *)_RET_IP_); return -EINVAL; } volt_data = omap_voltage_get_voltdata(sr->voltdm, volt); if (IS_ERR(volt_data)) { - dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table" - "for nominal voltage %ld\n", __func__, volt); + dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table for nominal voltage %ld\n", + __func__, volt); return PTR_ERR(volt_data); } @@ -615,8 +614,8 @@ int sr_enable(struct omap_sr *sr, unsigned long volt) void sr_disable(struct omap_sr *sr) { if (!sr) { - pr_warn("%s: NULL omap_sr from %pF\n", __func__, - (void *)_RET_IP_); + pr_warn("%s: NULL omap_sr from %pF\n", + __func__, (void *)_RET_IP_); return; } @@ -658,13 +657,13 @@ int sr_register_class(struct omap_sr_class_data *class_data) struct omap_sr *sr_info; if (!class_data) { - pr_warning("%s:, Smartreflex class data passed is NULL\n", + pr_warn("%s:, Smartreflex class data passed is NULL\n", __func__); return -EINVAL; } if (sr_class) { - pr_warning("%s: Smartreflex class driver already registered\n", + pr_warn("%s: Smartreflex class driver already registered\n", __func__); return -EBUSY; } @@ -696,7 +695,7 @@ void omap_sr_enable(struct voltagedomain *voltdm) struct omap_sr *sr = _sr_lookup(voltdm); if (IS_ERR(sr)) { - pr_warning("%s: omap_sr struct for voltdm not found\n", __func__); + pr_warn("%s: omap_sr struct for voltdm not found\n", __func__); return; } @@ -704,8 +703,8 @@ void omap_sr_enable(struct voltagedomain *voltdm) return; if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) { - dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not" - "registered\n", __func__); + dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n", + __func__); return; } @@ -728,7 +727,7 @@ void omap_sr_disable(struct voltagedomain *voltdm) struct omap_sr *sr = _sr_lookup(voltdm); if (IS_ERR(sr)) { - pr_warning("%s: omap_sr struct for voltdm not found\n", __func__); + pr_warn("%s: omap_sr struct for voltdm not found\n", __func__); return; } @@ -736,8 +735,8 @@ void omap_sr_disable(struct voltagedomain *voltdm) return; if (!sr_class || !(sr_class->disable)) { - dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not" - "registered\n", __func__); + dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n", + __func__); return; } @@ -760,7 +759,7 @@ void omap_sr_disable_reset_volt(struct voltagedomain *voltdm) struct omap_sr *sr = _sr_lookup(voltdm); if (IS_ERR(sr)) { - pr_warning("%s: omap_sr struct for voltdm not found\n", __func__); + pr_warn("%s: omap_sr struct for voltdm not found\n", __func__); return; } @@ -768,8 +767,8 @@ void omap_sr_disable_reset_volt(struct voltagedomain *voltdm) return; if (!sr_class || !(sr_class->disable)) { - dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not" - "registered\n", __func__); + dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n", + __func__); return; } @@ -787,8 +786,8 @@ void omap_sr_disable_reset_volt(struct voltagedomain *voltdm) void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data) { if (!pmic_data) { - pr_warning("%s: Trying to register NULL PMIC data structure" - "with smartreflex\n", __func__); + pr_warn("%s: Trying to register NULL PMIC data structure with smartreflex\n", + __func__); return; } @@ -801,7 +800,7 @@ static int omap_sr_autocomp_show(void *data, u64 *val) struct omap_sr *sr_info = data; if (!sr_info) { - pr_warning("%s: omap_sr struct not found\n", __func__); + pr_warn("%s: omap_sr struct not found\n", __func__); return -EINVAL; } @@ -815,13 +814,13 @@ static int omap_sr_autocomp_store(void *data, u64 val) struct omap_sr *sr_info = data; if (!sr_info) { - pr_warning("%s: omap_sr struct not found\n", __func__); + pr_warn("%s: omap_sr struct not found\n", __func__); return -EINVAL; } /* Sanity check */ if (val > 1) { - pr_warning("%s: Invalid argument %lld\n", __func__, val); + pr_warn("%s: Invalid argument %lld\n", __func__, val); return -EINVAL; } @@ -848,19 +847,13 @@ static int __init omap_sr_probe(struct platform_device *pdev) int i, ret = 0; sr_info = devm_kzalloc(&pdev->dev, sizeof(struct omap_sr), GFP_KERNEL); - if (!sr_info) { - dev_err(&pdev->dev, "%s: unable to allocate sr_info\n", - __func__); + if (!sr_info) return -ENOMEM; - } sr_info->name = devm_kzalloc(&pdev->dev, SMARTREFLEX_NAME_LEN, GFP_KERNEL); - if (!sr_info->name) { - dev_err(&pdev->dev, "%s: unable to allocate SR instance name\n", - __func__); + if (!sr_info->name) return -ENOMEM; - } platform_set_drvdata(pdev, sr_info); @@ -912,7 +905,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) if (sr_class) { ret = sr_late_init(sr_info); if (ret) { - pr_warning("%s: Error in SR late init\n", __func__); + pr_warn("%s: Error in SR late init\n", __func__); goto err_list_del; } } @@ -923,7 +916,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(sr_dbg_dir)) { ret = PTR_ERR(sr_dbg_dir); pr_err("%s:sr debugfs dir creation failed(%d)\n", - __func__, ret); + __func__, ret); goto err_list_del; } } @@ -945,8 +938,8 @@ static int __init omap_sr_probe(struct platform_device *pdev) nvalue_dir = debugfs_create_dir("nvalue", sr_info->dbg_dir); if (IS_ERR_OR_NULL(nvalue_dir)) { - dev_err(&pdev->dev, "%s: Unable to create debugfs directory" - "for n-values\n", __func__); + dev_err(&pdev->dev, "%s: Unable to create debugfs directory for n-values\n", + __func__); ret = PTR_ERR(nvalue_dir); goto err_debugfs; } @@ -1053,12 +1046,12 @@ static int __init sr_init(void) if (sr_pmic_data && sr_pmic_data->sr_pmic_init) sr_pmic_data->sr_pmic_init(); else - pr_warning("%s: No PMIC hook to init smartreflex\n", __func__); + pr_warn("%s: No PMIC hook to init smartreflex\n", __func__); ret = platform_driver_probe(&smartreflex_driver, omap_sr_probe); if (ret) { pr_err("%s: platform driver register failed for SR\n", - __func__); + __func__); return ret; } diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ba9af4a2bd2a..ec6381e57eb7 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -486,6 +486,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; + shost->use_blk_mq = scsi_use_blk_mq; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1f36aca44394..1deb6adc411f 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1160,7 +1160,6 @@ bool scsi_use_blk_mq = true; bool scsi_use_blk_mq = false; #endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); -EXPORT_SYMBOL_GPL(scsi_use_blk_mq); static int __init init_scsi(void) { diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 57a4b9973320..85c8a51bc563 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -29,6 +29,7 @@ extern int scsi_init_hosts(void); extern void scsi_exit_hosts(void); /* scsi.c */ +extern bool scsi_use_blk_mq; extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); #ifdef CONFIG_SCSI_LOGGING diff --git a/drivers/soc/samsung/pm_domains.c b/drivers/soc/samsung/pm_domains.c index 4822346aadc6..7112004b8032 100644 --- a/drivers/soc/samsung/pm_domains.c +++ b/drivers/soc/samsung/pm_domains.c @@ -215,29 +215,22 @@ no_clk: /* Assign the child power domains to their parents */ for_each_matching_node(np, exynos_pm_domain_of_match) { - struct generic_pm_domain *child_domain, *parent_domain; - struct of_phandle_args args; + struct of_phandle_args child, parent; - args.np = np; - args.args_count = 0; - child_domain = of_genpd_get_from_provider(&args); - if (IS_ERR(child_domain)) - continue; + child.np = np; + child.args_count = 0; if (of_parse_phandle_with_args(np, "power-domains", - "#power-domain-cells", 0, &args) != 0) - continue; - - parent_domain = of_genpd_get_from_provider(&args); - if (IS_ERR(parent_domain)) + "#power-domain-cells", 0, + &parent) != 0) continue; - if (pm_genpd_add_subdomain(parent_domain, child_domain)) + if (of_genpd_add_subdomain(&parent, &child)) pr_warn("%s failed to add subdomain: %s\n", - parent_domain->name, child_domain->name); + parent.np->name, child.np->name); else pr_info("%s has as child subdomain: %s.\n", - parent_domain->name, child_domain->name); + parent.np->name, child.np->name); } return 0; diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c index 45807d8287d1..86dc41101610 100644 --- a/drivers/staging/board/board.c +++ b/drivers/staging/board/board.c @@ -140,7 +140,6 @@ static int board_staging_add_dev_domain(struct platform_device *pdev, const char *domain) { struct of_phandle_args pd_args; - struct generic_pm_domain *pd; struct device_node *np; np = of_find_node_by_path(domain); @@ -151,14 +150,8 @@ static int board_staging_add_dev_domain(struct platform_device *pdev, pd_args.np = np; pd_args.args_count = 0; - pd = of_genpd_get_from_provider(&pd_args); - if (IS_ERR(pd)) { - pr_err("Cannot find genpd %s (%ld)\n", domain, PTR_ERR(pd)); - return PTR_ERR(pd); - } - pr_debug("Found genpd %s for device %s\n", pd->name, pdev->name); - return pm_genpd_add_device(pd, &pdev->dev); + return of_genpd_add_device(&pd_args, &pdev->dev); } #else static inline int board_staging_add_dev_domain(struct platform_device *pdev, diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 98d36548153d..bbb4b3e5b4ff 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1842,6 +1842,16 @@ out_commit: ocfs2_commit_trans(osb, handle); out: + /* + * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(), + * even in case of error here like ENOSPC and ENOMEM. So, we need + * to unlock the target page manually to prevent deadlocks when + * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED + * to VM code. + */ + if (wc->w_target_locked) + unlock_page(mmap_page); + ocfs2_free_write_ctxt(inode, wc); if (data_ac) { diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5261751f6bd4..5f5270941ba0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,7 @@ enum can_mode { * CAN common private data */ struct can_priv { + struct net_device *dev; struct can_device_stats can_stats; struct can_bittiming bittiming, data_bittiming; @@ -47,7 +48,7 @@ struct can_priv { u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; - struct timer_list restart_timer; + struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 797d9c8e9a1b..ad4f1f33a74e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,11 @@ static inline void cpu_hotplug_done(void) {} #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP -extern int disable_nonboot_cpus(void); +extern int freeze_secondary_cpus(int primary); +static inline int disable_nonboot_cpus(void) +{ + return freeze_secondary_cpus(0); +} extern void enable_nonboot_cpus(void); #else /* !CONFIG_PM_SLEEP_SMP */ static inline int disable_nonboot_cpus(void) { return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 34bd80512a0c..eb445a4e2a83 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -47,6 +47,8 @@ enum cpuhp_state { CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, + CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, + CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index 0a83a1e648b0..4db00b02ca3f 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -148,11 +148,6 @@ static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) return -EINVAL; } -static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) -{ - return ERR_PTR(-EINVAL); -} - static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( struct device *dev, int index) { diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66533e18276c..dc69df04abc1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -718,7 +718,7 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_mmap_writecombine dma_mmap_wc #endif -#ifdef CONFIG_NEED_DMA_MAP_STATE +#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG) #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 661af564fae8..a534c7f15a61 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,6 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); + * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); + * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * @@ -270,6 +272,16 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ + struct static_key_true name[count] = { \ + [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ + } + +#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ + struct static_key_false name[count] = { \ + [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ + } + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d351fd3e1049..e5fb81376e92 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -120,5 +120,5 @@ struct mfc_cache { struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3987b64040c5..19a1c0c2993b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -116,7 +116,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e18843809eec..9ff07d3fc8de 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -14,7 +14,7 @@ #include <linux/interrupt.h> #include <linux/perf_event.h> - +#include <linux/sysfs.h> #include <asm/cputype.h> /* @@ -77,6 +77,13 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; }; +enum armpmu_attr_groups { + ARMPMU_ATTR_GROUP_COMMON, + ARMPMU_ATTR_GROUP_EVENTS, + ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_NR_ATTR_GROUPS +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; @@ -111,6 +118,8 @@ struct arm_pmu { struct pmu_hw_events __percpu *hw_events; struct list_head entry; struct notifier_block cpu_pm_nb; + /* the attr_groups array must be NULL-terminated */ + const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) @@ -151,6 +160,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +#define ARMV8_PMU_PDEV_NAME "armv8-pmu" + #endif /* CONFIG_ARM_PMU */ #endif /* __ARM_PMU_H__ */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 31fec858088c..a09fe5c009c8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -51,6 +51,8 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; + struct fwnode_handle *provider; /* Identity of the domain provider */ + bool has_provider; const char *name; atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ @@ -116,7 +118,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td); @@ -129,6 +130,7 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); +extern int pm_genpd_remove(struct generic_pm_domain *genpd); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -138,10 +140,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) { return ERR_PTR(-ENOSYS); } -static inline struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev) -{ - return NULL; -} static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) @@ -168,6 +166,10 @@ static inline int pm_genpd_init(struct generic_pm_domain *genpd, { return -ENOSYS; } +static inline int pm_genpd_remove(struct generic_pm_domain *genpd) +{ + return -ENOTSUPP; +} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, @@ -192,57 +194,57 @@ struct genpd_onecell_data { unsigned int num_domains; }; -typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, - void *data); - #ifdef CONFIG_PM_GENERIC_DOMAINS_OF -int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, - void *data); +int of_genpd_add_provider_simple(struct device_node *np, + struct generic_pm_domain *genpd); +int of_genpd_add_provider_onecell(struct device_node *np, + struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -struct generic_pm_domain *of_genpd_get_from_provider( - struct of_phandle_args *genpdspec); - -struct generic_pm_domain *__of_genpd_xlate_simple( - struct of_phandle_args *genpdspec, - void *data); -struct generic_pm_domain *__of_genpd_xlate_onecell( - struct of_phandle_args *genpdspec, - void *data); +extern int of_genpd_add_device(struct of_phandle_args *args, + struct device *dev); +extern int of_genpd_add_subdomain(struct of_phandle_args *parent, + struct of_phandle_args *new_subdomain); +extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ -static inline int __of_genpd_add_provider(struct device_node *np, - genpd_xlate_t xlate, void *data) +static inline int of_genpd_add_provider_simple(struct device_node *np, + struct generic_pm_domain *genpd) { - return 0; + return -ENOTSUPP; } -static inline void of_genpd_del_provider(struct device_node *np) {} -static inline struct generic_pm_domain *of_genpd_get_from_provider( - struct of_phandle_args *genpdspec) +static inline int of_genpd_add_provider_onecell(struct device_node *np, + struct genpd_onecell_data *data) { - return NULL; + return -ENOTSUPP; } -#define __of_genpd_xlate_simple NULL -#define __of_genpd_xlate_onecell NULL +static inline void of_genpd_del_provider(struct device_node *np) {} -static inline int genpd_dev_pm_attach(struct device *dev) +static inline int of_genpd_add_device(struct of_phandle_args *args, + struct device *dev) { return -ENODEV; } -#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ -static inline int of_genpd_add_provider_simple(struct device_node *np, - struct generic_pm_domain *genpd) +static inline int of_genpd_add_subdomain(struct of_phandle_args *parent, + struct of_phandle_args *new_subdomain) { - return __of_genpd_add_provider(np, __of_genpd_xlate_simple, genpd); + return -ENODEV; } -static inline int of_genpd_add_provider_onecell(struct device_node *np, - struct genpd_onecell_data *data) + +static inline int genpd_dev_pm_attach(struct device *dev) +{ + return -ENODEV; +} + +static inline +struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data); + return ERR_PTR(-ENOTSUPP); } +#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ #ifdef CONFIG_PM extern int dev_pm_domain_attach(struct device *dev, bool power_on); diff --git a/include/linux/property.h b/include/linux/property.h index 3a2f9ae25c86..856e50b2140c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -190,7 +190,7 @@ struct property_entry { .length = ARRAY_SIZE(_val_) * sizeof(_type_), \ .is_array = true, \ .is_string = false, \ - { .pointer = { _type_##_data = _val_ } }, \ + { .pointer = { ._type_##_data = _val_ } }, \ } #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..98fe95fea30c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3469,15 +3469,20 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + #ifdef CONFIG_CPU_FREQ struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); }; void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)); + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 7693e39b14fe..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -245,6 +245,7 @@ static inline bool idle_should_freeze(void) return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER); } +extern void __init pm_states_init(void); extern void freeze_set_ops(const struct platform_freeze_ops *ops); extern void freeze_wake(void); @@ -279,6 +280,7 @@ static inline bool pm_resume_via_firmware(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool idle_should_freeze(void) { return false; } +static inline void __init pm_states_init(void) {} static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} static inline void freeze_wake(void) {} #endif /* !CONFIG_SUSPEND */ diff --git a/include/linux/swap.h b/include/linux/swap.h index b17cc4830fa6..4a529c984a3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,6 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_pages(node)); node->count--; } @@ -272,6 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b10d26..ced0df374e60 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -554,6 +554,9 @@ struct sctp_chunk { atomic_t refcnt; + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* This is our link to the per-transport transmitted list. */ struct list_head transmitted_list; @@ -603,16 +606,6 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; - /* We use this field to record param for prsctp policies, - * for TTL policy, it is the time_to_drop of this chunk, - * for RTX policy, it is the max_sent_count of this chunk, - * for PRIO policy, it is the priority of this chunk. - */ - unsigned long prsctp_param; - - /* How many times this chunk have been sent, for prsctp RTX policy */ - int sent_count; - /* Which association does this belong to? */ struct sctp_association *asoc; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 0dee7afa93d6..7e4cd53139ed 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -771,12 +771,9 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost) shost->tmf_in_progress; } -extern bool scsi_use_blk_mq; - static inline bool shost_use_blk_mq(struct Scsi_Host *shost) { - return scsi_use_blk_mq; - + return shost->use_blk_mq; } extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 19e50300ce7d..54e3aad32806 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample, u64 mperf, u64 aperf, u64 tsc, - u32 freq + u32 freq, + u32 io_boost ), TP_ARGS(core_busy, @@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample, mperf, aperf, tsc, - freq + freq, + io_boost ), TP_STRUCT__entry( @@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample, __field(u64, aperf) __field(u64, tsc) __field(u32, freq) + __field(u32, io_boost) ), TP_fast_assign( @@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample, __entry->aperf = aperf; __entry->tsc = tsc; __entry->freq = freq; + __entry->io_boost = io_boost; ), - TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ", + TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->from, @@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, - (unsigned long)__entry->freq + (unsigned long)__entry->freq, + (unsigned long)__entry->io_boost ) ); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5e8dab5bf9ad..d6b729beba49 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ - if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { - ret = -EBUSY; - goto out_unlock; + if (enable && cgroup_parent(cgrp)) { + struct cgrp_cset_link *link; + + /* + * Because namespaces pin csets too, @cgrp->cset_links + * might not be empty even when @cgrp is empty. Walk and + * verify each cset. + */ + spin_lock_irq(&css_set_lock); + + ret = 0; + list_for_each_entry(link, &cgrp->cset_links, cset_link) { + if (css_set_populated(link->cset)) { + ret = -EBUSY; + break; + } + } + + spin_unlock_irq(&css_set_lock); + + if (ret) + goto out_unlock; } /* save and update control masks and prepare csses */ @@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile) * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question * - * Return the number of tasks in the cgroup. + * Return the number of tasks in the cgroup. The returned number can be + * higher than the actual number of tasks due to css_set references from + * namespace roots and temporary usages. */ static int cgroup_task_count(const struct cgroup *cgrp) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 341bf80f80bd..ebbf027dd4a1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1024,12 +1024,13 @@ EXPORT_SYMBOL_GPL(cpu_up); #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -int disable_nonboot_cpus(void) +int freeze_secondary_cpus(int primary) { - int cpu, first_cpu, error = 0; + int cpu, error = 0; cpu_maps_update_begin(); - first_cpu = cpumask_first(cpu_online_mask); + if (!cpu_online(primary)) + primary = cpumask_first(cpu_online_mask); /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time @@ -1038,7 +1039,7 @@ int disable_nonboot_cpus(void) pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { - if (cpu == first_cpu) + if (cpu == primary) continue; trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c27e53326bef..2b4c20ab5bbe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = { /* * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy - * until we find one that does have some online cpus. The top - * cpuset always has some cpus online. + * until we find one that does have some online cpus. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. @@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs_type = { */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { cs = parent_cs(cs); + if (unlikely(!cs)) { + /* + * The top cpuset doesn't have any online cpu as a + * consequence of a race between cpuset_hotplug_work + * and cpu hotplug notifier. But we know the top + * cpuset's effective_cpus is on its way to to be + * identical to cpu_online_mask. + */ + cpumask_copy(pmask, cpu_online_mask); + return; + } + } cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } @@ -2074,7 +2085,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) * which could have been changed by cpuset just after it inherits the * state from the parent and before it sits on the cgroup's task list. */ -void cpuset_fork(struct task_struct *task) +static void cpuset_fork(struct task_struct *task) { if (task_css_is_root(task, cpuset_cgrp_id)) return; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 68d3ebc12601..e8517b63eb37 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -186,7 +186,7 @@ config PM_SLEEP_DEBUG config DPM_WATCHDOG bool "Device suspend/resume watchdog" - depends on PM_DEBUG && PSTORE + depends on PM_DEBUG && PSTORE && EXPERT ---help--- Sets up a watchdog timer to capture drivers that are locked up attempting to suspend/resume a device. @@ -197,7 +197,7 @@ config DPM_WATCHDOG config DPM_WATCHDOG_TIMEOUT int "Watchdog timeout in seconds" range 1 120 - default 60 + default 120 depends on DPM_WATCHDOG config PM_TRACE diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 33c79b6105c5..b26dbc48c75b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -306,8 +306,10 @@ static int create_image(int platform_mode) if (error) printk(KERN_ERR "PM: Error %d creating hibernation image\n", error); - if (!in_suspend) + if (!in_suspend) { events_check_enabled = false; + clear_free_pages(); + } platform_leave(platform_mode); @@ -1189,22 +1191,6 @@ static int __init nohibernate_setup(char *str) return 1; } -static int __init page_poison_nohibernate_setup(char *str) -{ -#ifdef CONFIG_PAGE_POISONING_ZERO - /* - * The zeroing option for page poison skips the checks on alloc. - * since hibernation doesn't save free pages there's no way to - * guarantee the pages will still be zeroed. - */ - if (!strcmp(str, "on")) { - pr_info("Disabling hibernation due to page poisoning\n"); - return nohibernate_setup(str); - } -#endif - return 1; -} - __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); @@ -1212,4 +1198,3 @@ __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); __setup("nohibernate", nohibernate_setup); -__setup("page_poison=", page_poison_nohibernate_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c index 5ea50b1b7595..281a697fd458 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -644,6 +644,7 @@ static int __init pm_init(void) return error; hibernate_image_size_init(); hibernate_reserved_size_init(); + pm_states_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index 242d8b827dd5..56d1d0dedf76 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -110,6 +110,8 @@ extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); extern int hibernate_preallocate_memory(void); +extern void clear_free_pages(void); + /** * Auxiliary structure used for reading the snapshot image data and * metadata from and writing them to the list of page backup entries diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b02228411d57..4f0f0604f1c4 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1132,6 +1132,28 @@ void free_basic_memory_bitmaps(void) pr_debug("PM: Basic memory bitmaps freed\n"); } +void clear_free_pages(void) +{ +#ifdef CONFIG_PAGE_POISONING_ZERO + struct memory_bitmap *bm = free_pages_map; + unsigned long pfn; + + if (WARN_ON(!(free_pages_map))) + return; + + memory_bm_position_reset(bm); + pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (pfn_valid(pfn)) + clear_highpage(pfn_to_page(pfn)); + + pfn = memory_bm_next_pfn(bm); + } + memory_bm_position_reset(bm); + pr_info("PM: free pages cleared after restore\n"); +#endif /* PAGE_POISONING_ZERO */ +} + /** * snapshot_additional_pages - Estimate the number of extra pages needed. * @zone: Memory zone to carry out the computation for. diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0acab9d7f96f..1e7f5da648d9 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -118,10 +118,18 @@ static bool valid_state(suspend_state_t state) */ static bool relative_states; +void __init pm_states_init(void) +{ + /* + * freeze state should be supported even without any suspend_ops, + * initialize pm_states accordingly here + */ + pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2]; +} + static int __init sleep_states_setup(char *str) { relative_states = !strncmp(str, "1", 1); - pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2]; return 1; } @@ -211,7 +219,7 @@ static int platform_suspend_begin(suspend_state_t state) { if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) return freeze_ops->begin(); - else if (suspend_ops->begin) + else if (suspend_ops && suspend_ops->begin) return suspend_ops->begin(state); else return 0; @@ -221,7 +229,7 @@ static void platform_resume_end(suspend_state_t state) { if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) freeze_ops->end(); - else if (suspend_ops->end) + else if (suspend_ops && suspend_ops->end) suspend_ops->end(); } diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 1141954e73b4..dbc51442ecbc 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); */ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)) + unsigned int flags)) { if (WARN_ON(!data || !func)) return; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a84641b222c1..69e06898997d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cpufreq.h> -#include <linux/module.h> #include <linux/slab.h> #include <trace/events/power.h> @@ -48,11 +47,14 @@ struct sugov_cpu { struct sugov_policy *sg_policy; unsigned int cached_raw_freq; + unsigned long iowait_boost; + unsigned long iowait_boost_max; + u64 last_update; /* The fields below are only needed when sharing a policy. */ unsigned long util; unsigned long max; - u64 last_update; + unsigned int flags; }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, return cpufreq_driver_resolve_freq(policy, freq); } +static void sugov_get_util(unsigned long *util, unsigned long *max) +{ + struct rq *rq = this_rq(); + unsigned long cfs_max; + + cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); + + *util = min(rq->cfs.avg.util_avg, cfs_max); + *max = cfs_max; +} + +static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + if (flags & SCHED_CPUFREQ_IOWAIT) { + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else if (sg_cpu->iowait_boost) { + s64 delta_ns = time - sg_cpu->last_update; + + /* Clear iowait_boost if the CPU apprears to have been idle. */ + if (delta_ns > TICK_NSEC) + sg_cpu->iowait_boost = 0; + } +} + +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, + unsigned long *max) +{ + unsigned long boost_util = sg_cpu->iowait_boost; + unsigned long boost_max = sg_cpu->iowait_boost_max; + + if (!boost_util) + return; + + if (*util * boost_max < *max * boost_util) { + *util = boost_util; + *max = boost_max; + } + sg_cpu->iowait_boost >>= 1; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util, max; unsigned int next_f; + sugov_set_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + if (!sugov_should_update_freq(sg_policy, time)) return; - next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : - get_next_freq(sg_cpu, util, max); + if (flags & SCHED_CPUFREQ_RT_DL) { + next_f = policy->cpuinfo.max_freq; + } else { + sugov_get_util(&util, &max); + sugov_iowait_boost(sg_cpu, &util, &max); + next_f = get_next_freq(sg_cpu, util, max); + } sugov_update_commit(sg_policy, time, next_f); } static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max) + unsigned long util, unsigned long max, + unsigned int flags) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; @@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned int j; - if (util == ULONG_MAX) + if (flags & SCHED_CPUFREQ_RT_DL) return max_f; + sugov_iowait_boost(sg_cpu, &util, &max); + for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu; unsigned long j_util, j_max; @@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, * frequency update and the time elapsed between the last update * of the CPU utilization and the last frequency update is long * enough, don't take the CPU into account as it probably is - * idle now. + * idle now (and clear iowait_boost for it). */ delta_ns = last_freq_update_time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { + j_sg_cpu->iowait_boost = 0; continue; - - j_util = j_sg_cpu->util; - if (j_util == ULONG_MAX) + } + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; + j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; if (j_util * max > j_max * util) { util = j_util; max = j_max; } + + sugov_iowait_boost(j_sg_cpu, &util, &max); } return get_next_freq(sg_cpu, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned long util, max; unsigned int next_f; + sugov_get_util(&util, &max); + raw_spin_lock(&sg_policy->update_lock); sg_cpu->util = util; sg_cpu->max = max; + sg_cpu->flags = flags; + + sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); sugov_update_commit(sg_policy, time, next_f); } @@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->sg_policy = sg_policy; if (policy_is_shared(policy)) { - sg_cpu->util = ULONG_MAX; + sg_cpu->util = 0; sg_cpu->max = 0; + sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; + sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, sugov_update_shared); } else { @@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = { .limits = sugov_limits, }; -static int __init sugov_module_init(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} - -static void __exit sugov_module_exit(void) -{ - cpufreq_unregister_governor(&schedutil_gov); -} - -MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>"); -MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); -MODULE_LICENSE("GPL"); - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL struct cpufreq_governor *cpufreq_default_governor(void) { return &schedutil_gov; } - -fs_initcall(sugov_module_init); -#else -module_init(sugov_module_init); #endif -module_exit(sugov_module_exit); + +static int __init sugov_register(void) +{ + return cpufreq_register_governor(&schedutil_gov); +} +fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1ce8867283dc..974779656999 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq) return; } - /* kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + /* kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 039de34f1521..a5cd07b25aa1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); - - if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { - unsigned long max = rq->cpu_capacity_orig; - + if (&this_rq()->cfs == cfs_rq) { /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be @@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), - min(cfs_rq->avg.util_avg, max), max); + cpufreq_update_util(rq_of(cfs_rq), 0); } } @@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) static inline void update_load_avg(struct sched_entity *se, int not_used) { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - struct rq *rq = rq_of(cfs_rq); - - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_of(cfs_rq_of(se)), 0); } static inline void @@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + /* + * If in_iowait is set, the code below may not trigger any cpufreq + * utilization updates, so do it here explicitly with the IOWAIT flag + * passed. + */ + if (p->in_iowait) + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); + for_each_sched_entity(se) { if (se->on_rq) break; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d5690b722691..2516b8df6dbb 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec <= 0)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c64fc5114004..b7fc1ced4380 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. + * @rq: Runqueue to carry out the update for. + * @flags: Update reason flags. * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. + * This function is called by the scheduler on the CPU whose utilization is + * being updated. * * It can only be called from RCU-sched read-side critical sections. - */ -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data) - data->func(data, time, util, max); -} - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. * * The way cpufreq is currently arranged requires it to evaluate the CPU * performance state (frequency/voltage) on a regular basis to prevent it from @@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. */ -static inline void cpufreq_trigger_update(u64 time) +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) +{ + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, rq_clock(rq), flags); +} + +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) { - cpufreq_update_util(time, ULONG_MAX, 0); + if (cpu_of(rq) == smp_processor_id()) + cpufreq_update_util(rq, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 22f4cd96acb0..afcc550877ff 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -76,8 +76,6 @@ config PAGE_POISONING_ZERO no longer necessary to write zeros when GFP_ZERO is used on allocation. - Enabling page poisoning with this option will disable hibernation - If unsure, say N bool diff --git a/mm/filemap.c b/mm/filemap.c index 8a287dfc5372..2d0986a64f1f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -110,6 +110,62 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page, void **shadowp) +{ + struct radix_tree_node *node; + void **slot; + int error; + + error = __radix_tree_create(&mapping->page_tree, page->index, 0, + &node, &slot); + if (error) + return error; + if (*slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + + mapping->nrexceptional--; + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } + } + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + if (node) { + workingset_node_pages_inc(node); + /* + * Don't track node that contains actual pages. + * + * Avoid acquiring the list_lru lock if already + * untracked. The list_empty() test is safe as + * node->private_list is protected by + * mapping->tree_lock. + */ + if (!list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + } + return 0; +} + static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { @@ -561,7 +617,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL); - error = radix_tree_insert(&mapping->page_tree, offset, new); + error = page_cache_tree_insert(mapping, new, NULL); BUG_ON(error); mapping->nrpages++; @@ -584,62 +640,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static int page_cache_tree_insert(struct address_space *mapping, - struct page *page, void **shadowp) -{ - struct radix_tree_node *node; - void **slot; - int error; - - error = __radix_tree_create(&mapping->page_tree, page->index, 0, - &node, &slot); - if (error) - return error; - if (*slot) { - void *p; - - p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); - if (!radix_tree_exceptional_entry(p)) - return -EEXIST; - - mapping->nrexceptional--; - if (!dax_mapping(mapping)) { - if (shadowp) - *shadowp = p; - if (node) - workingset_node_shadows_dec(node); - } else { - /* DAX can replace empty locked entry with a hole */ - WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); - /* DAX accounts exceptional entries as normal pages */ - if (node) - workingset_node_pages_dec(node); - /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, - false); - } - } - radix_tree_replace_slot(slot, page); - mapping->nrpages++; - if (node) { - workingset_node_pages_inc(node); - /* - * Don't track node that contains actual pages. - * - * Avoid acquiring the list_lru lock if already - * untracked. The list_empty() test is safe as - * node->private_list is protected by - * mapping->tree_lock. - */ - if (!list_empty(&node->private_list)) - list_lru_del(&workingset_shadow_nodes, - &node->private_list); - } - return 0; -} - static int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask, @@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void) { struct rmap_item *rmap_item; - rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); + rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | + __GFP_NORETRY | __GFP_NOWARN); if (rmap_item) ksm_rmap_items++; return rmap_item; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b58906b6215c..9d29ba0f7192 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private, { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; int nid = page_to_nid(page); - nodemask_t nmask = node_online_map; - struct page *new_page; + nodemask_t nmask = node_states[N_MEMORY]; + struct page *new_page = NULL; /* * TODO: allocate a destination hugepage from a nearest neighbor node, @@ -1567,14 +1567,14 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - if (nid != next_node_in(nid, nmask)) - node_clear(nid, nmask); + node_clear(nid, nmask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, 0, + if (!nodes_empty(nmask)) + new_page = __alloc_pages_nodemask(gfp_mask, 0, node_zonelist(nid, gfp_mask), &nmask); if (!new_page) new_page = __alloc_pages(gfp_mask, 0, diff --git a/mm/mmap.c b/mm/mmap.c index ca9d91bca0d6..69cad562cd00 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm, * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * + * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and + * MAP_PRIVATE: + * r: (no) no + * w: (no) no + * x: (yes) yes */ pgprot_t protection_map[16] = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, diff --git a/mm/workingset.c b/mm/workingset.c index 69551cfae97b..617475f529f4 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ - - BUG_ON(!node->count); - BUG_ON(node->count & RADIX_TREE_COUNT_MASK); + BUG_ON(!workingset_node_shadows(node)); + BUG_ON(workingset_node_pages(node)); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { BUG_ON(!radix_tree_exceptional_entry(node->slots[i])); node->slots[i] = NULL; - BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); - node->count -= 1U << RADIX_TREE_COUNT_SHIFT; + workingset_node_shadows_dec(node); BUG_ON(!mapping->nrexceptional); mapping->nrexceptional--; } } - BUG_ON(node->count); + BUG_ON(workingset_node_shadows(node)); inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); if (!__radix_tree_delete_node(&mapping->page_tree, node)) BUG(); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a87bcd2d4a94..5f006e13de56 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2123,7 +2123,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2168,6 +2168,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5b47a26d4ec..62c3ed0b7556 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2503,7 +2503,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08323bd95f2a..a756b8749a26 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2329,10 +2329,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5288cec4a2b2..d48d5571e62a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1966,12 +1966,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 704274cbd495..edc3daab354e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -648,7 +648,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fccb5dd91902..7f4265b1649b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2285,8 +2285,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2331,6 +2331,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3a224b97905..269218aacbea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3202,7 +3202,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..4a60cd5e1875 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | dlen; + u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -627,7 +627,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - u16 ifehdrln = ifehdr->metalen; + int ifehdrln = (int)ifehdr->metalen; struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); spin_lock(&ife->tcf_lock); @@ -740,8 +740,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - iethh = eth_hdr(skb); - err = skb_cow_head(skb, hdrm); if (unlikely(err)) { ife->tcf_qstats.drops++; @@ -752,6 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) skb_push(skb, skb->dev->hard_header_len); + iethh = (struct ethhdr *)skb->data; __skb_push(skb, hdrm); memcpy(skb->data, iethh, skb->mac_len); skb_reset_mac_header(skb); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f27ffee106f6..ca0516e6f743 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1153,6 +1153,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); @@ -1256,6 +1257,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } bstats_update(&cl->bstats, skb); + qdisc_qstats_backlog_inc(sch, skb); ++sch->q.qlen; agg = cl->agg; @@ -1476,6 +1478,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index add3cc7d37ec..20a350bd1b1d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, enqueue: ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; increment_qlen(skb, q); } else if (net_xmit_drop_count(ret)) { @@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; decrement_qlen(skb, q); } @@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e54738b81..0a3dbec0a8fb 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -179,6 +179,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg, msg->expires_at, jiffies); } + if (asoc->peer.prsctp_capable && + SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) + msg->expires_at = + jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); + /* This is the biggest possible DATA chunk that can fit into * the packet */ @@ -335,7 +340,7 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->prsctp_enable || + if (!chunk->asoc->peer.prsctp_capable || !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { struct sctp_datamsg *msg = chunk->msg; @@ -349,14 +354,14 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && - time_after(jiffies, chunk->prsctp_param)) { + time_after(jiffies, chunk->msg->expires_at)) { if (chunk->sent_count) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; else chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && - chunk->sent_count > chunk->prsctp_param) { + chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 72e54a416af6..107233da5cc9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -326,7 +326,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && + if (chunk->asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -383,7 +383,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->transmitted_list); @@ -418,7 +418,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); @@ -442,7 +442,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, { struct sctp_transport *transport; - if (!asoc->prsctp_enable || !asoc->sent_cnt_removable) + if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable) return; msg_len = sctp_prsctp_prune_sent(asoc, sinfo, @@ -1055,7 +1055,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(chunk); @@ -1347,7 +1347,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f3508aa75815..cef0cee182d4 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -272,28 +272,17 @@ out: return err; } -static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sock *sk, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_comm_param *commp = p; - struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); + struct sctp_association *assoc; int err = 0; - /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) - goto out; - - if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - lock_sock(sk); - if (sk != assoc->base.sk) - goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -312,7 +301,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh) < 0) { cb->args[3] = 1; - err = 2; + err = 1; goto release; } cb->args[3] = 1; @@ -321,7 +310,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { - err = 2; + err = 1; goto release; } next: @@ -333,10 +322,35 @@ next: cb->args[4] = 0; release: release_sock(sk); + sock_put(sk); return err; +} + +static int sctp_get_sock(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + sock_hold(sk); + cb->args[5] = (long)sk; + + return 1; + out: cb->args[2]++; - return err; + return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -472,10 +486,18 @@ skip: * 2 : to record the transport pos of this time's traversal * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list + * 5 : to save the sk we get from travelsing the tsp list. */ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); + +next: + cb->args[5] = 0; + sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); + + if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) + goto next; + done: cb->args[1] = cb->args[4]; cb->args[4] = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..46ffecc57214 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -706,20 +706,6 @@ nodata: return retval; } -static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, - const struct sctp_sndrcvinfo *sinfo) -{ - if (!chunk->asoc->prsctp_enable) - return; - - if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) || - SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = sinfo->sinfo_timetolive; -} - /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ @@ -753,7 +739,6 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); - sctp_set_prsctp_policy(retval, sinfo); nodata: return retval; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a8b476..8ed2d99bde6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4469,17 +4469,21 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *paddr, void *p) { struct sctp_transport *transport; - int err = 0; + int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); if (!transport || !sctp_transport_hold(transport)) goto out; - err = cb(transport, p); + + sctp_association_hold(transport->asoc); sctp_transport_put(transport); -out: rcu_read_unlock(); + err = cb(transport, p); + sctp_association_put(transport->asoc); + +out: return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 17dbbe64cd73..8a398b3fb532 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work) if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); + + listener->sk_ack_backlog--; } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. We @@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work) goto out; } - listener->sk_ack_backlog--; - /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. @@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.1.0-k"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 42396a74405d..a68f03133df9 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -363,6 +363,7 @@ is_mcounted_section_name(char const *const txtname) strcmp(".sched.text", txtname) == 0 || strcmp(".spinlock.text", txtname) == 0 || strcmp(".irqentry.text", txtname) == 0 || + strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || strcmp(".text.unlikely", txtname) == 0; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 96e2486a6fc4..2d48011bc362 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -134,6 +134,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".softirqentry.text" => 1, ".kprobes.text" => 1, ".text.unlikely" => 1, ); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index dd48f421844c..f64c57bf1d4b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -603,7 +603,8 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; sprintf(t->label[i], "label%d", i); - t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, + t->flush[i] = test_alloc(t, max(PAGE_SIZE, + sizeof(u64) * NUM_HINTS), &t->flush_dma[i]); if (!t->flush[i]) return -ENOMEM; |